diff options
author | Jan Kara <jack@suse.com> | 2016-02-19 00:18:25 -0500 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2016-02-19 00:18:25 -0500 |
commit | ed8ad83808f009ade97ebbf6519bc3a97fefbc0c (patch) | |
tree | 02befbe2dee42bf5a4b7a141888d43a6ecde7469 /fs | |
parent | c906f38e8853cfd407b30d2f4756a93c1d8f698f (diff) |
ext4: fix bh->b_state corruption
ext4 can update bh->b_state non-atomically in _ext4_get_block() and
ext4_da_get_block_prep(). Usually this is fine since bh is just a
temporary storage for mapping information on stack but in some cases it
can be fully living bh attached to a page. In such case non-atomic
update of bh->b_state can race with an atomic update which then gets
lost. Usually when we are mapping bh and thus updating bh->b_state
non-atomically, nobody else touches the bh and so things work out fine
but there is one case to especially worry about: ext4_finish_bio() uses
BH_Uptodate_Lock on the first bh in the page to synchronize handling of
PageWriteback state. So when blocksize < pagesize, we can be atomically
modifying bh->b_state of a buffer that actually isn't under IO and thus
can race e.g. with delalloc trying to map that buffer. The result is
that we can mistakenly set / clear BH_Uptodate_Lock bit resulting in the
corruption of PageWriteback state or missed unlock of BH_Uptodate_Lock.
Fix the problem by always updating bh->b_state bits atomically.
CC: stable@vger.kernel.org
Reported-by: Nikolay Borisov <kernel@kyup.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ext4/inode.c | 32 |
1 files changed, 30 insertions, 2 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 83bc8bfb3bea..d6674479269d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -686,6 +686,34 @@ out_sem: | |||
686 | return retval; | 686 | return retval; |
687 | } | 687 | } |
688 | 688 | ||
689 | /* | ||
690 | * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages | ||
691 | * we have to be careful as someone else may be manipulating b_state as well. | ||
692 | */ | ||
693 | static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) | ||
694 | { | ||
695 | unsigned long old_state; | ||
696 | unsigned long new_state; | ||
697 | |||
698 | flags &= EXT4_MAP_FLAGS; | ||
699 | |||
700 | /* Dummy buffer_head? Set non-atomically. */ | ||
701 | if (!bh->b_page) { | ||
702 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags; | ||
703 | return; | ||
704 | } | ||
705 | /* | ||
706 | * Someone else may be modifying b_state. Be careful! This is ugly but | ||
707 | * once we get rid of using bh as a container for mapping information | ||
708 | * to pass to / from get_block functions, this can go away. | ||
709 | */ | ||
710 | do { | ||
711 | old_state = READ_ONCE(bh->b_state); | ||
712 | new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; | ||
713 | } while (unlikely( | ||
714 | cmpxchg(&bh->b_state, old_state, new_state) != old_state)); | ||
715 | } | ||
716 | |||
689 | /* Maximum number of blocks we map for direct IO at once. */ | 717 | /* Maximum number of blocks we map for direct IO at once. */ |
690 | #define DIO_MAX_BLOCKS 4096 | 718 | #define DIO_MAX_BLOCKS 4096 |
691 | 719 | ||
@@ -722,7 +750,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, | |||
722 | ext4_io_end_t *io_end = ext4_inode_aio(inode); | 750 | ext4_io_end_t *io_end = ext4_inode_aio(inode); |
723 | 751 | ||
724 | map_bh(bh, inode->i_sb, map.m_pblk); | 752 | map_bh(bh, inode->i_sb, map.m_pblk); |
725 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 753 | ext4_update_bh_state(bh, map.m_flags); |
726 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) | 754 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) |
727 | set_buffer_defer_completion(bh); | 755 | set_buffer_defer_completion(bh); |
728 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | 756 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; |
@@ -1685,7 +1713,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
1685 | return ret; | 1713 | return ret; |
1686 | 1714 | ||
1687 | map_bh(bh, inode->i_sb, map.m_pblk); | 1715 | map_bh(bh, inode->i_sb, map.m_pblk); |
1688 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 1716 | ext4_update_bh_state(bh, map.m_flags); |
1689 | 1717 | ||
1690 | if (buffer_unwritten(bh)) { | 1718 | if (buffer_unwritten(bh)) { |
1691 | /* A delayed write to unwritten bh should be marked | 1719 | /* A delayed write to unwritten bh should be marked |