diff options
| author | Jan Kara <jack@suse.com> | 2016-02-19 00:18:25 -0500 |
|---|---|---|
| committer | Theodore Ts'o <tytso@mit.edu> | 2016-02-19 00:18:25 -0500 |
| commit | ed8ad83808f009ade97ebbf6519bc3a97fefbc0c (patch) | |
| tree | 02befbe2dee42bf5a4b7a141888d43a6ecde7469 | |
| parent | c906f38e8853cfd407b30d2f4756a93c1d8f698f (diff) | |
ext4: fix bh->b_state corruption
ext4 can update bh->b_state non-atomically in _ext4_get_block() and
ext4_da_get_block_prep(). Usually this is fine since bh is just a
temporary storage for mapping information on stack but in some cases it
can be fully living bh attached to a page. In such case non-atomic
update of bh->b_state can race with an atomic update which then gets
lost. Usually when we are mapping bh and thus updating bh->b_state
non-atomically, nobody else touches the bh and so things work out fine
but there is one case to especially worry about: ext4_finish_bio() uses
BH_Uptodate_Lock on the first bh in the page to synchronize handling of
PageWriteback state. So when blocksize < pagesize, we can be atomically
modifying bh->b_state of a buffer that actually isn't under IO and thus
can race e.g. with delalloc trying to map that buffer. The result is
that we can mistakenly set / clear BH_Uptodate_Lock bit resulting in the
corruption of PageWriteback state or missed unlock of BH_Uptodate_Lock.
Fix the problem by always updating bh->b_state bits atomically.
CC: stable@vger.kernel.org
Reported-by: Nikolay Borisov <kernel@kyup.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
| -rw-r--r-- | fs/ext4/inode.c | 32 |
1 files changed, 30 insertions, 2 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 83bc8bfb3bea..d6674479269d 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -686,6 +686,34 @@ out_sem: | |||
| 686 | return retval; | 686 | return retval; |
| 687 | } | 687 | } |
| 688 | 688 | ||
| 689 | /* | ||
| 690 | * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages | ||
| 691 | * we have to be careful as someone else may be manipulating b_state as well. | ||
| 692 | */ | ||
| 693 | static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags) | ||
| 694 | { | ||
| 695 | unsigned long old_state; | ||
| 696 | unsigned long new_state; | ||
| 697 | |||
| 698 | flags &= EXT4_MAP_FLAGS; | ||
| 699 | |||
| 700 | /* Dummy buffer_head? Set non-atomically. */ | ||
| 701 | if (!bh->b_page) { | ||
| 702 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags; | ||
| 703 | return; | ||
| 704 | } | ||
| 705 | /* | ||
| 706 | * Someone else may be modifying b_state. Be careful! This is ugly but | ||
| 707 | * once we get rid of using bh as a container for mapping information | ||
| 708 | * to pass to / from get_block functions, this can go away. | ||
| 709 | */ | ||
| 710 | do { | ||
| 711 | old_state = READ_ONCE(bh->b_state); | ||
| 712 | new_state = (old_state & ~EXT4_MAP_FLAGS) | flags; | ||
| 713 | } while (unlikely( | ||
| 714 | cmpxchg(&bh->b_state, old_state, new_state) != old_state)); | ||
| 715 | } | ||
| 716 | |||
| 689 | /* Maximum number of blocks we map for direct IO at once. */ | 717 | /* Maximum number of blocks we map for direct IO at once. */ |
| 690 | #define DIO_MAX_BLOCKS 4096 | 718 | #define DIO_MAX_BLOCKS 4096 |
| 691 | 719 | ||
| @@ -722,7 +750,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock, | |||
| 722 | ext4_io_end_t *io_end = ext4_inode_aio(inode); | 750 | ext4_io_end_t *io_end = ext4_inode_aio(inode); |
| 723 | 751 | ||
| 724 | map_bh(bh, inode->i_sb, map.m_pblk); | 752 | map_bh(bh, inode->i_sb, map.m_pblk); |
| 725 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 753 | ext4_update_bh_state(bh, map.m_flags); |
| 726 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) | 754 | if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) |
| 727 | set_buffer_defer_completion(bh); | 755 | set_buffer_defer_completion(bh); |
| 728 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; | 756 | bh->b_size = inode->i_sb->s_blocksize * map.m_len; |
| @@ -1685,7 +1713,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
| 1685 | return ret; | 1713 | return ret; |
| 1686 | 1714 | ||
| 1687 | map_bh(bh, inode->i_sb, map.m_pblk); | 1715 | map_bh(bh, inode->i_sb, map.m_pblk); |
| 1688 | bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; | 1716 | ext4_update_bh_state(bh, map.m_flags); |
| 1689 | 1717 | ||
| 1690 | if (buffer_unwritten(bh)) { | 1718 | if (buffer_unwritten(bh)) { |
| 1691 | /* A delayed write to unwritten bh should be marked | 1719 | /* A delayed write to unwritten bh should be marked |
