aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.com> 2015-12-07 15:10:44 -0500
committer: Theodore Ts'o <tytso@mit.edu> 2015-12-07 15:10:44 -0500
commit: ba5843f51d468644b094674c0317c9ab95632caa (patch)
tree: e2ab7e39a4eff12af5bc9f2b14dafc0f08731727 /fs/ext4/inode.c
parent: c86d8db33a922da808a5560aa15ed663a9569b37 (diff)
ext4: use pre-zeroed blocks for DAX page faults
Make DAX fault path use pre-zeroed blocks to avoid races with extent conversion and zeroing when two page faults to the same block happen.

Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- fs/ext4/inode.c 86
1 files changed, 69 insertions, 17 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4241d0cff062..ff2f3cd38522 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -723,16 +723,6 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
723 723
724 map_bh(bh, inode->i_sb, map.m_pblk); 724 map_bh(bh, inode->i_sb, map.m_pblk);
725 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags; 725 bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
726 if (IS_DAX(inode) && buffer_unwritten(bh)) {
727 /*
728 * dgc: I suspect unwritten conversion on ext4+DAX is
729 * fundamentally broken here when there are concurrent
730 * read/write in progress on this inode.
731 */
732 WARN_ON_ONCE(io_end);
733 bh->b_assoc_map = inode->i_mapping;
734 bh->b_private = (void *)(unsigned long)iblock;
735 }
736 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN) 726 if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
737 set_buffer_defer_completion(bh); 727 set_buffer_defer_completion(bh);
738 bh->b_size = inode->i_sb->s_blocksize * map.m_len; 728 bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -3097,17 +3087,79 @@ static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
3097 return ret; 3087 return ret;
3098} 3088}
3099 3089
3100int ext4_get_block_dax(struct inode *inode, sector_t iblock, 3090#ifdef CONFIG_FS_DAX
3101 struct buffer_head *bh_result, int create) 3091int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
3092 struct buffer_head *bh_result, int create)
3102{ 3093{
3103 int flags = EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_UNWRIT_EXT; 3094 int ret, err;
3095 int credits;
3096 struct ext4_map_blocks map;
3097 handle_t *handle = NULL;
3098 int flags = 0;
3104 3099
3105 if (create) 3100 ext4_debug("ext4_dax_mmap_get_block: inode %lu, create flag %d\n",
3106 flags |= EXT4_GET_BLOCKS_CREATE;
3107 ext4_debug("ext4_get_block_dax: inode %lu, create flag %d\n",
3108 inode->i_ino, create); 3101 inode->i_ino, create);
3109 return _ext4_get_block(inode, iblock, bh_result, flags); 3102 map.m_lblk = iblock;
3103 map.m_len = bh_result->b_size >> inode->i_blkbits;
3104 credits = ext4_chunk_trans_blocks(inode, map.m_len);
3105 if (create) {
3106 flags |= EXT4_GET_BLOCKS_PRE_IO | EXT4_GET_BLOCKS_CREATE_ZERO;
3107 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
3108 if (IS_ERR(handle)) {
3109 ret = PTR_ERR(handle);
3110 return ret;
3111 }
3112 }
3113
3114 ret = ext4_map_blocks(handle, inode, &map, flags);
3115 if (create) {
3116 err = ext4_journal_stop(handle);
3117 if (ret >= 0 && err < 0)
3118 ret = err;
3119 }
3120 if (ret <= 0)
3121 goto out;
3122 if (map.m_flags & EXT4_MAP_UNWRITTEN) {
3123 int err2;
3124
3125 /*
3126 * We are protected by i_mmap_sem so we know block cannot go
3127 * away from under us even though we dropped i_data_sem.
3128 * Convert extent to written and write zeros there.
3129 *
3130 * Note: We may get here even when create == 0.
3131 */
3132 handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
3133 if (IS_ERR(handle)) {
3134 ret = PTR_ERR(handle);
3135 goto out;
3136 }
3137
3138 err = ext4_map_blocks(handle, inode, &map,
3139 EXT4_GET_BLOCKS_CONVERT | EXT4_GET_BLOCKS_CREATE_ZERO);
3140 if (err < 0)
3141 ret = err;
3142 err2 = ext4_journal_stop(handle);
3143 if (err2 < 0 && ret > 0)
3144 ret = err2;
3145 }
3146out:
3147 WARN_ON_ONCE(ret == 0 && create);
3148 if (ret > 0) {
3149 map_bh(bh_result, inode->i_sb, map.m_pblk);
3150 bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
3151 map.m_flags;
3152 /*
3153 * At least for now we have to clear BH_New so that DAX code
3154 * doesn't attempt to zero blocks again in a racy way.
3155 */
3156 bh_result->b_state &= ~(1 << BH_New);
3157 bh_result->b_size = map.m_len << inode->i_blkbits;
3158 ret = 0;
3159 }
3160 return ret;
3110} 3161}
3162#endif
3111 3163
3112static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, 3164static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3113 ssize_t size, void *private) 3165 ssize_t size, void *private)