diff options
author | Zheng Liu <wenqing.lz@taobao.com> | 2012-07-09 16:29:29 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2012-07-09 16:29:29 -0400 |
commit | 729f52c6be51013c9268e5fc85acbc1091286fdb (patch) | |
tree | c3e8a6348ef23fcf15797b9f68905ec666ec43f8 | |
parent | fbe104942d3ff44f6802e8e4a3fbf267c1fb9ac4 (diff) |
ext4: add a new nolock flag in ext4_map_blocks
The EXT4_GET_BLOCKS_NO_LOCK flag is added to indicate that we don't need
to acquire the i_data_sem lock in ext4_map_blocks. Meanwhile, it changes
ext4_get_block() to not start a new journal because when we do an
overwrite dio, there is no metadata that needs to be modified.
We define a new function called ext4_get_block_write_nolock, which is
used for nolock dio overwrites. This function doesn't try to acquire
the i_data_sem lock and doesn't start a new journal, as it only does a
lookup.
CC: Tao Ma <tm@tao.ma>
CC: Eric Sandeen <sandeen@redhat.com>
CC: Robin Dong <hao.bigrat@gmail.com>
Signed-off-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
-rw-r--r-- | fs/ext4/ext4.h | 2 | ||||
-rw-r--r-- | fs/ext4/inode.c | 59 |
2 files changed, 51 insertions, 10 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 01434f25917d..4a49f8225d0b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -571,6 +571,8 @@ enum { | |||
571 | #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 | 571 | #define EXT4_GET_BLOCKS_NO_NORMALIZE 0x0040 |
572 | /* Request will not result in inode size update (user for fallocate) */ | 572 | /* Request will not result in inode size update (user for fallocate) */ |
573 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 | 573 | #define EXT4_GET_BLOCKS_KEEP_SIZE 0x0080 |
574 | /* Do not take i_data_sem locking in ext4_map_blocks */ | ||
575 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 | ||
574 | 576 | ||
575 | /* | 577 | /* |
576 | * Flags used by ext4_free_blocks | 578 | * Flags used by ext4_free_blocks |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 02bc8cbe7281..76cb3b1ad78a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -544,7 +544,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
544 | * Try to see if we can get the block without requesting a new | 544 | * Try to see if we can get the block without requesting a new |
545 | * file system block. | 545 | * file system block. |
546 | */ | 546 | */ |
547 | down_read((&EXT4_I(inode)->i_data_sem)); | 547 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) |
548 | down_read((&EXT4_I(inode)->i_data_sem)); | ||
548 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 549 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
549 | retval = ext4_ext_map_blocks(handle, inode, map, flags & | 550 | retval = ext4_ext_map_blocks(handle, inode, map, flags & |
550 | EXT4_GET_BLOCKS_KEEP_SIZE); | 551 | EXT4_GET_BLOCKS_KEEP_SIZE); |
@@ -552,7 +553,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
552 | retval = ext4_ind_map_blocks(handle, inode, map, flags & | 553 | retval = ext4_ind_map_blocks(handle, inode, map, flags & |
553 | EXT4_GET_BLOCKS_KEEP_SIZE); | 554 | EXT4_GET_BLOCKS_KEEP_SIZE); |
554 | } | 555 | } |
555 | up_read((&EXT4_I(inode)->i_data_sem)); | 556 | if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) |
557 | up_read((&EXT4_I(inode)->i_data_sem)); | ||
556 | 558 | ||
557 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 559 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
558 | int ret = check_block_validity(inode, map); | 560 | int ret = check_block_validity(inode, map); |
@@ -2818,6 +2820,32 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock, | |||
2818 | EXT4_GET_BLOCKS_IO_CREATE_EXT); | 2820 | EXT4_GET_BLOCKS_IO_CREATE_EXT); |
2819 | } | 2821 | } |
2820 | 2822 | ||
2823 | static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock, | ||
2824 | struct buffer_head *bh_result, int flags) | ||
2825 | { | ||
2826 | handle_t *handle = ext4_journal_current_handle(); | ||
2827 | struct ext4_map_blocks map; | ||
2828 | int ret = 0; | ||
2829 | |||
2830 | ext4_debug("ext4_get_block_write_nolock: inode %lu, flag %d\n", | ||
2831 | inode->i_ino, flags); | ||
2832 | |||
2833 | flags = EXT4_GET_BLOCKS_NO_LOCK; | ||
2834 | |||
2835 | map.m_lblk = iblock; | ||
2836 | map.m_len = bh_result->b_size >> inode->i_blkbits; | ||
2837 | |||
2838 | ret = ext4_map_blocks(handle, inode, &map, flags); | ||
2839 | if (ret > 0) { | ||
2840 | map_bh(bh_result, inode->i_sb, map.m_pblk); | ||
2841 | bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) | | ||
2842 | map.m_flags; | ||
2843 | bh_result->b_size = inode->i_sb->s_blocksize * map.m_len; | ||
2844 | ret = 0; | ||
2845 | } | ||
2846 | return ret; | ||
2847 | } | ||
2848 | |||
2821 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | 2849 | static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, |
2822 | ssize_t size, void *private, int ret, | 2850 | ssize_t size, void *private, int ret, |
2823 | bool is_async) | 2851 | bool is_async) |
@@ -2966,6 +2994,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
2966 | 2994 | ||
2967 | loff_t final_size = offset + count; | 2995 | loff_t final_size = offset + count; |
2968 | if (rw == WRITE && final_size <= inode->i_size) { | 2996 | if (rw == WRITE && final_size <= inode->i_size) { |
2997 | int overwrite = 0; | ||
2998 | |||
2969 | /* | 2999 | /* |
2970 | * We could direct write to holes and fallocate. | 3000 | * We could direct write to holes and fallocate. |
2971 | * | 3001 | * |
@@ -3005,13 +3035,22 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3005 | EXT4_I(inode)->cur_aio_dio = iocb->private; | 3035 | EXT4_I(inode)->cur_aio_dio = iocb->private; |
3006 | } | 3036 | } |
3007 | 3037 | ||
3008 | ret = __blockdev_direct_IO(rw, iocb, inode, | 3038 | if (overwrite) |
3009 | inode->i_sb->s_bdev, iov, | 3039 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3010 | offset, nr_segs, | 3040 | inode->i_sb->s_bdev, iov, |
3011 | ext4_get_block_write, | 3041 | offset, nr_segs, |
3012 | ext4_end_io_dio, | 3042 | ext4_get_block_write_nolock, |
3013 | NULL, | 3043 | ext4_end_io_dio, |
3014 | DIO_LOCKING); | 3044 | NULL, |
3045 | 0); | ||
3046 | else | ||
3047 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
3048 | inode->i_sb->s_bdev, iov, | ||
3049 | offset, nr_segs, | ||
3050 | ext4_get_block_write, | ||
3051 | ext4_end_io_dio, | ||
3052 | NULL, | ||
3053 | DIO_LOCKING); | ||
3015 | if (iocb->private) | 3054 | if (iocb->private) |
3016 | EXT4_I(inode)->cur_aio_dio = NULL; | 3055 | EXT4_I(inode)->cur_aio_dio = NULL; |
3017 | /* | 3056 | /* |
@@ -3031,7 +3070,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3031 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3070 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { |
3032 | ext4_free_io_end(iocb->private); | 3071 | ext4_free_io_end(iocb->private); |
3033 | iocb->private = NULL; | 3072 | iocb->private = NULL; |
3034 | } else if (ret > 0 && ext4_test_inode_state(inode, | 3073 | } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, |
3035 | EXT4_STATE_DIO_UNWRITTEN)) { | 3074 | EXT4_STATE_DIO_UNWRITTEN)) { |
3036 | int err; | 3075 | int err; |
3037 | /* | 3076 | /* |