diff options
-rw-r--r-- | fs/ext4/inode.c | 211 |
1 files changed, 103 insertions, 108 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index cf5d30a7cce3..91a24967b8ae 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -2927,10 +2927,10 @@ retry: | |||
2927 | * fall back to buffered IO. | 2927 | * fall back to buffered IO. |
2928 | * | 2928 | * |
2929 | * For holes, we fallocate those blocks, mark them as uninitialized | 2929 | * For holes, we fallocate those blocks, mark them as uninitialized |
2930 | * If those blocks were preallocated, we make sure they are splited, but | 2930 | * If those blocks were preallocated, we make sure they are split, but |
2931 | * still keep the range to write as uninitialized. | 2931 | * still keep the range to write as uninitialized. |
2932 | * | 2932 | * |
2933 | * The unwrritten extents will be converted to written when DIO is completed. | 2933 | * The unwritten extents will be converted to written when DIO is completed. |
2934 | * For async direct IO, since the IO may still be pending on return, we | 2934 | * For async direct IO, since the IO may still be pending on return, we |
2935 | * set up an end_io callback function, which will do the conversion | 2935 | * set up an end_io callback function, which will do the conversion |
2936 | * when the async direct IO completes. | 2936 | * when the async direct IO completes. |
@@ -2948,125 +2948,120 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
2948 | struct inode *inode = file->f_mapping->host; | 2948 | struct inode *inode = file->f_mapping->host; |
2949 | ssize_t ret; | 2949 | ssize_t ret; |
2950 | size_t count = iov_length(iov, nr_segs); | 2950 | size_t count = iov_length(iov, nr_segs); |
2951 | 2951 | int overwrite = 0; | |
2952 | get_block_t *get_block_func = NULL; | ||
2953 | int dio_flags = 0; | ||
2952 | loff_t final_size = offset + count; | 2954 | loff_t final_size = offset + count; |
2953 | if (rw == WRITE && final_size <= inode->i_size) { | ||
2954 | int overwrite = 0; | ||
2955 | get_block_t *get_block_func = NULL; | ||
2956 | int dio_flags = 0; | ||
2957 | 2955 | ||
2958 | BUG_ON(iocb->private == NULL); | 2956 | /* Use the old path for reads and writes beyond i_size. */ |
2957 | if (rw != WRITE || final_size > inode->i_size) | ||
2958 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | ||
2959 | 2959 | ||
2960 | /* If we do a overwrite dio, i_mutex locking can be released */ | 2960 | BUG_ON(iocb->private == NULL); |
2961 | overwrite = *((int *)iocb->private); | ||
2962 | 2961 | ||
2963 | if (overwrite) { | 2962 | /* If we do an overwrite dio, i_mutex locking can be released */ |
2964 | atomic_inc(&inode->i_dio_count); | 2963 | overwrite = *((int *)iocb->private); |
2965 | down_read(&EXT4_I(inode)->i_data_sem); | ||
2966 | mutex_unlock(&inode->i_mutex); | ||
2967 | } | ||
2968 | 2964 | ||
2969 | /* | 2965 | if (overwrite) { |
2970 | * We could direct write to holes and fallocate. | 2966 | atomic_inc(&inode->i_dio_count); |
2971 | * | 2967 | down_read(&EXT4_I(inode)->i_data_sem); |
2972 | * Allocated blocks to fill the hole are marked as uninitialized | 2968 | mutex_unlock(&inode->i_mutex); |
2973 | * to prevent parallel buffered read to expose the stale data | 2969 | } |
2974 | * before DIO complete the data IO. | ||
2975 | * | ||
2976 | * As to previously fallocated extents, ext4 get_block | ||
2977 | * will just simply mark the buffer mapped but still | ||
2978 | * keep the extents uninitialized. | ||
2979 | * | ||
2980 | * for non AIO case, we will convert those unwritten extents | ||
2981 | * to written after return back from blockdev_direct_IO. | ||
2982 | * | ||
2983 | * for async DIO, the conversion needs to be defered when | ||
2984 | * the IO is completed. The ext4 end_io callback function | ||
2985 | * will be called to take care of the conversion work. | ||
2986 | * Here for async case, we allocate an io_end structure to | ||
2987 | * hook to the iocb. | ||
2988 | */ | ||
2989 | iocb->private = NULL; | ||
2990 | ext4_inode_aio_set(inode, NULL); | ||
2991 | if (!is_sync_kiocb(iocb)) { | ||
2992 | ext4_io_end_t *io_end = | ||
2993 | ext4_init_io_end(inode, GFP_NOFS); | ||
2994 | if (!io_end) { | ||
2995 | ret = -ENOMEM; | ||
2996 | goto retake_lock; | ||
2997 | } | ||
2998 | io_end->flag |= EXT4_IO_END_DIRECT; | ||
2999 | iocb->private = io_end; | ||
3000 | /* | ||
3001 | * we save the io structure for current async | ||
3002 | * direct IO, so that later ext4_map_blocks() | ||
3003 | * could flag the io structure whether there | ||
3004 | * is a unwritten extents needs to be converted | ||
3005 | * when IO is completed. | ||
3006 | */ | ||
3007 | ext4_inode_aio_set(inode, io_end); | ||
3008 | } | ||
3009 | 2970 | ||
3010 | if (overwrite) { | 2971 | /* |
3011 | get_block_func = ext4_get_block_write_nolock; | 2972 | * We could direct write to holes and fallocate. |
3012 | } else { | 2973 | * |
3013 | get_block_func = ext4_get_block_write; | 2974 | * Allocated blocks to fill the hole are marked as |
3014 | dio_flags = DIO_LOCKING; | 2975 | * uninitialized to prevent parallel buffered read to expose |
2976 | * the stale data before DIO completes the data IO. | |
2977 | * | ||
2978 | * As to previously fallocated extents, ext4 get_block will | ||
2979 | * just simply mark the buffer mapped but still keep the | ||
2980 | * extents uninitialized. | ||
2981 | * | ||
2982 | * For non AIO case, we will convert those unwritten extents | ||
2983 | * to written after return back from blockdev_direct_IO. | ||
2984 | * | ||
2985 | * For async DIO, the conversion needs to be deferred until the | |
2986 | * IO is completed. The ext4 end_io callback function will be | ||
2987 | * called to take care of the conversion work. Here for async | ||
2988 | * case, we allocate an io_end structure to hook to the iocb. | ||
2989 | */ | ||
2990 | iocb->private = NULL; | ||
2991 | ext4_inode_aio_set(inode, NULL); | ||
2992 | if (!is_sync_kiocb(iocb)) { | ||
2993 | ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
2994 | if (!io_end) { | ||
2995 | ret = -ENOMEM; | ||
2996 | goto retake_lock; | ||
3015 | } | 2997 | } |
3016 | ret = __blockdev_direct_IO(rw, iocb, inode, | 2998 | io_end->flag |= EXT4_IO_END_DIRECT; |
3017 | inode->i_sb->s_bdev, iov, | 2999 | iocb->private = io_end; |
3018 | offset, nr_segs, | ||
3019 | get_block_func, | ||
3020 | ext4_end_io_dio, | ||
3021 | NULL, | ||
3022 | dio_flags); | ||
3023 | |||
3024 | if (iocb->private) | ||
3025 | ext4_inode_aio_set(inode, NULL); | ||
3026 | /* | 3000 | /* |
3027 | * The io_end structure takes a reference to the inode, | 3001 | * we save the io structure for current async direct |
3028 | * that structure needs to be destroyed and the | 3002 | * IO, so that later ext4_map_blocks() could flag the |
3029 | * reference to the inode need to be dropped, when IO is | 3003 | * io structure whether there is a unwritten extents |
3030 | * complete, even with 0 byte write, or failed. | 3004 | * needs to be converted when IO is completed. |
3031 | * | ||
3032 | * In the successful AIO DIO case, the io_end structure will be | ||
3033 | * desctroyed and the reference to the inode will be dropped | ||
3034 | * after the end_io call back function is called. | ||
3035 | * | ||
3036 | * In the case there is 0 byte write, or error case, since | ||
3037 | * VFS direct IO won't invoke the end_io call back function, | ||
3038 | * we need to free the end_io structure here. | ||
3039 | */ | 3005 | */ |
3040 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3006 | ext4_inode_aio_set(inode, io_end); |
3041 | ext4_free_io_end(iocb->private); | 3007 | } |
3042 | iocb->private = NULL; | ||
3043 | } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | ||
3044 | EXT4_STATE_DIO_UNWRITTEN)) { | ||
3045 | int err; | ||
3046 | /* | ||
3047 | * for non AIO case, since the IO is already | ||
3048 | * completed, we could do the conversion right here | ||
3049 | */ | ||
3050 | err = ext4_convert_unwritten_extents(inode, | ||
3051 | offset, ret); | ||
3052 | if (err < 0) | ||
3053 | ret = err; | ||
3054 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | ||
3055 | } | ||
3056 | 3008 | ||
3057 | retake_lock: | 3009 | if (overwrite) { |
3058 | /* take i_mutex locking again if we do a ovewrite dio */ | 3010 | get_block_func = ext4_get_block_write_nolock; |
3059 | if (overwrite) { | 3011 | } else { |
3060 | inode_dio_done(inode); | 3012 | get_block_func = ext4_get_block_write; |
3061 | up_read(&EXT4_I(inode)->i_data_sem); | 3013 | dio_flags = DIO_LOCKING; |
3062 | mutex_lock(&inode->i_mutex); | 3014 | } |
3063 | } | 3015 | ret = __blockdev_direct_IO(rw, iocb, inode, |
3016 | inode->i_sb->s_bdev, iov, | ||
3017 | offset, nr_segs, | ||
3018 | get_block_func, | ||
3019 | ext4_end_io_dio, | ||
3020 | NULL, | ||
3021 | dio_flags); | ||
3022 | |||
3023 | if (iocb->private) | ||
3024 | ext4_inode_aio_set(inode, NULL); | ||
3025 | /* | ||
3026 | * The io_end structure takes a reference to the inode, that | ||
3027 | * structure needs to be destroyed and the reference to the | ||
3028 | * inode needs to be dropped when IO is complete, even for a 0 | |
3029 | * byte write or a failure. | |
3030 | * | ||
3031 | * In the successful AIO DIO case, the io_end structure will | ||
3032 | * be destroyed and the reference to the inode will be dropped | ||
3033 | * after the end_io call back function is called. | ||
3034 | * | ||
3035 | * In the case there is 0 byte write, or error case, since VFS | ||
3036 | * direct IO won't invoke the end_io call back function, we | ||
3037 | * need to free the end_io structure here. | ||
3038 | */ | ||
3039 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | ||
3040 | ext4_free_io_end(iocb->private); | ||
3041 | iocb->private = NULL; | ||
3042 | } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | ||
3043 | EXT4_STATE_DIO_UNWRITTEN)) { | ||
3044 | int err; | ||
3045 | /* | ||
3046 | * for non AIO case, since the IO is already | ||
3047 | * completed, we could do the conversion right here | ||
3048 | */ | ||
3049 | err = ext4_convert_unwritten_extents(inode, | ||
3050 | offset, ret); | ||
3051 | if (err < 0) | ||
3052 | ret = err; | ||
3053 | ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); | ||
3054 | } | ||
3064 | 3055 | ||
3065 | return ret; | 3056 | retake_lock: |
3057 | /* take i_mutex locking again if we do an overwrite dio */ | |
3058 | if (overwrite) { | ||
3059 | inode_dio_done(inode); | ||
3060 | up_read(&EXT4_I(inode)->i_data_sem); | ||
3061 | mutex_lock(&inode->i_mutex); | ||
3066 | } | 3062 | } |
3067 | 3063 | ||
3068 | /* for write the the end of file case, we fall back to old way */ | 3064 | return ret; |
3069 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | ||
3070 | } | 3065 | } |
3071 | 3066 | ||
3072 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | 3067 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, |