about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/ext4/inode.c 211
1 files changed, 103 insertions, 108 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cf5d30a7cce3..91a24967b8ae 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2927,10 +2927,10 @@ retry:
2927 * fall back to buffered IO. 2927 * fall back to buffered IO.
2928 * 2928 *
2929 * For holes, we fallocate those blocks, mark them as uninitialized 2929 * For holes, we fallocate those blocks, mark them as uninitialized
2930 * If those blocks were preallocated, we mark sure they are splited, but 2930 * If those blocks were preallocated, we mark sure they are split, but
2931 * still keep the range to write as uninitialized. 2931 * still keep the range to write as uninitialized.
2932 * 2932 *
2933 * The unwrritten extents will be converted to written when DIO is completed. 2933 * The unwritten extents will be converted to written when DIO is completed.
2934 * For async direct IO, since the IO may still pending when return, we 2934 * For async direct IO, since the IO may still pending when return, we
2935 * set up an end_io call back function, which will do the conversion 2935 * set up an end_io call back function, which will do the conversion
2936 * when async direct IO completed. 2936 * when async direct IO completed.
@@ -2948,125 +2948,120 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
2948 struct inode *inode = file->f_mapping->host; 2948 struct inode *inode = file->f_mapping->host;
2949 ssize_t ret; 2949 ssize_t ret;
2950 size_t count = iov_length(iov, nr_segs); 2950 size_t count = iov_length(iov, nr_segs);
2951 2951 int overwrite = 0;
2952 get_block_t *get_block_func = NULL;
2953 int dio_flags = 0;
2952 loff_t final_size = offset + count; 2954 loff_t final_size = offset + count;
2953 if (rw == WRITE && final_size <= inode->i_size) {
2954 int overwrite = 0;
2955 get_block_t *get_block_func = NULL;
2956 int dio_flags = 0;
2957 2955
2958 BUG_ON(iocb->private == NULL); 2956 /* Use the old path for reads and writes beyond i_size. */
2957 if (rw != WRITE || final_size > inode->i_size)
2958 return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
2959 2959
2960 /* If we do a overwrite dio, i_mutex locking can be released */ 2960 BUG_ON(iocb->private == NULL);
2961 overwrite = *((int *)iocb->private);
2962 2961
2963 if (overwrite) { 2962 /* If we do a overwrite dio, i_mutex locking can be released */
2964 atomic_inc(&inode->i_dio_count); 2963 overwrite = *((int *)iocb->private);
2965 down_read(&EXT4_I(inode)->i_data_sem);
2966 mutex_unlock(&inode->i_mutex);
2967 }
2968 2964
2969 /* 2965 if (overwrite) {
2970 * We could direct write to holes and fallocate. 2966 atomic_inc(&inode->i_dio_count);
2971 * 2967 down_read(&EXT4_I(inode)->i_data_sem);
2972 * Allocated blocks to fill the hole are marked as uninitialized 2968 mutex_unlock(&inode->i_mutex);
2973 * to prevent parallel buffered read to expose the stale data 2969 }
2974 * before DIO complete the data IO.
2975 *
2976 * As to previously fallocated extents, ext4 get_block
2977 * will just simply mark the buffer mapped but still
2978 * keep the extents uninitialized.
2979 *
2980 * for non AIO case, we will convert those unwritten extents
2981 * to written after return back from blockdev_direct_IO.
2982 *
2983 * for async DIO, the conversion needs to be defered when
2984 * the IO is completed. The ext4 end_io callback function
2985 * will be called to take care of the conversion work.
2986 * Here for async case, we allocate an io_end structure to
2987 * hook to the iocb.
2988 */
2989 iocb->private = NULL;
2990 ext4_inode_aio_set(inode, NULL);
2991 if (!is_sync_kiocb(iocb)) {
2992 ext4_io_end_t *io_end =
2993 ext4_init_io_end(inode, GFP_NOFS);
2994 if (!io_end) {
2995 ret = -ENOMEM;
2996 goto retake_lock;
2997 }
2998 io_end->flag |= EXT4_IO_END_DIRECT;
2999 iocb->private = io_end;
3000 /*
3001 * we save the io structure for current async
3002 * direct IO, so that later ext4_map_blocks()
3003 * could flag the io structure whether there
3004 * is a unwritten extents needs to be converted
3005 * when IO is completed.
3006 */
3007 ext4_inode_aio_set(inode, io_end);
3008 }
3009 2970
3010 if (overwrite) { 2971 /*
3011 get_block_func = ext4_get_block_write_nolock; 2972 * We could direct write to holes and fallocate.
3012 } else { 2973 *
3013 get_block_func = ext4_get_block_write; 2974 * Allocated blocks to fill the hole are marked as
3014 dio_flags = DIO_LOCKING; 2975 * uninitialized to prevent parallel buffered read to expose
2976 * the stale data before DIO complete the data IO.
2977 *
2978 * As to previously fallocated extents, ext4 get_block will
2979 * just simply mark the buffer mapped but still keep the
2980 * extents uninitialized.
2981 *
2982 * For non AIO case, we will convert those unwritten extents
2983 * to written after return back from blockdev_direct_IO.
2984 *
2985 * For async DIO, the conversion needs to be deferred when the
2986 * IO is completed. The ext4 end_io callback function will be
2987 * called to take care of the conversion work. Here for async
2988 * case, we allocate an io_end structure to hook to the iocb.
2989 */
2990 iocb->private = NULL;
2991 ext4_inode_aio_set(inode, NULL);
2992 if (!is_sync_kiocb(iocb)) {
2993 ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS);
2994 if (!io_end) {
2995 ret = -ENOMEM;
2996 goto retake_lock;
3015 } 2997 }
3016 ret = __blockdev_direct_IO(rw, iocb, inode, 2998 io_end->flag |= EXT4_IO_END_DIRECT;
3017 inode->i_sb->s_bdev, iov, 2999 iocb->private = io_end;
3018 offset, nr_segs,
3019 get_block_func,
3020 ext4_end_io_dio,
3021 NULL,
3022 dio_flags);
3023
3024 if (iocb->private)
3025 ext4_inode_aio_set(inode, NULL);
3026 /* 3000 /*
3027 * The io_end structure takes a reference to the inode, 3001 * we save the io structure for current async direct
3028 * that structure needs to be destroyed and the 3002 * IO, so that later ext4_map_blocks() could flag the
3029 * reference to the inode need to be dropped, when IO is 3003 * io structure whether there is a unwritten extents
3030 * complete, even with 0 byte write, or failed. 3004 * needs to be converted when IO is completed.
3031 *
3032 * In the successful AIO DIO case, the io_end structure will be
3033 * desctroyed and the reference to the inode will be dropped
3034 * after the end_io call back function is called.
3035 *
3036 * In the case there is 0 byte write, or error case, since
3037 * VFS direct IO won't invoke the end_io call back function,
3038 * we need to free the end_io structure here.
3039 */ 3005 */
3040 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { 3006 ext4_inode_aio_set(inode, io_end);
3041 ext4_free_io_end(iocb->private); 3007 }
3042 iocb->private = NULL;
3043 } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3044 EXT4_STATE_DIO_UNWRITTEN)) {
3045 int err;
3046 /*
3047 * for non AIO case, since the IO is already
3048 * completed, we could do the conversion right here
3049 */
3050 err = ext4_convert_unwritten_extents(inode,
3051 offset, ret);
3052 if (err < 0)
3053 ret = err;
3054 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3055 }
3056 3008
3057 retake_lock: 3009 if (overwrite) {
3058 /* take i_mutex locking again if we do a ovewrite dio */ 3010 get_block_func = ext4_get_block_write_nolock;
3059 if (overwrite) { 3011 } else {
3060 inode_dio_done(inode); 3012 get_block_func = ext4_get_block_write;
3061 up_read(&EXT4_I(inode)->i_data_sem); 3013 dio_flags = DIO_LOCKING;
3062 mutex_lock(&inode->i_mutex); 3014 }
3063 } 3015 ret = __blockdev_direct_IO(rw, iocb, inode,
3016 inode->i_sb->s_bdev, iov,
3017 offset, nr_segs,
3018 get_block_func,
3019 ext4_end_io_dio,
3020 NULL,
3021 dio_flags);
3022
3023 if (iocb->private)
3024 ext4_inode_aio_set(inode, NULL);
3025 /*
3026 * The io_end structure takes a reference to the inode, that
3027 * structure needs to be destroyed and the reference to the
3028 * inode need to be dropped, when IO is complete, even with 0
3029 * byte write, or failed.
3030 *
3031 * In the successful AIO DIO case, the io_end structure will
3032 * be destroyed and the reference to the inode will be dropped
3033 * after the end_io call back function is called.
3034 *
3035 * In the case there is 0 byte write, or error case, since VFS
3036 * direct IO won't invoke the end_io call back function, we
3037 * need to free the end_io structure here.
3038 */
3039 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
3040 ext4_free_io_end(iocb->private);
3041 iocb->private = NULL;
3042 } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
3043 EXT4_STATE_DIO_UNWRITTEN)) {
3044 int err;
3045 /*
3046 * for non AIO case, since the IO is already
3047 * completed, we could do the conversion right here
3048 */
3049 err = ext4_convert_unwritten_extents(inode,
3050 offset, ret);
3051 if (err < 0)
3052 ret = err;
3053 ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
3054 }
3064 3055
3065 return ret; 3056 retake_lock:
3057 /* take i_mutex locking again if we do a ovewrite dio */
3058 if (overwrite) {
3059 inode_dio_done(inode);
3060 up_read(&EXT4_I(inode)->i_data_sem);
3061 mutex_lock(&inode->i_mutex);
3066 } 3062 }
3067 3063
3068 /* for write the the end of file case, we fall back to old way */ 3064 return ret;
3069 return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs);
3070 } 3065 }
3071 3066
3072 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, 3067 static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,