Diffstat (limited to 'fs/ext4/inode.c')

 fs/ext4/inode.c | 402 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 267 insertions(+), 135 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index aee960b1af34..dab84a2530ff 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -216,7 +216,6 @@ void ext4_evict_inode(struct inode *inode)
 		}
 		truncate_inode_pages_final(&inode->i_data);
 
-		WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
 		goto no_delete;
 	}
 
@@ -228,8 +227,6 @@ void ext4_evict_inode(struct inode *inode)
 		ext4_begin_ordered_truncate(inode, 0);
 	truncate_inode_pages_final(&inode->i_data);
 
-	WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
-
 	/*
 	 * Protect us against freezing - iput() caller didn't have to have any
 	 * protection against it
@@ -458,13 +455,13 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
  * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
  * based files
  *
- * On success, it returns the number of blocks being mapped or allocated.
- * if create==0 and the blocks are pre-allocated and unwritten block,
- * the result buffer head is unmapped. If the create ==1, it will make sure
- * the buffer head is mapped.
+ * On success, it returns the number of blocks being mapped or allocated. if
+ * create==0 and the blocks are pre-allocated and unwritten, the resulting @map
+ * is marked as unwritten. If the create == 1, it will mark @map as mapped.
  *
  * It returns 0 if plain look up failed (blocks have not been allocated), in
- * that case, buffer head is unmapped
+ * that case, @map is returned as unmapped but we still do fill map->m_len to
+ * indicate the length of a hole starting at map->m_lblk.
  *
  * It returns the error in case of allocation failure.
  */
@@ -507,6 +504,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
 				retval = map->m_len;
 			map->m_len = retval;
 		} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
+			map->m_pblk = 0;
+			retval = es.es_len - (map->m_lblk - es.es_lblk);
+			if (retval > map->m_len)
+				retval = map->m_len;
+			map->m_len = retval;
 			retval = 0;
 		} else {
 			BUG_ON(1);
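
The branch added above is one half of the new contract spelled out in the
function comment earlier in this diff: on a lookup that lands in a hole (or a
delalloc range), the function now fills map->m_len with the hole length before
returning 0. A minimal caller sketch under that contract (walk_blocks() is
illustrative, not part of this patch):

        /*
         * Sketch: scan an inode's block range, skipping each hole in a
         * single step using the hole length ext4_map_blocks() now fills in.
         */
        static void walk_blocks(struct inode *inode, ext4_lblk_t lblk,
                                unsigned int len)
        {
                struct ext4_map_blocks map;
                int ret;

                while (len > 0) {
                        map.m_lblk = lblk;
                        map.m_len = len;
                        ret = ext4_map_blocks(NULL, inode, &map, 0);
                        if (ret < 0)
                                return;         /* lookup error */
                        /*
                         * ret > 0:  mapped extent of map.m_len blocks;
                         * ret == 0: hole at lblk of map.m_len blocks.
                         */
                        lblk += map.m_len;
                        len -= map.m_len;
                }
        }
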
@@ -714,16 +716,11 @@ static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
 		 cmpxchg(&bh->b_state, old_state, new_state) != old_state));
 }
 
-/* Maximum number of blocks we map for direct IO at once. */
-#define DIO_MAX_BLOCKS 4096
-
 static int _ext4_get_block(struct inode *inode, sector_t iblock,
 			   struct buffer_head *bh, int flags)
 {
-	handle_t *handle = ext4_journal_current_handle();
 	struct ext4_map_blocks map;
-	int ret = 0, started = 0;
-	int dio_credits;
+	int ret = 0;
 
 	if (ext4_has_inline_data(inode))
 		return -ERANGE;
@@ -731,33 +728,14 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
 	map.m_lblk = iblock;
 	map.m_len = bh->b_size >> inode->i_blkbits;
 
-	if (flags && !handle) {
-		/* Direct IO write... */
-		if (map.m_len > DIO_MAX_BLOCKS)
-			map.m_len = DIO_MAX_BLOCKS;
-		dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
-		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
-					    dio_credits);
-		if (IS_ERR(handle)) {
-			ret = PTR_ERR(handle);
-			return ret;
-		}
-		started = 1;
-	}
-
-	ret = ext4_map_blocks(handle, inode, &map, flags);
+	ret = ext4_map_blocks(ext4_journal_current_handle(), inode, &map,
+			      flags);
 	if (ret > 0) {
-		ext4_io_end_t *io_end = ext4_inode_aio(inode);
-
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		ext4_update_bh_state(bh, map.m_flags);
-		if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
-			set_buffer_defer_completion(bh);
 		bh->b_size = inode->i_sb->s_blocksize * map.m_len;
 		ret = 0;
 	}
-	if (started)
-		ext4_journal_stop(handle);
 	return ret;
 }
 
@@ -769,6 +747,155 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
 }
 
 /*
+ * Get block function used when preparing for buffered write if we require
+ * creating an unwritten extent if blocks haven't been allocated. The extent
+ * will be converted to written after the IO is complete.
+ */
+int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
+			     struct buffer_head *bh_result, int create)
+{
+	ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n",
+		   inode->i_ino, create);
+	return _ext4_get_block(inode, iblock, bh_result,
+			       EXT4_GET_BLOCKS_IO_CREATE_EXT);
+}
+
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
+
+static handle_t *start_dio_trans(struct inode *inode,
+				 struct buffer_head *bh_result)
+{
+	int dio_credits;
+
+	/* Trim mapping request to maximum we can map at once for DIO */
+	if (bh_result->b_size >> inode->i_blkbits > DIO_MAX_BLOCKS)
+		bh_result->b_size = DIO_MAX_BLOCKS << inode->i_blkbits;
+	dio_credits = ext4_chunk_trans_blocks(inode,
+				      bh_result->b_size >> inode->i_blkbits);
+	return ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
+}
+
+/* Get block function for DIO reads and writes to inodes without extents */
+int ext4_dio_get_block(struct inode *inode, sector_t iblock,
+		       struct buffer_head *bh, int create)
+{
+	handle_t *handle;
+	int ret;
+
+	/* We don't expect handle for direct IO */
+	WARN_ON_ONCE(ext4_journal_current_handle());
+
+	if (create) {
+		handle = start_dio_trans(inode, bh);
+		if (IS_ERR(handle))
+			return PTR_ERR(handle);
+	}
+	ret = _ext4_get_block(inode, iblock, bh,
+			      create ? EXT4_GET_BLOCKS_CREATE : 0);
+	if (create)
+		ext4_journal_stop(handle);
+	return ret;
+}
+
+/*
+ * Get block function for AIO DIO writes when we create unwritten extent if
+ * blocks are not allocated yet. The extent will be converted to written
+ * after IO is complete.
+ */
+static int ext4_dio_get_block_unwritten_async(struct inode *inode,
+		sector_t iblock, struct buffer_head *bh_result, int create)
+{
+	handle_t *handle;
+	int ret;
+
+	/* We don't expect handle for direct IO */
+	WARN_ON_ONCE(ext4_journal_current_handle());
+
+	handle = start_dio_trans(inode, bh_result);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	ret = _ext4_get_block(inode, iblock, bh_result,
+			      EXT4_GET_BLOCKS_IO_CREATE_EXT);
+	ext4_journal_stop(handle);
+
+	/*
+	 * When doing DIO using unwritten extents, we need io_end to convert
+	 * unwritten extents to written on IO completion. We allocate io_end
+	 * once we spot unwritten extent and store it in b_private. Generic
+	 * DIO code keeps b_private set and furthermore passes the value to
+	 * our completion callback in 'private' argument.
+	 */
+	if (!ret && buffer_unwritten(bh_result)) {
+		if (!bh_result->b_private) {
+			ext4_io_end_t *io_end;
+
+			io_end = ext4_init_io_end(inode, GFP_KERNEL);
+			if (!io_end)
+				return -ENOMEM;
+			bh_result->b_private = io_end;
+			ext4_set_io_unwritten_flag(inode, io_end);
+		}
+		set_buffer_defer_completion(bh_result);
+	}
+
+	return ret;
+}
+
+/*
+ * Get block function for non-AIO DIO writes when we create unwritten extent if
+ * blocks are not allocated yet. The extent will be converted to written
+ * after IO is complete from ext4_ext_direct_IO() function.
+ */
+static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
+		sector_t iblock, struct buffer_head *bh_result, int create)
+{
+	handle_t *handle;
+	int ret;
+
+	/* We don't expect handle for direct IO */
+	WARN_ON_ONCE(ext4_journal_current_handle());
+
+	handle = start_dio_trans(inode, bh_result);
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+	ret = _ext4_get_block(inode, iblock, bh_result,
+			      EXT4_GET_BLOCKS_IO_CREATE_EXT);
+	ext4_journal_stop(handle);
+
+	/*
+	 * Mark inode as having pending DIO writes to unwritten extents.
+	 * ext4_ext_direct_IO() checks this flag and converts extents to
+	 * written.
+	 */
+	if (!ret && buffer_unwritten(bh_result))
+		ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+
+	return ret;
+}
+
+static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock,
+		struct buffer_head *bh_result, int create)
+{
+	int ret;
+
+	ext4_debug("ext4_dio_get_block_overwrite: inode %lu, create flag %d\n",
+		   inode->i_ino, create);
+	/* We don't expect handle for direct IO */
+	WARN_ON_ONCE(ext4_journal_current_handle());
+
+	ret = _ext4_get_block(inode, iblock, bh_result, 0);
+	/*
+	 * Blocks should have been preallocated! ext4_file_write_iter() checks
+	 * that.
+	 */
+	WARN_ON_ONCE(!buffer_mapped(bh_result) || buffer_unwritten(bh_result));
+
+	return ret;
+}
+
+
+/*
  * `handle' can be NULL if create is zero
  */
 struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
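
A worked example of the trimming in start_dio_trans() above, assuming a 4 KiB
block size (i_blkbits == 12; the example_trim() wrapper is hypothetical): a
32 MiB request is 8192 blocks, above DIO_MAX_BLOCKS (4096), so b_size is cut
to 4096 << 12 = 16 MiB before the journal credits are estimated.

        /* Sketch of the b_size trim, with concrete numbers in comments. */
        static size_t example_trim(size_t b_size)
        {
                unsigned int blkbits = 12;              /* 4 KiB blocks */

                if (b_size >> blkbits > DIO_MAX_BLOCKS) /* 8192 > 4096 */
                        b_size = (size_t)DIO_MAX_BLOCKS << blkbits;
                return b_size;          /* 16 MiB for a 32 MiB input */
        }
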
@@ -1079,13 +1206,14 @@ retry_journal:
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
 	if (ext4_should_dioread_nolock(inode))
 		ret = ext4_block_write_begin(page, pos, len,
-					     ext4_get_block_write);
+					     ext4_get_block_unwritten);
 	else
 		ret = ext4_block_write_begin(page, pos, len,
 					     ext4_get_block);
 #else
 	if (ext4_should_dioread_nolock(inode))
-		ret = __block_write_begin(page, pos, len, ext4_get_block_write);
+		ret = __block_write_begin(page, pos, len,
+					  ext4_get_block_unwritten);
 	else
 		ret = __block_write_begin(page, pos, len, ext4_get_block);
 #endif
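
The net effect of this hunk: with dioread_nolock, buffered writes now ask for
unwritten extents through the renamed helper. The selection in miniature
(pick_write_begin() is a sketch; page locking and retries stay as in the real
ext4_write_begin()):

        /* Sketch: choose the get_block callback for write_begin. */
        static int pick_write_begin(struct page *page, loff_t pos,
                                    unsigned len, struct inode *inode)
        {
                if (ext4_should_dioread_nolock(inode))
                        return __block_write_begin(page, pos, len,
                                                   ext4_get_block_unwritten);
                return __block_write_begin(page, pos, len, ext4_get_block);
        }
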
@@ -3088,37 +3216,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
 	return try_to_free_buffers(page);
 }
 
-/*
- * ext4_get_block used when preparing for a DIO write or buffer write.
- * We allocate an uinitialized extent if blocks haven't been allocated.
- * The extent will be converted to initialized after the IO is complete.
- */
-int ext4_get_block_write(struct inode *inode, sector_t iblock,
-			 struct buffer_head *bh_result, int create)
-{
-	ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
-		   inode->i_ino, create);
-	return _ext4_get_block(inode, iblock, bh_result,
-			       EXT4_GET_BLOCKS_IO_CREATE_EXT);
-}
-
-static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
-		struct buffer_head *bh_result, int create)
-{
-	int ret;
-
-	ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
-		   inode->i_ino, create);
-	ret = _ext4_get_block(inode, iblock, bh_result, 0);
-	/*
-	 * Blocks should have been preallocated! ext4_file_write_iter() checks
-	 * that.
-	 */
-	WARN_ON_ONCE(!buffer_mapped(bh_result));
-
-	return ret;
-}
-
 #ifdef CONFIG_FS_DAX
 int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
 			    struct buffer_head *bh_result, int create)
@@ -3179,13 +3276,12 @@ out:
 	WARN_ON_ONCE(ret == 0 && create);
 	if (ret > 0) {
 		map_bh(bh_result, inode->i_sb, map.m_pblk);
-		bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
-					map.m_flags;
 		/*
 		 * At least for now we have to clear BH_New so that DAX code
 		 * doesn't attempt to zero blocks again in a racy way.
 		 */
-		bh_result->b_state &= ~(1 << BH_New);
+		map.m_flags &= ~EXT4_MAP_NEW;
+		ext4_update_bh_state(bh_result, map.m_flags);
 		bh_result->b_size = map.m_len << inode->i_blkbits;
 		ret = 0;
 	}
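
ext4_update_bh_state() (shown earlier in this diff) only replaces the
EXT4_MAP_FLAGS bits of b_state, so clearing EXT4_MAP_NEW in m_flags first is
equivalent to the old open-coded `b_state &= ~(1 << BH_New)`, minus the
unsynchronized read-modify-write of unrelated state bits. The update it
performs, in one line (illustrative; the cmpxchg retry loop is omitted):

        new_state = (old_state & ~EXT4_MAP_FLAGS) | (map.m_flags & EXT4_MAP_FLAGS);
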
@@ -3193,24 +3289,32 @@ out:
 }
 #endif
 
-static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
-			    ssize_t size, void *private)
+static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
+			   ssize_t size, void *private)
 {
-	ext4_io_end_t *io_end = iocb->private;
+	ext4_io_end_t *io_end = private;
 
 	/* if not async direct IO just return */
 	if (!io_end)
-		return;
+		return 0;
 
 	ext_debug("ext4_end_io_dio(): io_end 0x%p "
 		  "for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
-		  iocb->private, io_end->inode->i_ino, iocb, offset,
-		  size);
+		  io_end, io_end->inode->i_ino, iocb, offset, size);
 
-	iocb->private = NULL;
+	/*
+	 * Error during AIO DIO. We cannot convert unwritten extents as the
+	 * data was not written. Just clear the unwritten flag and drop io_end.
+	 */
+	if (size <= 0) {
+		ext4_clear_io_unwritten_flag(io_end);
+		size = 0;
+	}
 	io_end->offset = offset;
 	io_end->size = size;
 	ext4_put_io_end(io_end);
+
+	return 0;
 }
 
 /*
@@ -3243,7 +3347,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	get_block_t *get_block_func = NULL;
 	int dio_flags = 0;
 	loff_t final_size = offset + count;
-	ext4_io_end_t *io_end = NULL;
 
 	/* Use the old path for reads and writes beyond i_size. */
 	if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
@@ -3268,16 +3371,17 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	/*
 	 * We could direct write to holes and fallocate.
 	 *
-	 * Allocated blocks to fill the hole are marked as
-	 * unwritten to prevent parallel buffered read to expose
-	 * the stale data before DIO complete the data IO.
+	 * Allocated blocks to fill the hole are marked as unwritten to prevent
+	 * parallel buffered read to expose the stale data before DIO complete
+	 * the data IO.
 	 *
-	 * As to previously fallocated extents, ext4 get_block will
-	 * just simply mark the buffer mapped but still keep the
-	 * extents unwritten.
+	 * As to previously fallocated extents, ext4 get_block will just simply
+	 * mark the buffer mapped but still keep the extents unwritten.
 	 *
-	 * For non AIO case, we will convert those unwritten extents
-	 * to written after return back from blockdev_direct_IO.
+	 * For non AIO case, we will convert those unwritten extents to written
+	 * after return back from blockdev_direct_IO. That way we save us from
+	 * allocating io_end structure and also the overhead of offloading
+	 * the extent convertion to a workqueue.
 	 *
 	 * For async DIO, the conversion needs to be deferred when the
 	 * IO is completed. The ext4 end_io callback function will be
@@ -3285,30 +3389,13 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 	 * case, we allocate an io_end structure to hook to the iocb.
 	 */
 	iocb->private = NULL;
-	if (overwrite) {
-		get_block_func = ext4_get_block_overwrite;
+	if (overwrite)
+		get_block_func = ext4_dio_get_block_overwrite;
+	else if (is_sync_kiocb(iocb)) {
+		get_block_func = ext4_dio_get_block_unwritten_sync;
+		dio_flags = DIO_LOCKING;
 	} else {
-		ext4_inode_aio_set(inode, NULL);
-		if (!is_sync_kiocb(iocb)) {
-			io_end = ext4_init_io_end(inode, GFP_NOFS);
-			if (!io_end) {
-				ret = -ENOMEM;
-				goto retake_lock;
-			}
-			/*
-			 * Grab reference for DIO. Will be dropped in
-			 * ext4_end_io_dio()
-			 */
-			iocb->private = ext4_get_io_end(io_end);
-			/*
-			 * we save the io structure for current async direct
-			 * IO, so that later ext4_map_blocks() could flag the
-			 * io structure whether there is a unwritten extents
-			 * needs to be converted when IO is completed.
-			 */
-			ext4_inode_aio_set(inode, io_end);
-		}
-		get_block_func = ext4_get_block_write;
+		get_block_func = ext4_dio_get_block_unwritten_async;
 		dio_flags = DIO_LOCKING;
 	}
 #ifdef CONFIG_EXT4_FS_ENCRYPTION
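
With the io_end bookkeeping gone from this function, the whole decision
reduces to picking one of the three callbacks added earlier. Condensed, with
the conversion strategy of each noted (restating the hunk above):

        if (overwrite)                  /* blocks preallocated and written */
                get_block_func = ext4_dio_get_block_overwrite;
        else if (is_sync_kiocb(iocb))   /* converted after blockdev_direct_IO */
                get_block_func = ext4_dio_get_block_unwritten_sync;
        else                            /* converted from ext4_end_io_dio() */
                get_block_func = ext4_dio_get_block_unwritten_async;
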
@@ -3323,27 +3410,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 				   get_block_func,
 				   ext4_end_io_dio, NULL, dio_flags);
 
-	/*
-	 * Put our reference to io_end. This can free the io_end structure e.g.
-	 * in sync IO case or in case of error. It can even perform extent
-	 * conversion if all bios we submitted finished before we got here.
-	 * Note that in that case iocb->private can be already set to NULL
-	 * here.
-	 */
-	if (io_end) {
-		ext4_inode_aio_set(inode, NULL);
-		ext4_put_io_end(io_end);
-		/*
-		 * When no IO was submitted ext4_end_io_dio() was not
-		 * called so we have to put iocb's reference.
-		 */
-		if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
-			WARN_ON(iocb->private != io_end);
-			WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
-			ext4_put_io_end(io_end);
-			iocb->private = NULL;
-		}
-	}
 	if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
 						EXT4_STATE_DIO_UNWRITTEN)) {
 		int err;
@@ -3358,7 +3424,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
 		ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
 	}
 
-retake_lock:
 	if (iov_iter_rw(iter) == WRITE)
 		inode_dio_end(inode);
 	/* take i_mutex locking again if we do a ovewrite dio */
@@ -5261,6 +5326,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 	might_sleep();
 	trace_ext4_mark_inode_dirty(inode, _RET_IP_);
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
+	if (err)
+		return err;
 	if (ext4_handle_valid(handle) &&
 	    EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
 	    !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
@@ -5291,9 +5358,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 			}
 		}
 	}
-	if (!err)
-		err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-	return err;
+	return ext4_mark_iloc_dirty(handle, inode, &iloc);
 }
 
 /*
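
Combined with the early return added near the top of the function, the error
handling in ext4_mark_inode_dirty() now reads linearly (condensed sketch of
the resulting flow, not a verbatim excerpt):

        err = ext4_reserve_inode_write(handle, inode, &iloc);
        if (err)
                return err;             /* nothing to undo yet */
        /* ... possibly expand i_extra_isize ... */
        return ext4_mark_iloc_dirty(handle, inode, &iloc);
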
@@ -5502,7 +5567,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
 	unlock_page(page);
 	/* OK, we need to fill the hole... */
 	if (ext4_should_dioread_nolock(inode))
-		get_block = ext4_get_block_write;
+		get_block = ext4_get_block_unwritten;
 	else
 		get_block = ext4_get_block;
 retry_alloc:
@@ -5545,3 +5610,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 	return err;
 }
+
+/*
+ * Find the first extent at or after @lblk in an inode that is not a hole.
+ * Search for @map_len blocks at most. The extent is returned in @result.
+ *
+ * The function returns 1 if we found an extent. The function returns 0 in
+ * case there is no extent at or after @lblk and in that case also sets
+ * @result->es_len to 0. In case of error, the error code is returned.
+ */
+int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
+			 unsigned int map_len, struct extent_status *result)
+{
+	struct ext4_map_blocks map;
+	struct extent_status es = {};
+	int ret;
+
+	map.m_lblk = lblk;
+	map.m_len = map_len;
+
+	/*
+	 * For non-extent based files this loop may iterate several times since
+	 * we do not determine full hole size.
+	 */
+	while (map.m_len > 0) {
+		ret = ext4_map_blocks(NULL, inode, &map, 0);
+		if (ret < 0)
+			return ret;
+		/* There's extent covering m_lblk? Just return it. */
+		if (ret > 0) {
+			int status;
+
+			ext4_es_store_pblock(result, map.m_pblk);
+			result->es_lblk = map.m_lblk;
+			result->es_len = map.m_len;
+			if (map.m_flags & EXT4_MAP_UNWRITTEN)
+				status = EXTENT_STATUS_UNWRITTEN;
+			else
+				status = EXTENT_STATUS_WRITTEN;
+			ext4_es_store_status(result, status);
+			return 1;
+		}
+		ext4_es_find_delayed_extent_range(inode, map.m_lblk,
+						  map.m_lblk + map.m_len - 1,
+						  &es);
+		/* Is delalloc data before next block in extent tree? */
+		if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
+			ext4_lblk_t offset = 0;
+
+			if (es.es_lblk < lblk)
+				offset = lblk - es.es_lblk;
+			result->es_lblk = es.es_lblk + offset;
+			ext4_es_store_pblock(result,
+					     ext4_es_pblock(&es) + offset);
+			result->es_len = es.es_len - offset;
+			ext4_es_store_status(result, ext4_es_status(&es));
+
+			return 1;
+		}
+		/* There's a hole at m_lblk, advance us after it */
+		map.m_lblk += map.m_len;
+		map_len -= map.m_len;
+		map.m_len = map_len;
+		cond_resched();
+	}
+	result->es_len = 0;
+	return 0;
+}
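
One natural consumer of ext4_get_next_extent() is a SEEK_DATA-style lookup:
scan forward from an offset and report where the first non-hole extent
starts. A hedged sketch (the llseek wiring itself is outside this diff;
example_seek_data() is hypothetical):

        /*
         * Sketch: find the first data at or after 'lblk', following the
         * documented contract: <0 error, 0 no extent, 1 extent in 'es'.
         */
        static loff_t example_seek_data(struct inode *inode, ext4_lblk_t lblk,
                                        unsigned int max_blocks)
        {
                struct extent_status es;
                int ret;

                ret = ext4_get_next_extent(inode, lblk, max_blocks, &es);
                if (ret < 0)
                        return ret;             /* propagate error */
                if (ret == 0)
                        return -ENXIO;          /* no data in range */
                return (loff_t)es.es_lblk << inode->i_blkbits;
        }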