aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/move_extent.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/move_extent.c')
-rw-r--r--fs/ext4/move_extent.c520
1 files changed, 316 insertions, 204 deletions
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index c5826c623e7a..292daeeed455 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -141,55 +141,21 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
141} 141}
142 142
143/** 143/**
144 * mext_check_null_inode - NULL check for two inodes
145 *
146 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
147 */
148static int
149mext_check_null_inode(struct inode *inode1, struct inode *inode2,
150 const char *function, unsigned int line)
151{
152 int ret = 0;
153
154 if (inode1 == NULL) {
155 __ext4_error(inode2->i_sb, function, line,
156 "Both inodes should not be NULL: "
157 "inode1 NULL inode2 %lu", inode2->i_ino);
158 ret = -EIO;
159 } else if (inode2 == NULL) {
160 __ext4_error(inode1->i_sb, function, line,
161 "Both inodes should not be NULL: "
162 "inode1 %lu inode2 NULL", inode1->i_ino);
163 ret = -EIO;
164 }
165 return ret;
166}
167
168/**
169 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem 144 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
170 * 145 *
171 * @orig_inode: original inode structure 146 * Acquire write lock of i_data_sem of the two inodes
172 * @donor_inode: donor inode structure
173 * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
174 * i_ino order.
175 */ 147 */
176static void 148static void
177double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) 149double_down_write_data_sem(struct inode *first, struct inode *second)
178{ 150{
179 struct inode *first = orig_inode, *second = donor_inode; 151 if (first < second) {
152 down_write(&EXT4_I(first)->i_data_sem);
153 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
154 } else {
155 down_write(&EXT4_I(second)->i_data_sem);
156 down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING);
180 157
181 /*
182 * Use the inode number to provide the stable locking order instead
183 * of its address, because the C language doesn't guarantee you can
184 * compare pointers that don't come from the same array.
185 */
186 if (donor_inode->i_ino < orig_inode->i_ino) {
187 first = donor_inode;
188 second = orig_inode;
189 } 158 }
190
191 down_write(&EXT4_I(first)->i_data_sem);
192 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
193} 159}
194 160
195/** 161/**
@@ -604,9 +570,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
604 diff = donor_off - le32_to_cpu(tmp_dext->ee_block); 570 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
605 571
606 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); 572 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
607 tmp_dext->ee_block = 573 le32_add_cpu(&tmp_dext->ee_block, diff);
608 cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); 574 le16_add_cpu(&tmp_dext->ee_len, -diff);
609 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
610 575
611 if (max_count < ext4_ext_get_actual_len(tmp_dext)) 576 if (max_count < ext4_ext_get_actual_len(tmp_dext))
612 tmp_dext->ee_len = cpu_to_le16(max_count); 577 tmp_dext->ee_len = cpu_to_le16(max_count);
@@ -629,6 +594,43 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
629} 594}
630 595
631/** 596/**
597 * mext_check_coverage - Check that all extents in range have the same type
598 *
599 * @inode: inode in question
600 * @from: block offset of inode
601 * @count: block count to be checked
602 * @uninit: extents expected to be uninitialized
603 * @err: pointer to save error value
604 *
605 * Return 1 if all extents in range have the expected type, and zero otherwise.
606 */
607static int
608mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
609 int uninit, int *err)
610{
611 struct ext4_ext_path *path = NULL;
612 struct ext4_extent *ext;
613 ext4_lblk_t last = from + count;
614 while (from < last) {
615 *err = get_ext_path(inode, from, &path);
616 if (*err)
617 return 0;
618 ext = path[ext_depth(inode)].p_ext;
619 if (!ext) {
620 ext4_ext_drop_refs(path);
621 return 0;
622 }
623 if (uninit != ext4_ext_is_uninitialized(ext)) {
624 ext4_ext_drop_refs(path);
625 return 0;
626 }
627 from += ext4_ext_get_actual_len(ext);
628 ext4_ext_drop_refs(path);
629 }
630 return 1;
631}
632
633/**
632 * mext_replace_branches - Replace original extents with new extents 634 * mext_replace_branches - Replace original extents with new extents
633 * 635 *
634 * @handle: journal handle 636 * @handle: journal handle
@@ -663,9 +665,6 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
663 int replaced_count = 0; 665 int replaced_count = 0;
664 int dext_alen; 666 int dext_alen;
665 667
666 /* Protect extent trees against block allocations via delalloc */
667 double_down_write_data_sem(orig_inode, donor_inode);
668
669 /* Get the original extent for the block "orig_off" */ 668 /* Get the original extent for the block "orig_off" */
670 *err = get_ext_path(orig_inode, orig_off, &orig_path); 669 *err = get_ext_path(orig_inode, orig_off, &orig_path);
671 if (*err) 670 if (*err)
@@ -764,12 +763,122 @@ out:
764 ext4_ext_invalidate_cache(orig_inode); 763 ext4_ext_invalidate_cache(orig_inode);
765 ext4_ext_invalidate_cache(donor_inode); 764 ext4_ext_invalidate_cache(donor_inode);
766 765
767 double_up_write_data_sem(orig_inode, donor_inode);
768
769 return replaced_count; 766 return replaced_count;
770} 767}
771 768
772/** 769/**
770 * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
771 *
772 * @inode1: the inode structure
773 * @inode2: the inode structure
774 * @index: page index
775 * @page: result page vector
776 *
777 * Grab two locked pages for inode's by inode order
778 */
779static int
780mext_page_double_lock(struct inode *inode1, struct inode *inode2,
781 pgoff_t index, struct page *page[2])
782{
783 struct address_space *mapping[2];
784 unsigned fl = AOP_FLAG_NOFS;
785
786 BUG_ON(!inode1 || !inode2);
787 if (inode1 < inode2) {
788 mapping[0] = inode1->i_mapping;
789 mapping[1] = inode2->i_mapping;
790 } else {
791 mapping[0] = inode2->i_mapping;
792 mapping[1] = inode1->i_mapping;
793 }
794
795 page[0] = grab_cache_page_write_begin(mapping[0], index, fl);
796 if (!page[0])
797 return -ENOMEM;
798
799 page[1] = grab_cache_page_write_begin(mapping[1], index, fl);
800 if (!page[1]) {
801 unlock_page(page[0]);
802 page_cache_release(page[0]);
803 return -ENOMEM;
804 }
805
806 if (inode1 > inode2) {
807 struct page *tmp;
808 tmp = page[0];
809 page[0] = page[1];
810 page[1] = tmp;
811 }
812 return 0;
813}
814
815/* Force page buffers uptodate w/o dropping page's lock */
816static int
817mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
818{
819 struct inode *inode = page->mapping->host;
820 sector_t block;
821 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
822 unsigned int blocksize, block_start, block_end;
823 int i, err, nr = 0, partial = 0;
824 BUG_ON(!PageLocked(page));
825 BUG_ON(PageWriteback(page));
826
827 if (PageUptodate(page))
828 return 0;
829
830 blocksize = 1 << inode->i_blkbits;
831 if (!page_has_buffers(page))
832 create_empty_buffers(page, blocksize, 0);
833
834 head = page_buffers(page);
835 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
836 for (bh = head, block_start = 0; bh != head || !block_start;
837 block++, block_start = block_end, bh = bh->b_this_page) {
838 block_end = block_start + blocksize;
839 if (block_end <= from || block_start >= to) {
840 if (!buffer_uptodate(bh))
841 partial = 1;
842 continue;
843 }
844 if (buffer_uptodate(bh))
845 continue;
846 if (!buffer_mapped(bh)) {
847 int err = 0;
848 err = ext4_get_block(inode, block, bh, 0);
849 if (err) {
850 SetPageError(page);
851 return err;
852 }
853 if (!buffer_mapped(bh)) {
854 zero_user(page, block_start, blocksize);
855 if (!err)
856 set_buffer_uptodate(bh);
857 continue;
858 }
859 }
860 BUG_ON(nr >= MAX_BUF_PER_PAGE);
861 arr[nr++] = bh;
862 }
863 /* No io required */
864 if (!nr)
865 goto out;
866
867 for (i = 0; i < nr; i++) {
868 bh = arr[i];
869 if (!bh_uptodate_or_lock(bh)) {
870 err = bh_submit_read(bh);
871 if (err)
872 return err;
873 }
874 }
875out:
876 if (!partial)
877 SetPageUptodate(page);
878 return 0;
879}
880
881/**
773 * move_extent_per_page - Move extent data per page 882 * move_extent_per_page - Move extent data per page
774 * 883 *
775 * @o_filp: file structure of original file 884 * @o_filp: file structure of original file
@@ -791,26 +900,24 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
791 int block_len_in_page, int uninit, int *err) 900 int block_len_in_page, int uninit, int *err)
792{ 901{
793 struct inode *orig_inode = o_filp->f_dentry->d_inode; 902 struct inode *orig_inode = o_filp->f_dentry->d_inode;
794 struct address_space *mapping = orig_inode->i_mapping; 903 struct page *pagep[2] = {NULL, NULL};
795 struct buffer_head *bh;
796 struct page *page = NULL;
797 const struct address_space_operations *a_ops = mapping->a_ops;
798 handle_t *handle; 904 handle_t *handle;
799 ext4_lblk_t orig_blk_offset; 905 ext4_lblk_t orig_blk_offset;
800 long long offs = orig_page_offset << PAGE_CACHE_SHIFT; 906 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
801 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 907 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
802 unsigned int w_flags = 0; 908 unsigned int w_flags = 0;
803 unsigned int tmp_data_size, data_size, replaced_size; 909 unsigned int tmp_data_size, data_size, replaced_size;
804 void *fsdata; 910 int err2, jblocks, retries = 0;
805 int i, jblocks;
806 int err2 = 0;
807 int replaced_count = 0; 911 int replaced_count = 0;
912 int from = data_offset_in_page << orig_inode->i_blkbits;
808 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 913 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
809 914
810 /* 915 /*
811 * It needs twice the amount of ordinary journal buffers because 916 * It needs twice the amount of ordinary journal buffers because
812 * inode and donor_inode may change each different metadata blocks. 917 * inode and donor_inode may change each different metadata blocks.
813 */ 918 */
919again:
920 *err = 0;
814 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 921 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
815 handle = ext4_journal_start(orig_inode, jblocks); 922 handle = ext4_journal_start(orig_inode, jblocks);
816 if (IS_ERR(handle)) { 923 if (IS_ERR(handle)) {
@@ -824,19 +931,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
824 orig_blk_offset = orig_page_offset * blocks_per_page + 931 orig_blk_offset = orig_page_offset * blocks_per_page +
825 data_offset_in_page; 932 data_offset_in_page;
826 933
827 /*
828 * If orig extent is uninitialized one,
829 * it's not necessary force the page into memory
830 * and then force it to be written out again.
831 * Just swap data blocks between orig and donor.
832 */
833 if (uninit) {
834 replaced_count = mext_replace_branches(handle, orig_inode,
835 donor_inode, orig_blk_offset,
836 block_len_in_page, err);
837 goto out2;
838 }
839
840 offs = (long long)orig_blk_offset << orig_inode->i_blkbits; 934 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
841 935
842 /* Calculate data_size */ 936 /* Calculate data_size */
@@ -858,75 +952,120 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
858 952
859 replaced_size = data_size; 953 replaced_size = data_size;
860 954
861 *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, 955 *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
862 &page, &fsdata); 956 pagep);
863 if (unlikely(*err < 0)) 957 if (unlikely(*err < 0))
864 goto out; 958 goto stop_journal;
865
866 if (!PageUptodate(page)) {
867 mapping->a_ops->readpage(o_filp, page);
868 lock_page(page);
869 }
870
871 /* 959 /*
872 * try_to_release_page() doesn't call releasepage in writeback mode. 960 * If orig extent was uninitialized it can become initialized
873 * We should care about the order of writing to the same file 961 * at any time after i_data_sem was dropped, in order to
874 * by multiple move extent processes. 962 * serialize with delalloc we have recheck extent while we
875 * It needs to call wait_on_page_writeback() to wait for the 963 * hold page's lock, if it is still the case data copy is not
876 * writeback of the page. 964 * necessary, just swap data blocks between orig and donor.
877 */ 965 */
878 wait_on_page_writeback(page); 966 if (uninit) {
967 double_down_write_data_sem(orig_inode, donor_inode);
968 /* If any of extents in range became initialized we have to
969 * fallback to data copying */
970 uninit = mext_check_coverage(orig_inode, orig_blk_offset,
971 block_len_in_page, 1, err);
972 if (*err)
973 goto drop_data_sem;
879 974
880 /* Release old bh and drop refs */ 975 uninit &= mext_check_coverage(donor_inode, orig_blk_offset,
881 try_to_release_page(page, 0); 976 block_len_in_page, 1, err);
977 if (*err)
978 goto drop_data_sem;
979
980 if (!uninit) {
981 double_up_write_data_sem(orig_inode, donor_inode);
982 goto data_copy;
983 }
984 if ((page_has_private(pagep[0]) &&
985 !try_to_release_page(pagep[0], 0)) ||
986 (page_has_private(pagep[1]) &&
987 !try_to_release_page(pagep[1], 0))) {
988 *err = -EBUSY;
989 goto drop_data_sem;
990 }
991 replaced_count = mext_replace_branches(handle, orig_inode,
992 donor_inode, orig_blk_offset,
993 block_len_in_page, err);
994 drop_data_sem:
995 double_up_write_data_sem(orig_inode, donor_inode);
996 goto unlock_pages;
997 }
998data_copy:
999 *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
1000 if (*err)
1001 goto unlock_pages;
1002
1003 /* At this point all buffers in range are uptodate, old mapping layout
1004 * is no longer required, try to drop it now. */
1005 if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
1006 (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
1007 *err = -EBUSY;
1008 goto unlock_pages;
1009 }
882 1010
883 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, 1011 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
884 orig_blk_offset, block_len_in_page, 1012 orig_blk_offset,
885 &err2); 1013 block_len_in_page, err);
886 if (err2) { 1014 if (*err) {
887 if (replaced_count) { 1015 if (replaced_count) {
888 block_len_in_page = replaced_count; 1016 block_len_in_page = replaced_count;
889 replaced_size = 1017 replaced_size =
890 block_len_in_page << orig_inode->i_blkbits; 1018 block_len_in_page << orig_inode->i_blkbits;
891 } else 1019 } else
892 goto out; 1020 goto unlock_pages;
893 } 1021 }
1022 /* Perform all necessary steps similar write_begin()/write_end()
1023 * but keeping in mind that i_size will not change */
1024 *err = __block_write_begin(pagep[0], from, from + replaced_size,
1025 ext4_get_block);
1026 if (!*err)
1027 *err = block_commit_write(pagep[0], from, from + replaced_size);
894 1028
895 if (!page_has_buffers(page)) 1029 if (unlikely(*err < 0))
896 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); 1030 goto repair_branches;
897 1031
898 bh = page_buffers(page); 1032 /* Even in case of data=writeback it is reasonable to pin
899 for (i = 0; i < data_offset_in_page; i++) 1033 * inode to transaction, to prevent unexpected data loss */
900 bh = bh->b_this_page; 1034 *err = ext4_jbd2_file_inode(handle, orig_inode);
901 1035
902 for (i = 0; i < block_len_in_page; i++) { 1036unlock_pages:
903 *err = ext4_get_block(orig_inode, 1037 unlock_page(pagep[0]);
904 (sector_t)(orig_blk_offset + i), bh, 0); 1038 page_cache_release(pagep[0]);
905 if (*err < 0) 1039 unlock_page(pagep[1]);
906 goto out; 1040 page_cache_release(pagep[1]);
907 1041stop_journal:
908 if (bh->b_this_page != NULL)
909 bh = bh->b_this_page;
910 }
911
912 *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
913 page, fsdata);
914 page = NULL;
915
916out:
917 if (unlikely(page)) {
918 if (PageLocked(page))
919 unlock_page(page);
920 page_cache_release(page);
921 ext4_journal_stop(handle);
922 }
923out2:
924 ext4_journal_stop(handle); 1042 ext4_journal_stop(handle);
925 1043 /* Buffer was busy because probably is pinned to journal transaction,
926 if (err2) 1044 * force transaction commit may help to free it. */
927 *err = err2; 1045 if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb,
928 1046 &retries))
1047 goto again;
929 return replaced_count; 1048 return replaced_count;
1049
1050repair_branches:
1051 /*
1052 * This should never ever happen!
1053 * Extents are swapped already, but we are not able to copy data.
1054 * Try to swap extents to its original places
1055 */
1056 double_down_write_data_sem(orig_inode, donor_inode);
1057 replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
1058 orig_blk_offset,
1059 block_len_in_page, &err2);
1060 double_up_write_data_sem(orig_inode, donor_inode);
1061 if (replaced_count != block_len_in_page) {
1062 EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
1063 "Unable to copy data block,"
1064 " data will be lost.");
1065 *err = -EIO;
1066 }
1067 replaced_count = 0;
1068 goto unlock_pages;
930} 1069}
931 1070
932/** 1071/**
@@ -969,14 +1108,6 @@ mext_check_arguments(struct inode *orig_inode,
969 return -EINVAL; 1108 return -EINVAL;
970 } 1109 }
971 1110
972 /* Files should be in the same ext4 FS */
973 if (orig_inode->i_sb != donor_inode->i_sb) {
974 ext4_debug("ext4 move extent: The argument files "
975 "should be in same FS [ino:orig %lu, donor %lu]\n",
976 orig_inode->i_ino, donor_inode->i_ino);
977 return -EINVAL;
978 }
979
980 /* Ext4 move extent supports only extent based file */ 1111 /* Ext4 move extent supports only extent based file */
981 if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { 1112 if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
982 ext4_debug("ext4 move extent: orig file is not extents " 1113 ext4_debug("ext4 move extent: orig file is not extents "
@@ -1002,7 +1133,6 @@ mext_check_arguments(struct inode *orig_inode,
1002 } 1133 }
1003 1134
1004 if ((orig_start >= EXT_MAX_BLOCKS) || 1135 if ((orig_start >= EXT_MAX_BLOCKS) ||
1005 (donor_start >= EXT_MAX_BLOCKS) ||
1006 (*len > EXT_MAX_BLOCKS) || 1136 (*len > EXT_MAX_BLOCKS) ||
1007 (orig_start + *len >= EXT_MAX_BLOCKS)) { 1137 (orig_start + *len >= EXT_MAX_BLOCKS)) {
1008 ext4_debug("ext4 move extent: Can't handle over [%u] blocks " 1138 ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
@@ -1072,35 +1202,19 @@ mext_check_arguments(struct inode *orig_inode,
1072 * @inode1: the inode structure 1202 * @inode1: the inode structure
1073 * @inode2: the inode structure 1203 * @inode2: the inode structure
1074 * 1204 *
1075 * Lock two inodes' i_mutex by i_ino order. 1205 * Lock two inodes' i_mutex
1076 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
1077 */ 1206 */
1078static int 1207static void
1079mext_inode_double_lock(struct inode *inode1, struct inode *inode2) 1208mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
1080{ 1209{
1081 int ret = 0; 1210 BUG_ON(inode1 == inode2);
1082 1211 if (inode1 < inode2) {
1083 BUG_ON(inode1 == NULL && inode2 == NULL);
1084
1085 ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
1086 if (ret < 0)
1087 goto out;
1088
1089 if (inode1 == inode2) {
1090 mutex_lock(&inode1->i_mutex);
1091 goto out;
1092 }
1093
1094 if (inode1->i_ino < inode2->i_ino) {
1095 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); 1212 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
1096 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); 1213 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
1097 } else { 1214 } else {
1098 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); 1215 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
1099 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); 1216 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
1100 } 1217 }
1101
1102out:
1103 return ret;
1104} 1218}
1105 1219
1106/** 1220/**
@@ -1109,28 +1223,13 @@ out:
1109 * @inode1: the inode that is released first 1223 * @inode1: the inode that is released first
1110 * @inode2: the inode that is released second 1224 * @inode2: the inode that is released second
1111 * 1225 *
1112 * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
1113 */ 1226 */
1114 1227
1115static int 1228static void
1116mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) 1229mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
1117{ 1230{
1118 int ret = 0; 1231 mutex_unlock(&inode1->i_mutex);
1119 1232 mutex_unlock(&inode2->i_mutex);
1120 BUG_ON(inode1 == NULL && inode2 == NULL);
1121
1122 ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
1123 if (ret < 0)
1124 goto out;
1125
1126 if (inode1)
1127 mutex_unlock(&inode1->i_mutex);
1128
1129 if (inode2 && inode2 != inode1)
1130 mutex_unlock(&inode2->i_mutex);
1131
1132out:
1133 return ret;
1134} 1233}
1135 1234
1136/** 1235/**
@@ -1187,16 +1286,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1187 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0; 1286 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
1188 ext4_lblk_t rest_blocks; 1287 ext4_lblk_t rest_blocks;
1189 pgoff_t orig_page_offset = 0, seq_end_page; 1288 pgoff_t orig_page_offset = 0, seq_end_page;
1190 int ret1, ret2, depth, last_extent = 0; 1289 int ret, depth, last_extent = 0;
1191 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 1290 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1192 int data_offset_in_page; 1291 int data_offset_in_page;
1193 int block_len_in_page; 1292 int block_len_in_page;
1194 int uninit; 1293 int uninit;
1195 1294
1196 /* orig and donor should be different file */ 1295 if (orig_inode->i_sb != donor_inode->i_sb) {
1197 if (orig_inode->i_ino == donor_inode->i_ino) { 1296 ext4_debug("ext4 move extent: The argument files "
1297 "should be in same FS [ino:orig %lu, donor %lu]\n",
1298 orig_inode->i_ino, donor_inode->i_ino);
1299 return -EINVAL;
1300 }
1301
1302 /* orig and donor should be different inodes */
1303 if (orig_inode == donor_inode) {
1198 ext4_debug("ext4 move extent: The argument files should not " 1304 ext4_debug("ext4 move extent: The argument files should not "
1199 "be same file [ino:orig %lu, donor %lu]\n", 1305 "be same inode [ino:orig %lu, donor %lu]\n",
1200 orig_inode->i_ino, donor_inode->i_ino); 1306 orig_inode->i_ino, donor_inode->i_ino);
1201 return -EINVAL; 1307 return -EINVAL;
1202 } 1308 }
@@ -1208,18 +1314,27 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1208 orig_inode->i_ino, donor_inode->i_ino); 1314 orig_inode->i_ino, donor_inode->i_ino);
1209 return -EINVAL; 1315 return -EINVAL;
1210 } 1316 }
1211 1317 /* TODO: This is non obvious task to swap blocks for inodes with full
1318 journaling enabled */
1319 if (ext4_should_journal_data(orig_inode) ||
1320 ext4_should_journal_data(donor_inode)) {
1321 return -EINVAL;
1322 }
1212 /* Protect orig and donor inodes against a truncate */ 1323 /* Protect orig and donor inodes against a truncate */
1213 ret1 = mext_inode_double_lock(orig_inode, donor_inode); 1324 mext_inode_double_lock(orig_inode, donor_inode);
1214 if (ret1 < 0) 1325
1215 return ret1; 1326 /* Wait for all existing dio workers */
1327 ext4_inode_block_unlocked_dio(orig_inode);
1328 ext4_inode_block_unlocked_dio(donor_inode);
1329 inode_dio_wait(orig_inode);
1330 inode_dio_wait(donor_inode);
1216 1331
1217 /* Protect extent tree against block allocations via delalloc */ 1332 /* Protect extent tree against block allocations via delalloc */
1218 double_down_write_data_sem(orig_inode, donor_inode); 1333 double_down_write_data_sem(orig_inode, donor_inode);
1219 /* Check the filesystem environment whether move_extent can be done */ 1334 /* Check the filesystem environment whether move_extent can be done */
1220 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, 1335 ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
1221 donor_start, &len); 1336 donor_start, &len);
1222 if (ret1) 1337 if (ret)
1223 goto out; 1338 goto out;
1224 1339
1225 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; 1340 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
@@ -1227,13 +1342,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1227 if (file_end < block_end) 1342 if (file_end < block_end)
1228 len -= block_end - file_end; 1343 len -= block_end - file_end;
1229 1344
1230 ret1 = get_ext_path(orig_inode, block_start, &orig_path); 1345 ret = get_ext_path(orig_inode, block_start, &orig_path);
1231 if (ret1) 1346 if (ret)
1232 goto out; 1347 goto out;
1233 1348
1234 /* Get path structure to check the hole */ 1349 /* Get path structure to check the hole */
1235 ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); 1350 ret = get_ext_path(orig_inode, block_start, &holecheck_path);
1236 if (ret1) 1351 if (ret)
1237 goto out; 1352 goto out;
1238 1353
1239 depth = ext_depth(orig_inode); 1354 depth = ext_depth(orig_inode);
@@ -1252,13 +1367,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1252 last_extent = mext_next_extent(orig_inode, 1367 last_extent = mext_next_extent(orig_inode,
1253 holecheck_path, &ext_cur); 1368 holecheck_path, &ext_cur);
1254 if (last_extent < 0) { 1369 if (last_extent < 0) {
1255 ret1 = last_extent; 1370 ret = last_extent;
1256 goto out; 1371 goto out;
1257 } 1372 }
1258 last_extent = mext_next_extent(orig_inode, orig_path, 1373 last_extent = mext_next_extent(orig_inode, orig_path,
1259 &ext_dummy); 1374 &ext_dummy);
1260 if (last_extent < 0) { 1375 if (last_extent < 0) {
1261 ret1 = last_extent; 1376 ret = last_extent;
1262 goto out; 1377 goto out;
1263 } 1378 }
1264 seq_start = le32_to_cpu(ext_cur->ee_block); 1379 seq_start = le32_to_cpu(ext_cur->ee_block);
@@ -1272,7 +1387,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1272 if (le32_to_cpu(ext_cur->ee_block) > block_end) { 1387 if (le32_to_cpu(ext_cur->ee_block) > block_end) {
1273 ext4_debug("ext4 move extent: The specified range of file " 1388 ext4_debug("ext4 move extent: The specified range of file "
1274 "may be the hole\n"); 1389 "may be the hole\n");
1275 ret1 = -EINVAL; 1390 ret = -EINVAL;
1276 goto out; 1391 goto out;
1277 } 1392 }
1278 1393
@@ -1292,7 +1407,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1292 last_extent = mext_next_extent(orig_inode, holecheck_path, 1407 last_extent = mext_next_extent(orig_inode, holecheck_path,
1293 &ext_cur); 1408 &ext_cur);
1294 if (last_extent < 0) { 1409 if (last_extent < 0) {
1295 ret1 = last_extent; 1410 ret = last_extent;
1296 break; 1411 break;
1297 } 1412 }
1298 add_blocks = ext4_ext_get_actual_len(ext_cur); 1413 add_blocks = ext4_ext_get_actual_len(ext_cur);
@@ -1349,18 +1464,18 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1349 orig_page_offset, 1464 orig_page_offset,
1350 data_offset_in_page, 1465 data_offset_in_page,
1351 block_len_in_page, uninit, 1466 block_len_in_page, uninit,
1352 &ret1); 1467 &ret);
1353 1468
1354 /* Count how many blocks we have exchanged */ 1469 /* Count how many blocks we have exchanged */
1355 *moved_len += block_len_in_page; 1470 *moved_len += block_len_in_page;
1356 if (ret1 < 0) 1471 if (ret < 0)
1357 break; 1472 break;
1358 if (*moved_len > len) { 1473 if (*moved_len > len) {
1359 EXT4_ERROR_INODE(orig_inode, 1474 EXT4_ERROR_INODE(orig_inode,
1360 "We replaced blocks too much! " 1475 "We replaced blocks too much! "
1361 "sum of replaced: %llu requested: %llu", 1476 "sum of replaced: %llu requested: %llu",
1362 *moved_len, len); 1477 *moved_len, len);
1363 ret1 = -EIO; 1478 ret = -EIO;
1364 break; 1479 break;
1365 } 1480 }
1366 1481
@@ -1374,22 +1489,22 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1374 } 1489 }
1375 1490
1376 double_down_write_data_sem(orig_inode, donor_inode); 1491 double_down_write_data_sem(orig_inode, donor_inode);
1377 if (ret1 < 0) 1492 if (ret < 0)
1378 break; 1493 break;
1379 1494
1380 /* Decrease buffer counter */ 1495 /* Decrease buffer counter */
1381 if (holecheck_path) 1496 if (holecheck_path)
1382 ext4_ext_drop_refs(holecheck_path); 1497 ext4_ext_drop_refs(holecheck_path);
1383 ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); 1498 ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
1384 if (ret1) 1499 if (ret)
1385 break; 1500 break;
1386 depth = holecheck_path->p_depth; 1501 depth = holecheck_path->p_depth;
1387 1502
1388 /* Decrease buffer counter */ 1503 /* Decrease buffer counter */
1389 if (orig_path) 1504 if (orig_path)
1390 ext4_ext_drop_refs(orig_path); 1505 ext4_ext_drop_refs(orig_path);
1391 ret1 = get_ext_path(orig_inode, seq_start, &orig_path); 1506 ret = get_ext_path(orig_inode, seq_start, &orig_path);
1392 if (ret1) 1507 if (ret)
1393 break; 1508 break;
1394 1509
1395 ext_cur = holecheck_path[depth].p_ext; 1510 ext_cur = holecheck_path[depth].p_ext;
@@ -1412,12 +1527,9 @@ out:
1412 kfree(holecheck_path); 1527 kfree(holecheck_path);
1413 } 1528 }
1414 double_up_write_data_sem(orig_inode, donor_inode); 1529 double_up_write_data_sem(orig_inode, donor_inode);
1415 ret2 = mext_inode_double_unlock(orig_inode, donor_inode); 1530 ext4_inode_resume_unlocked_dio(orig_inode);
1416 1531 ext4_inode_resume_unlocked_dio(donor_inode);
1417 if (ret1) 1532 mext_inode_double_unlock(orig_inode, donor_inode);
1418 return ret1;
1419 else if (ret2)
1420 return ret2;
1421 1533
1422 return 0; 1534 return ret;
1423} 1535}