about summary refs log tree commit diff stats
path: root/fs/ext4/move_extent.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/move_extent.c')
-rw-r--r-- fs/ext4/move_extent.c 313
1 files changed, 150 insertions, 163 deletions
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 25b6b1457360..d1fc662cc311 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -15,6 +15,7 @@
15 15
16#include <linux/fs.h> 16#include <linux/fs.h>
17#include <linux/quotaops.h> 17#include <linux/quotaops.h>
18#include <linux/slab.h>
18#include "ext4_jbd2.h" 19#include "ext4_jbd2.h"
19#include "ext4_extents.h" 20#include "ext4_extents.h"
20#include "ext4.h" 21#include "ext4.h"
@@ -77,12 +78,14 @@ static int
77mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 78mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
78 struct ext4_extent **extent) 79 struct ext4_extent **extent)
79{ 80{
81 struct ext4_extent_header *eh;
80 int ppos, leaf_ppos = path->p_depth; 82 int ppos, leaf_ppos = path->p_depth;
81 83
82 ppos = leaf_ppos; 84 ppos = leaf_ppos;
83 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { 85 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
84 /* leaf block */ 86 /* leaf block */
85 *extent = ++path[ppos].p_ext; 87 *extent = ++path[ppos].p_ext;
88 path[ppos].p_block = ext_pblock(path[ppos].p_ext);
86 return 0; 89 return 0;
87 } 90 }
88 91
@@ -119,9 +122,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
119 ext_block_hdr(path[cur_ppos+1].p_bh); 122 ext_block_hdr(path[cur_ppos+1].p_bh);
120 } 123 }
121 124
125 path[leaf_ppos].p_ext = *extent = NULL;
126
127 eh = path[leaf_ppos].p_hdr;
128 if (le16_to_cpu(eh->eh_entries) == 0)
129 /* empty leaf is found */
130 return -ENODATA;
131
122 /* leaf block */ 132 /* leaf block */
123 path[leaf_ppos].p_ext = *extent = 133 path[leaf_ppos].p_ext = *extent =
124 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); 134 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
135 path[leaf_ppos].p_block =
136 ext_pblock(path[leaf_ppos].p_ext);
125 return 0; 137 return 0;
126 } 138 }
127 } 139 }
@@ -141,12 +153,12 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
141 int ret = 0; 153 int ret = 0;
142 154
143 if (inode1 == NULL) { 155 if (inode1 == NULL) {
144 ext4_error(inode2->i_sb, function, 156 __ext4_error(inode2->i_sb, function,
145 "Both inodes should not be NULL: " 157 "Both inodes should not be NULL: "
146 "inode1 NULL inode2 %lu", inode2->i_ino); 158 "inode1 NULL inode2 %lu", inode2->i_ino);
147 ret = -EIO; 159 ret = -EIO;
148 } else if (inode2 == NULL) { 160 } else if (inode2 == NULL) {
149 ext4_error(inode1->i_sb, function, 161 __ext4_error(inode1->i_sb, function,
150 "Both inodes should not be NULL: " 162 "Both inodes should not be NULL: "
151 "inode1 %lu inode2 NULL", inode1->i_ino); 163 "inode1 %lu inode2 NULL", inode1->i_ino);
152 ret = -EIO; 164 ret = -EIO;
@@ -155,40 +167,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
155} 167}
156 168
157/** 169/**
158 * mext_double_down_read - Acquire two inodes' read semaphore 170 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
159 * 171 *
160 * @orig_inode: original inode structure 172 * @orig_inode: original inode structure
161 * @donor_inode: donor inode structure 173 * @donor_inode: donor inode structure
162 * Acquire read semaphore of the two inodes (orig and donor) by i_ino order. 174 * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
175 * i_ino order.
163 */ 176 */
164static void 177static void
165mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) 178double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
166{
167 struct inode *first = orig_inode, *second = donor_inode;
168
169 /*
170 * Use the inode number to provide the stable locking order instead
171 * of its address, because the C language doesn't guarantee you can
172 * compare pointers that don't come from the same array.
173 */
174 if (donor_inode->i_ino < orig_inode->i_ino) {
175 first = donor_inode;
176 second = orig_inode;
177 }
178
179 down_read(&EXT4_I(first)->i_data_sem);
180 down_read(&EXT4_I(second)->i_data_sem);
181}
182
183/**
184 * mext_double_down_write - Acquire two inodes' write semaphore
185 *
186 * @orig_inode: original inode structure
187 * @donor_inode: donor inode structure
188 * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
189 */
190static void
191mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
192{ 179{
193 struct inode *first = orig_inode, *second = donor_inode; 180 struct inode *first = orig_inode, *second = donor_inode;
194 181
@@ -203,32 +190,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
203 } 190 }
204 191
205 down_write(&EXT4_I(first)->i_data_sem); 192 down_write(&EXT4_I(first)->i_data_sem);
206 down_write(&EXT4_I(second)->i_data_sem); 193 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
207}
208
209/**
210 * mext_double_up_read - Release two inodes' read semaphore
211 *
212 * @orig_inode: original inode structure to be released its lock first
213 * @donor_inode: donor inode structure to be released its lock second
214 * Release read semaphore of two inodes (orig and donor).
215 */
216static void
217mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
218{
219 up_read(&EXT4_I(orig_inode)->i_data_sem);
220 up_read(&EXT4_I(donor_inode)->i_data_sem);
221} 194}
222 195
223/** 196/**
224 * mext_double_up_write - Release two inodes' write semaphore 197 * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
225 * 198 *
226 * @orig_inode: original inode structure to be released its lock first 199 * @orig_inode: original inode structure to be released its lock first
227 * @donor_inode: donor inode structure to be released its lock second 200 * @donor_inode: donor inode structure to be released its lock second
228 * Release write semaphore of two inodes (orig and donor). 201 * Release write lock of i_data_sem of two inodes (orig and donor).
229 */ 202 */
230static void 203static void
231mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) 204double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
232{ 205{
233 up_write(&EXT4_I(orig_inode)->i_data_sem); 206 up_write(&EXT4_I(orig_inode)->i_data_sem);
234 up_write(&EXT4_I(donor_inode)->i_data_sem); 207 up_write(&EXT4_I(donor_inode)->i_data_sem);
@@ -280,6 +253,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
280 } 253 }
281 254
282 o_start->ee_len = start_ext->ee_len; 255 o_start->ee_len = start_ext->ee_len;
256 eblock = le32_to_cpu(start_ext->ee_block);
283 new_flag = 1; 257 new_flag = 1;
284 258
285 } else if (start_ext->ee_len && new_ext->ee_len && 259 } else if (start_ext->ee_len && new_ext->ee_len &&
@@ -290,6 +264,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
290 * orig |------------------------------| 264 * orig |------------------------------|
291 */ 265 */
292 o_start->ee_len = start_ext->ee_len; 266 o_start->ee_len = start_ext->ee_len;
267 eblock = le32_to_cpu(start_ext->ee_block);
293 new_flag = 1; 268 new_flag = 1;
294 269
295 } else if (!start_ext->ee_len && new_ext->ee_len && 270 } else if (!start_ext->ee_len && new_ext->ee_len &&
@@ -503,7 +478,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
503 struct ext4_extent *oext, *o_start, *o_end, *prev_ext; 478 struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
504 struct ext4_extent new_ext, start_ext, end_ext; 479 struct ext4_extent new_ext, start_ext, end_ext;
505 ext4_lblk_t new_ext_end; 480 ext4_lblk_t new_ext_end;
506 ext4_fsblk_t new_phys_end;
507 int oext_alen, new_ext_alen, end_ext_alen; 481 int oext_alen, new_ext_alen, end_ext_alen;
508 int depth = ext_depth(orig_inode); 482 int depth = ext_depth(orig_inode);
509 int ret; 483 int ret;
@@ -517,7 +491,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
517 new_ext.ee_len = dext->ee_len; 491 new_ext.ee_len = dext->ee_len;
518 new_ext_alen = ext4_ext_get_actual_len(&new_ext); 492 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
519 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; 493 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
520 new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
521 494
522 /* 495 /*
523 * Case: original extent is first 496 * Case: original extent is first
@@ -530,6 +503,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
530 le32_to_cpu(oext->ee_block) + oext_alen) { 503 le32_to_cpu(oext->ee_block) + oext_alen) {
531 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - 504 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
532 le32_to_cpu(oext->ee_block)); 505 le32_to_cpu(oext->ee_block));
506 start_ext.ee_block = oext->ee_block;
533 copy_extent_status(oext, &start_ext); 507 copy_extent_status(oext, &start_ext);
534 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { 508 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
535 prev_ext = oext - 1; 509 prev_ext = oext - 1;
@@ -543,6 +517,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
543 start_ext.ee_len = cpu_to_le16( 517 start_ext.ee_len = cpu_to_le16(
544 ext4_ext_get_actual_len(prev_ext) + 518 ext4_ext_get_actual_len(prev_ext) +
545 new_ext_alen); 519 new_ext_alen);
520 start_ext.ee_block = oext->ee_block;
546 copy_extent_status(prev_ext, &start_ext); 521 copy_extent_status(prev_ext, &start_ext);
547 new_ext.ee_len = 0; 522 new_ext.ee_len = 0;
548 } 523 }
@@ -554,7 +529,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
554 * new_ext |-------| 529 * new_ext |-------|
555 */ 530 */
556 if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { 531 if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
557 ext4_error(orig_inode->i_sb, __func__, 532 ext4_error(orig_inode->i_sb,
558 "new_ext_end(%u) should be less than or equal to " 533 "new_ext_end(%u) should be less than or equal to "
559 "oext->ee_block(%u) + oext_alen(%d) - 1", 534 "oext->ee_block(%u) + oext_alen(%d) - 1",
560 new_ext_end, le32_to_cpu(oext->ee_block), 535 new_ext_end, le32_to_cpu(oext->ee_block),
@@ -596,7 +571,7 @@ out:
596 * @tmp_oext: the extent that will belong to the donor inode 571 * @tmp_oext: the extent that will belong to the donor inode
597 * @orig_off: block offset of original inode 572 * @orig_off: block offset of original inode
598 * @donor_off: block offset of donor inode 573 * @donor_off: block offset of donor inode
599 * @max_count: the maximun length of extents 574 * @max_count: the maximum length of extents
600 * 575 *
601 * Return 0 on success, or a negative error value on failure. 576 * Return 0 on success, or a negative error value on failure.
602 */ 577 */
@@ -661,6 +636,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
661 * @donor_inode: donor inode 636 * @donor_inode: donor inode
662 * @from: block offset of orig_inode 637 * @from: block offset of orig_inode
663 * @count: block count to be replaced 638 * @count: block count to be replaced
639 * @err: pointer to save return value
664 * 640 *
665 * Replace original inode extents and donor inode extents page by page. 641 * Replace original inode extents and donor inode extents page by page.
666 * We implement this replacement in the following three steps: 642 * We implement this replacement in the following three steps:
@@ -671,33 +647,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
671 * 3. Change the block information of donor inode to point at the saved 647 * 3. Change the block information of donor inode to point at the saved
672 * original inode blocks in the dummy extents. 648 * original inode blocks in the dummy extents.
673 * 649 *
674 * Return 0 on success, or a negative error value on failure. 650 * Return replaced block count.
675 */ 651 */
676static int 652static int
677mext_replace_branches(handle_t *handle, struct inode *orig_inode, 653mext_replace_branches(handle_t *handle, struct inode *orig_inode,
678 struct inode *donor_inode, ext4_lblk_t from, 654 struct inode *donor_inode, ext4_lblk_t from,
679 ext4_lblk_t count) 655 ext4_lblk_t count, int *err)
680{ 656{
681 struct ext4_ext_path *orig_path = NULL; 657 struct ext4_ext_path *orig_path = NULL;
682 struct ext4_ext_path *donor_path = NULL; 658 struct ext4_ext_path *donor_path = NULL;
683 struct ext4_extent *oext, *dext; 659 struct ext4_extent *oext, *dext;
684 struct ext4_extent tmp_dext, tmp_oext; 660 struct ext4_extent tmp_dext, tmp_oext;
685 ext4_lblk_t orig_off = from, donor_off = from; 661 ext4_lblk_t orig_off = from, donor_off = from;
686 int err = 0;
687 int depth; 662 int depth;
688 int replaced_count = 0; 663 int replaced_count = 0;
689 int dext_alen; 664 int dext_alen;
690 665
691 mext_double_down_write(orig_inode, donor_inode); 666 /* Protect extent trees against block allocations via delalloc */
667 double_down_write_data_sem(orig_inode, donor_inode);
692 668
693 /* Get the original extent for the block "orig_off" */ 669 /* Get the original extent for the block "orig_off" */
694 err = get_ext_path(orig_inode, orig_off, &orig_path); 670 *err = get_ext_path(orig_inode, orig_off, &orig_path);
695 if (err) 671 if (*err)
696 goto out; 672 goto out;
697 673
698 /* Get the donor extent for the head */ 674 /* Get the donor extent for the head */
699 err = get_ext_path(donor_inode, donor_off, &donor_path); 675 *err = get_ext_path(donor_inode, donor_off, &donor_path);
700 if (err) 676 if (*err)
701 goto out; 677 goto out;
702 depth = ext_depth(orig_inode); 678 depth = ext_depth(orig_inode);
703 oext = orig_path[depth].p_ext; 679 oext = orig_path[depth].p_ext;
@@ -707,39 +683,39 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
707 dext = donor_path[depth].p_ext; 683 dext = donor_path[depth].p_ext;
708 tmp_dext = *dext; 684 tmp_dext = *dext;
709 685
710 err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 686 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
711 donor_off, count); 687 donor_off, count);
712 if (err) 688 if (*err)
713 goto out; 689 goto out;
714 690
715 /* Loop for the donor extents */ 691 /* Loop for the donor extents */
716 while (1) { 692 while (1) {
717 /* The extent for donor must be found. */ 693 /* The extent for donor must be found. */
718 if (!dext) { 694 if (!dext) {
719 ext4_error(donor_inode->i_sb, __func__, 695 ext4_error(donor_inode->i_sb,
720 "The extent for donor must be found"); 696 "The extent for donor must be found");
721 err = -EIO; 697 *err = -EIO;
722 goto out; 698 goto out;
723 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { 699 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
724 ext4_error(donor_inode->i_sb, __func__, 700 ext4_error(donor_inode->i_sb,
725 "Donor offset(%u) and the first block of donor " 701 "Donor offset(%u) and the first block of donor "
726 "extent(%u) should be equal", 702 "extent(%u) should be equal",
727 donor_off, 703 donor_off,
728 le32_to_cpu(tmp_dext.ee_block)); 704 le32_to_cpu(tmp_dext.ee_block));
729 err = -EIO; 705 *err = -EIO;
730 goto out; 706 goto out;
731 } 707 }
732 708
733 /* Set donor extent to orig extent */ 709 /* Set donor extent to orig extent */
734 err = mext_leaf_block(handle, orig_inode, 710 *err = mext_leaf_block(handle, orig_inode,
735 orig_path, &tmp_dext, &orig_off); 711 orig_path, &tmp_dext, &orig_off);
736 if (err < 0) 712 if (*err)
737 goto out; 713 goto out;
738 714
739 /* Set orig extent to donor extent */ 715 /* Set orig extent to donor extent */
740 err = mext_leaf_block(handle, donor_inode, 716 *err = mext_leaf_block(handle, donor_inode,
741 donor_path, &tmp_oext, &donor_off); 717 donor_path, &tmp_oext, &donor_off);
742 if (err < 0) 718 if (*err)
743 goto out; 719 goto out;
744 720
745 dext_alen = ext4_ext_get_actual_len(&tmp_dext); 721 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
@@ -753,35 +729,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
753 729
754 if (orig_path) 730 if (orig_path)
755 ext4_ext_drop_refs(orig_path); 731 ext4_ext_drop_refs(orig_path);
756 err = get_ext_path(orig_inode, orig_off, &orig_path); 732 *err = get_ext_path(orig_inode, orig_off, &orig_path);
757 if (err) 733 if (*err)
758 goto out; 734 goto out;
759 depth = ext_depth(orig_inode); 735 depth = ext_depth(orig_inode);
760 oext = orig_path[depth].p_ext; 736 oext = orig_path[depth].p_ext;
761 if (le32_to_cpu(oext->ee_block) +
762 ext4_ext_get_actual_len(oext) <= orig_off) {
763 err = 0;
764 goto out;
765 }
766 tmp_oext = *oext; 737 tmp_oext = *oext;
767 738
768 if (donor_path) 739 if (donor_path)
769 ext4_ext_drop_refs(donor_path); 740 ext4_ext_drop_refs(donor_path);
770 err = get_ext_path(donor_inode, donor_off, &donor_path); 741 *err = get_ext_path(donor_inode, donor_off, &donor_path);
771 if (err) 742 if (*err)
772 goto out; 743 goto out;
773 depth = ext_depth(donor_inode); 744 depth = ext_depth(donor_inode);
774 dext = donor_path[depth].p_ext; 745 dext = donor_path[depth].p_ext;
775 if (le32_to_cpu(dext->ee_block) +
776 ext4_ext_get_actual_len(dext) <= donor_off) {
777 err = 0;
778 goto out;
779 }
780 tmp_dext = *dext; 746 tmp_dext = *dext;
781 747
782 err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 748 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
783 donor_off, count - replaced_count); 749 donor_off, count - replaced_count);
784 if (err) 750 if (*err)
785 goto out; 751 goto out;
786 } 752 }
787 753
@@ -795,8 +761,12 @@ out:
795 kfree(donor_path); 761 kfree(donor_path);
796 } 762 }
797 763
798 mext_double_up_write(orig_inode, donor_inode); 764 ext4_ext_invalidate_cache(orig_inode);
799 return err; 765 ext4_ext_invalidate_cache(donor_inode);
766
767 double_up_write_data_sem(orig_inode, donor_inode);
768
769 return replaced_count;
800} 770}
801 771
802/** 772/**
@@ -808,16 +778,17 @@ out:
808 * @data_offset_in_page: block index where data swapping starts 778 * @data_offset_in_page: block index where data swapping starts
809 * @block_len_in_page: the number of blocks to be swapped 779 * @block_len_in_page: the number of blocks to be swapped
810 * @uninit: orig extent is uninitialized or not 780 * @uninit: orig extent is uninitialized or not
781 * @err: pointer to save return value
811 * 782 *
812 * Save the data in original inode blocks and replace original inode extents 783 * Save the data in original inode blocks and replace original inode extents
813 * with donor inode extents by calling mext_replace_branches(). 784 * with donor inode extents by calling mext_replace_branches().
814 * Finally, write out the saved data in new original inode blocks. Return 0 785 * Finally, write out the saved data in new original inode blocks. Return
815 * on success, or a negative error value on failure. 786 * replaced block count.
816 */ 787 */
817static int 788static int
818move_extent_per_page(struct file *o_filp, struct inode *donor_inode, 789move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
819 pgoff_t orig_page_offset, int data_offset_in_page, 790 pgoff_t orig_page_offset, int data_offset_in_page,
820 int block_len_in_page, int uninit) 791 int block_len_in_page, int uninit, int *err)
821{ 792{
822 struct inode *orig_inode = o_filp->f_dentry->d_inode; 793 struct inode *orig_inode = o_filp->f_dentry->d_inode;
823 struct address_space *mapping = orig_inode->i_mapping; 794 struct address_space *mapping = orig_inode->i_mapping;
@@ -829,9 +800,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
829 long long offs = orig_page_offset << PAGE_CACHE_SHIFT; 800 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
830 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 801 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
831 unsigned int w_flags = 0; 802 unsigned int w_flags = 0;
832 unsigned int tmp_data_len, data_len; 803 unsigned int tmp_data_size, data_size, replaced_size;
833 void *fsdata; 804 void *fsdata;
834 int ret, i, jblocks; 805 int i, jblocks;
806 int err2 = 0;
807 int replaced_count = 0;
835 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 808 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
836 809
837 /* 810 /*
@@ -841,8 +814,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
841 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 814 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
842 handle = ext4_journal_start(orig_inode, jblocks); 815 handle = ext4_journal_start(orig_inode, jblocks);
843 if (IS_ERR(handle)) { 816 if (IS_ERR(handle)) {
844 ret = PTR_ERR(handle); 817 *err = PTR_ERR(handle);
845 return ret; 818 return 0;
846 } 819 }
847 820
848 if (segment_eq(get_fs(), KERNEL_DS)) 821 if (segment_eq(get_fs(), KERNEL_DS))
@@ -858,39 +831,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
858 * Just swap data blocks between orig and donor. 831 * Just swap data blocks between orig and donor.
859 */ 832 */
860 if (uninit) { 833 if (uninit) {
861 ret = mext_replace_branches(handle, orig_inode, 834 replaced_count = mext_replace_branches(handle, orig_inode,
862 donor_inode, orig_blk_offset, 835 donor_inode, orig_blk_offset,
863 block_len_in_page); 836 block_len_in_page, err);
864
865 /* Clear the inode cache not to refer to the old data */
866 ext4_ext_invalidate_cache(orig_inode);
867 ext4_ext_invalidate_cache(donor_inode);
868 goto out2; 837 goto out2;
869 } 838 }
870 839
871 offs = (long long)orig_blk_offset << orig_inode->i_blkbits; 840 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
872 841
873 /* Calculate data_len */ 842 /* Calculate data_size */
874 if ((orig_blk_offset + block_len_in_page - 1) == 843 if ((orig_blk_offset + block_len_in_page - 1) ==
875 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { 844 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
876 /* Replace the last block */ 845 /* Replace the last block */
877 tmp_data_len = orig_inode->i_size & (blocksize - 1); 846 tmp_data_size = orig_inode->i_size & (blocksize - 1);
878 /* 847 /*
879 * If data_len equal zero, it shows data_len is multiples of 848 * If data_size equal zero, it shows data_size is multiples of
880 * blocksize. So we set appropriate value. 849 * blocksize. So we set appropriate value.
881 */ 850 */
882 if (tmp_data_len == 0) 851 if (tmp_data_size == 0)
883 tmp_data_len = blocksize; 852 tmp_data_size = blocksize;
884 853
885 data_len = tmp_data_len + 854 data_size = tmp_data_size +
886 ((block_len_in_page - 1) << orig_inode->i_blkbits); 855 ((block_len_in_page - 1) << orig_inode->i_blkbits);
887 } else { 856 } else
888 data_len = block_len_in_page << orig_inode->i_blkbits; 857 data_size = block_len_in_page << orig_inode->i_blkbits;
889 } 858
859 replaced_size = data_size;
890 860
891 ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, 861 *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
892 &page, &fsdata); 862 &page, &fsdata);
893 if (unlikely(ret < 0)) 863 if (unlikely(*err < 0))
894 goto out; 864 goto out;
895 865
896 if (!PageUptodate(page)) { 866 if (!PageUptodate(page)) {
@@ -911,14 +881,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
911 /* Release old bh and drop refs */ 881 /* Release old bh and drop refs */
912 try_to_release_page(page, 0); 882 try_to_release_page(page, 0);
913 883
914 ret = mext_replace_branches(handle, orig_inode, donor_inode, 884 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
915 orig_blk_offset, block_len_in_page); 885 orig_blk_offset, block_len_in_page,
916 if (ret < 0) 886 &err2);
917 goto out; 887 if (err2) {
918 888 if (replaced_count) {
919 /* Clear the inode cache not to refer to the old data */ 889 block_len_in_page = replaced_count;
920 ext4_ext_invalidate_cache(orig_inode); 890 replaced_size =
921 ext4_ext_invalidate_cache(donor_inode); 891 block_len_in_page << orig_inode->i_blkbits;
892 } else
893 goto out;
894 }
922 895
923 if (!page_has_buffers(page)) 896 if (!page_has_buffers(page))
924 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); 897 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
@@ -928,16 +901,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
928 bh = bh->b_this_page; 901 bh = bh->b_this_page;
929 902
930 for (i = 0; i < block_len_in_page; i++) { 903 for (i = 0; i < block_len_in_page; i++) {
931 ret = ext4_get_block(orig_inode, 904 *err = ext4_get_block(orig_inode,
932 (sector_t)(orig_blk_offset + i), bh, 0); 905 (sector_t)(orig_blk_offset + i), bh, 0);
933 if (ret < 0) 906 if (*err < 0)
934 goto out; 907 goto out;
935 908
936 if (bh->b_this_page != NULL) 909 if (bh->b_this_page != NULL)
937 bh = bh->b_this_page; 910 bh = bh->b_this_page;
938 } 911 }
939 912
940 ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, 913 *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
941 page, fsdata); 914 page, fsdata);
942 page = NULL; 915 page = NULL;
943 916
@@ -951,18 +924,20 @@ out:
951out2: 924out2:
952 ext4_journal_stop(handle); 925 ext4_journal_stop(handle);
953 926
954 return ret < 0 ? ret : 0; 927 if (err2)
928 *err = err2;
929
930 return replaced_count;
955} 931}
956 932
957/** 933/**
958 * mext_check_argumants - Check whether move extent can be done 934 * mext_check_arguments - Check whether move extent can be done
959 * 935 *
960 * @orig_inode: original inode 936 * @orig_inode: original inode
961 * @donor_inode: donor inode 937 * @donor_inode: donor inode
962 * @orig_start: logical start offset in block for orig 938 * @orig_start: logical start offset in block for orig
963 * @donor_start: logical start offset in block for donor 939 * @donor_start: logical start offset in block for donor
964 * @len: the number of blocks to be moved 940 * @len: the number of blocks to be moved
965 * @moved_len: moved block length
966 * 941 *
967 * Check the arguments of ext4_move_extents() whether the files can be 942 * Check the arguments of ext4_move_extents() whether the files can be
968 * exchanged with each other. 943 * exchanged with each other.
@@ -970,18 +945,17 @@ out2:
970 */ 945 */
971static int 946static int
972mext_check_arguments(struct inode *orig_inode, 947mext_check_arguments(struct inode *orig_inode,
973 struct inode *donor_inode, __u64 orig_start, 948 struct inode *donor_inode, __u64 orig_start,
974 __u64 donor_start, __u64 *len, __u64 moved_len) 949 __u64 donor_start, __u64 *len)
975{ 950{
976 ext4_lblk_t orig_blocks, donor_blocks; 951 ext4_lblk_t orig_blocks, donor_blocks;
977 unsigned int blkbits = orig_inode->i_blkbits; 952 unsigned int blkbits = orig_inode->i_blkbits;
978 unsigned int blocksize = 1 << blkbits; 953 unsigned int blocksize = 1 << blkbits;
979 954
980 /* Regular file check */ 955 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
981 if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { 956 ext4_debug("ext4 move extent: suid or sgid is set"
982 ext4_debug("ext4 move extent: The argument files should be " 957 " to donor file [ino:orig %lu, donor %lu]\n",
983 "regular file [ino:orig %lu, donor %lu]\n", 958 orig_inode->i_ino, donor_inode->i_ino);
984 orig_inode->i_ino, donor_inode->i_ino);
985 return -EINVAL; 959 return -EINVAL;
986 } 960 }
987 961
@@ -1025,13 +999,6 @@ mext_check_arguments(struct inode *orig_inode,
1025 return -EINVAL; 999 return -EINVAL;
1026 } 1000 }
1027 1001
1028 if (moved_len) {
1029 ext4_debug("ext4 move extent: moved_len should be 0 "
1030 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1031 donor_inode->i_ino);
1032 return -EINVAL;
1033 }
1034
1035 if ((orig_start > EXT_MAX_BLOCK) || 1002 if ((orig_start > EXT_MAX_BLOCK) ||
1036 (donor_start > EXT_MAX_BLOCK) || 1003 (donor_start > EXT_MAX_BLOCK) ||
1037 (*len > EXT_MAX_BLOCK) || 1004 (*len > EXT_MAX_BLOCK) ||
@@ -1088,7 +1055,7 @@ mext_check_arguments(struct inode *orig_inode,
1088 } 1055 }
1089 1056
1090 if (!*len) { 1057 if (!*len) {
1091 ext4_debug("ext4 move extent: len shoudld not be 0 " 1058 ext4_debug("ext4 move extent: len should not be 0 "
1092 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, 1059 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1093 donor_inode->i_ino); 1060 donor_inode->i_ino);
1094 return -EINVAL; 1061 return -EINVAL;
@@ -1232,16 +1199,24 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1232 return -EINVAL; 1199 return -EINVAL;
1233 } 1200 }
1234 1201
1235 /* protect orig and donor against a truncate */ 1202 /* Regular file check */
1203 if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
1204 ext4_debug("ext4 move extent: The argument files should be "
1205 "regular file [ino:orig %lu, donor %lu]\n",
1206 orig_inode->i_ino, donor_inode->i_ino);
1207 return -EINVAL;
1208 }
1209
1210 /* Protect orig and donor inodes against a truncate */
1236 ret1 = mext_inode_double_lock(orig_inode, donor_inode); 1211 ret1 = mext_inode_double_lock(orig_inode, donor_inode);
1237 if (ret1 < 0) 1212 if (ret1 < 0)
1238 return ret1; 1213 return ret1;
1239 1214
1240 mext_double_down_read(orig_inode, donor_inode); 1215 /* Protect extent tree against block allocations via delalloc */
1216 double_down_write_data_sem(orig_inode, donor_inode);
1241 /* Check the filesystem environment whether move_extent can be done */ 1217 /* Check the filesystem environment whether move_extent can be done */
1242 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, 1218 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
1243 donor_start, &len, *moved_len); 1219 donor_start, &len);
1244 mext_double_up_read(orig_inode, donor_inode);
1245 if (ret1) 1220 if (ret1)
1246 goto out; 1221 goto out;
1247 1222
@@ -1355,36 +1330,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1355 seq_start = le32_to_cpu(ext_cur->ee_block); 1330 seq_start = le32_to_cpu(ext_cur->ee_block);
1356 rest_blocks = seq_blocks; 1331 rest_blocks = seq_blocks;
1357 1332
1358 /* Discard preallocations of two inodes */ 1333 /*
1359 down_write(&EXT4_I(orig_inode)->i_data_sem); 1334 * Up semaphore to avoid following problems:
1360 ext4_discard_preallocations(orig_inode); 1335 * a. transaction deadlock among ext4_journal_start,
1361 up_write(&EXT4_I(orig_inode)->i_data_sem); 1336 * ->write_begin via pagefault, and jbd2_journal_commit
1362 1337 * b. racing with ->readpage, ->write_begin, and ext4_get_block
1363 down_write(&EXT4_I(donor_inode)->i_data_sem); 1338 * in move_extent_per_page
1364 ext4_discard_preallocations(donor_inode); 1339 */
1365 up_write(&EXT4_I(donor_inode)->i_data_sem); 1340 double_up_write_data_sem(orig_inode, donor_inode);
1366 1341
1367 while (orig_page_offset <= seq_end_page) { 1342 while (orig_page_offset <= seq_end_page) {
1368 1343
1369 /* Swap original branches with new branches */ 1344 /* Swap original branches with new branches */
1370 ret1 = move_extent_per_page(o_filp, donor_inode, 1345 block_len_in_page = move_extent_per_page(
1346 o_filp, donor_inode,
1371 orig_page_offset, 1347 orig_page_offset,
1372 data_offset_in_page, 1348 data_offset_in_page,
1373 block_len_in_page, uninit); 1349 block_len_in_page, uninit,
1374 if (ret1 < 0) 1350 &ret1);
1375 goto out; 1351
1376 orig_page_offset++;
1377 /* Count how many blocks we have exchanged */ 1352 /* Count how many blocks we have exchanged */
1378 *moved_len += block_len_in_page; 1353 *moved_len += block_len_in_page;
1354 if (ret1 < 0)
1355 break;
1379 if (*moved_len > len) { 1356 if (*moved_len > len) {
1380 ext4_error(orig_inode->i_sb, __func__, 1357 ext4_error(orig_inode->i_sb,
1381 "We replaced blocks too much! " 1358 "We replaced blocks too much! "
1382 "sum of replaced: %llu requested: %llu", 1359 "sum of replaced: %llu requested: %llu",
1383 *moved_len, len); 1360 *moved_len, len);
1384 ret1 = -EIO; 1361 ret1 = -EIO;
1385 goto out; 1362 break;
1386 } 1363 }
1387 1364
1365 orig_page_offset++;
1388 data_offset_in_page = 0; 1366 data_offset_in_page = 0;
1389 rest_blocks -= block_len_in_page; 1367 rest_blocks -= block_len_in_page;
1390 if (rest_blocks > blocks_per_page) 1368 if (rest_blocks > blocks_per_page)
@@ -1393,6 +1371,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1393 block_len_in_page = rest_blocks; 1371 block_len_in_page = rest_blocks;
1394 } 1372 }
1395 1373
1374 double_down_write_data_sem(orig_inode, donor_inode);
1375 if (ret1 < 0)
1376 break;
1377
1396 /* Decrease buffer counter */ 1378 /* Decrease buffer counter */
1397 if (holecheck_path) 1379 if (holecheck_path)
1398 ext4_ext_drop_refs(holecheck_path); 1380 ext4_ext_drop_refs(holecheck_path);
@@ -1414,6 +1396,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1414 1396
1415 } 1397 }
1416out: 1398out:
1399 if (*moved_len) {
1400 ext4_discard_preallocations(orig_inode);
1401 ext4_discard_preallocations(donor_inode);
1402 }
1403
1417 if (orig_path) { 1404 if (orig_path) {
1418 ext4_ext_drop_refs(orig_path); 1405 ext4_ext_drop_refs(orig_path);
1419 kfree(orig_path); 1406 kfree(orig_path);
@@ -1422,7 +1409,7 @@ out:
1422 ext4_ext_drop_refs(holecheck_path); 1409 ext4_ext_drop_refs(holecheck_path);
1423 kfree(holecheck_path); 1410 kfree(holecheck_path);
1424 } 1411 }
1425 1412 double_up_write_data_sem(orig_inode, donor_inode);
1426 ret2 = mext_inode_double_unlock(orig_inode, donor_inode); 1413 ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
1427 1414
1428 if (ret1) 1415 if (ret1)