diff options
Diffstat (limited to 'fs/ext4/move_extent.c')
-rw-r--r-- | fs/ext4/move_extent.c | 313 |
1 files changed, 150 insertions, 163 deletions
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 25b6b1457360..d1fc662cc311 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | #include <linux/fs.h> | 16 | #include <linux/fs.h> |
17 | #include <linux/quotaops.h> | 17 | #include <linux/quotaops.h> |
18 | #include <linux/slab.h> | ||
18 | #include "ext4_jbd2.h" | 19 | #include "ext4_jbd2.h" |
19 | #include "ext4_extents.h" | 20 | #include "ext4_extents.h" |
20 | #include "ext4.h" | 21 | #include "ext4.h" |
@@ -77,12 +78,14 @@ static int | |||
77 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | 78 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, |
78 | struct ext4_extent **extent) | 79 | struct ext4_extent **extent) |
79 | { | 80 | { |
81 | struct ext4_extent_header *eh; | ||
80 | int ppos, leaf_ppos = path->p_depth; | 82 | int ppos, leaf_ppos = path->p_depth; |
81 | 83 | ||
82 | ppos = leaf_ppos; | 84 | ppos = leaf_ppos; |
83 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { | 85 | if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { |
84 | /* leaf block */ | 86 | /* leaf block */ |
85 | *extent = ++path[ppos].p_ext; | 87 | *extent = ++path[ppos].p_ext; |
88 | path[ppos].p_block = ext_pblock(path[ppos].p_ext); | ||
86 | return 0; | 89 | return 0; |
87 | } | 90 | } |
88 | 91 | ||
@@ -119,9 +122,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | |||
119 | ext_block_hdr(path[cur_ppos+1].p_bh); | 122 | ext_block_hdr(path[cur_ppos+1].p_bh); |
120 | } | 123 | } |
121 | 124 | ||
125 | path[leaf_ppos].p_ext = *extent = NULL; | ||
126 | |||
127 | eh = path[leaf_ppos].p_hdr; | ||
128 | if (le16_to_cpu(eh->eh_entries) == 0) | ||
129 | /* empty leaf is found */ | ||
130 | return -ENODATA; | ||
131 | |||
122 | /* leaf block */ | 132 | /* leaf block */ |
123 | path[leaf_ppos].p_ext = *extent = | 133 | path[leaf_ppos].p_ext = *extent = |
124 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); | 134 | EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); |
135 | path[leaf_ppos].p_block = | ||
136 | ext_pblock(path[leaf_ppos].p_ext); | ||
125 | return 0; | 137 | return 0; |
126 | } | 138 | } |
127 | } | 139 | } |
@@ -141,12 +153,12 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2, | |||
141 | int ret = 0; | 153 | int ret = 0; |
142 | 154 | ||
143 | if (inode1 == NULL) { | 155 | if (inode1 == NULL) { |
144 | ext4_error(inode2->i_sb, function, | 156 | __ext4_error(inode2->i_sb, function, |
145 | "Both inodes should not be NULL: " | 157 | "Both inodes should not be NULL: " |
146 | "inode1 NULL inode2 %lu", inode2->i_ino); | 158 | "inode1 NULL inode2 %lu", inode2->i_ino); |
147 | ret = -EIO; | 159 | ret = -EIO; |
148 | } else if (inode2 == NULL) { | 160 | } else if (inode2 == NULL) { |
149 | ext4_error(inode1->i_sb, function, | 161 | __ext4_error(inode1->i_sb, function, |
150 | "Both inodes should not be NULL: " | 162 | "Both inodes should not be NULL: " |
151 | "inode1 %lu inode2 NULL", inode1->i_ino); | 163 | "inode1 %lu inode2 NULL", inode1->i_ino); |
152 | ret = -EIO; | 164 | ret = -EIO; |
@@ -155,40 +167,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2, | |||
155 | } | 167 | } |
156 | 168 | ||
157 | /** | 169 | /** |
158 | * mext_double_down_read - Acquire two inodes' read semaphore | 170 | * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem |
159 | * | 171 | * |
160 | * @orig_inode: original inode structure | 172 | * @orig_inode: original inode structure |
161 | * @donor_inode: donor inode structure | 173 | * @donor_inode: donor inode structure |
162 | * Acquire read semaphore of the two inodes (orig and donor) by i_ino order. | 174 | * Acquire write lock of i_data_sem of the two inodes (orig and donor) by |
175 | * i_ino order. | ||
163 | */ | 176 | */ |
164 | static void | 177 | static void |
165 | mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) | 178 | double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) |
166 | { | ||
167 | struct inode *first = orig_inode, *second = donor_inode; | ||
168 | |||
169 | /* | ||
170 | * Use the inode number to provide the stable locking order instead | ||
171 | * of its address, because the C language doesn't guarantee you can | ||
172 | * compare pointers that don't come from the same array. | ||
173 | */ | ||
174 | if (donor_inode->i_ino < orig_inode->i_ino) { | ||
175 | first = donor_inode; | ||
176 | second = orig_inode; | ||
177 | } | ||
178 | |||
179 | down_read(&EXT4_I(first)->i_data_sem); | ||
180 | down_read(&EXT4_I(second)->i_data_sem); | ||
181 | } | ||
182 | |||
183 | /** | ||
184 | * mext_double_down_write - Acquire two inodes' write semaphore | ||
185 | * | ||
186 | * @orig_inode: original inode structure | ||
187 | * @donor_inode: donor inode structure | ||
188 | * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. | ||
189 | */ | ||
190 | static void | ||
191 | mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | ||
192 | { | 179 | { |
193 | struct inode *first = orig_inode, *second = donor_inode; | 180 | struct inode *first = orig_inode, *second = donor_inode; |
194 | 181 | ||
@@ -203,32 +190,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) | |||
203 | } | 190 | } |
204 | 191 | ||
205 | down_write(&EXT4_I(first)->i_data_sem); | 192 | down_write(&EXT4_I(first)->i_data_sem); |
206 | down_write(&EXT4_I(second)->i_data_sem); | 193 | down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); |
207 | } | ||
208 | |||
209 | /** | ||
210 | * mext_double_up_read - Release two inodes' read semaphore | ||
211 | * | ||
212 | * @orig_inode: original inode structure to be released its lock first | ||
213 | * @donor_inode: donor inode structure to be released its lock second | ||
214 | * Release read semaphore of two inodes (orig and donor). | ||
215 | */ | ||
216 | static void | ||
217 | mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) | ||
218 | { | ||
219 | up_read(&EXT4_I(orig_inode)->i_data_sem); | ||
220 | up_read(&EXT4_I(donor_inode)->i_data_sem); | ||
221 | } | 194 | } |
222 | 195 | ||
223 | /** | 196 | /** |
224 | * mext_double_up_write - Release two inodes' write semaphore | 197 | * double_up_write_data_sem - Release two inodes' write lock of i_data_sem |
225 | * | 198 | * |
226 | * @orig_inode: original inode structure to be released its lock first | 199 | * @orig_inode: original inode structure to be released its lock first |
227 | * @donor_inode: donor inode structure to be released its lock second | 200 | * @donor_inode: donor inode structure to be released its lock second |
228 | * Release write semaphore of two inodes (orig and donor). | 201 | * Release write lock of i_data_sem of two inodes (orig and donor). |
229 | */ | 202 | */ |
230 | static void | 203 | static void |
231 | mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) | 204 | double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) |
232 | { | 205 | { |
233 | up_write(&EXT4_I(orig_inode)->i_data_sem); | 206 | up_write(&EXT4_I(orig_inode)->i_data_sem); |
234 | up_write(&EXT4_I(donor_inode)->i_data_sem); | 207 | up_write(&EXT4_I(donor_inode)->i_data_sem); |
@@ -280,6 +253,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
280 | } | 253 | } |
281 | 254 | ||
282 | o_start->ee_len = start_ext->ee_len; | 255 | o_start->ee_len = start_ext->ee_len; |
256 | eblock = le32_to_cpu(start_ext->ee_block); | ||
283 | new_flag = 1; | 257 | new_flag = 1; |
284 | 258 | ||
285 | } else if (start_ext->ee_len && new_ext->ee_len && | 259 | } else if (start_ext->ee_len && new_ext->ee_len && |
@@ -290,6 +264,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, | |||
290 | * orig |------------------------------| | 264 | * orig |------------------------------| |
291 | */ | 265 | */ |
292 | o_start->ee_len = start_ext->ee_len; | 266 | o_start->ee_len = start_ext->ee_len; |
267 | eblock = le32_to_cpu(start_ext->ee_block); | ||
293 | new_flag = 1; | 268 | new_flag = 1; |
294 | 269 | ||
295 | } else if (!start_ext->ee_len && new_ext->ee_len && | 270 | } else if (!start_ext->ee_len && new_ext->ee_len && |
@@ -503,7 +478,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
503 | struct ext4_extent *oext, *o_start, *o_end, *prev_ext; | 478 | struct ext4_extent *oext, *o_start, *o_end, *prev_ext; |
504 | struct ext4_extent new_ext, start_ext, end_ext; | 479 | struct ext4_extent new_ext, start_ext, end_ext; |
505 | ext4_lblk_t new_ext_end; | 480 | ext4_lblk_t new_ext_end; |
506 | ext4_fsblk_t new_phys_end; | ||
507 | int oext_alen, new_ext_alen, end_ext_alen; | 481 | int oext_alen, new_ext_alen, end_ext_alen; |
508 | int depth = ext_depth(orig_inode); | 482 | int depth = ext_depth(orig_inode); |
509 | int ret; | 483 | int ret; |
@@ -517,7 +491,6 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
517 | new_ext.ee_len = dext->ee_len; | 491 | new_ext.ee_len = dext->ee_len; |
518 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); | 492 | new_ext_alen = ext4_ext_get_actual_len(&new_ext); |
519 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; | 493 | new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; |
520 | new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1; | ||
521 | 494 | ||
522 | /* | 495 | /* |
523 | * Case: original extent is first | 496 | * Case: original extent is first |
@@ -530,6 +503,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
530 | le32_to_cpu(oext->ee_block) + oext_alen) { | 503 | le32_to_cpu(oext->ee_block) + oext_alen) { |
531 | start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - | 504 | start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - |
532 | le32_to_cpu(oext->ee_block)); | 505 | le32_to_cpu(oext->ee_block)); |
506 | start_ext.ee_block = oext->ee_block; | ||
533 | copy_extent_status(oext, &start_ext); | 507 | copy_extent_status(oext, &start_ext); |
534 | } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { | 508 | } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { |
535 | prev_ext = oext - 1; | 509 | prev_ext = oext - 1; |
@@ -543,6 +517,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
543 | start_ext.ee_len = cpu_to_le16( | 517 | start_ext.ee_len = cpu_to_le16( |
544 | ext4_ext_get_actual_len(prev_ext) + | 518 | ext4_ext_get_actual_len(prev_ext) + |
545 | new_ext_alen); | 519 | new_ext_alen); |
520 | start_ext.ee_block = oext->ee_block; | ||
546 | copy_extent_status(prev_ext, &start_ext); | 521 | copy_extent_status(prev_ext, &start_ext); |
547 | new_ext.ee_len = 0; | 522 | new_ext.ee_len = 0; |
548 | } | 523 | } |
@@ -554,7 +529,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, | |||
554 | * new_ext |-------| | 529 | * new_ext |-------| |
555 | */ | 530 | */ |
556 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { | 531 | if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { |
557 | ext4_error(orig_inode->i_sb, __func__, | 532 | ext4_error(orig_inode->i_sb, |
558 | "new_ext_end(%u) should be less than or equal to " | 533 | "new_ext_end(%u) should be less than or equal to " |
559 | "oext->ee_block(%u) + oext_alen(%d) - 1", | 534 | "oext->ee_block(%u) + oext_alen(%d) - 1", |
560 | new_ext_end, le32_to_cpu(oext->ee_block), | 535 | new_ext_end, le32_to_cpu(oext->ee_block), |
@@ -596,7 +571,7 @@ out: | |||
596 | * @tmp_oext: the extent that will belong to the donor inode | 571 | * @tmp_oext: the extent that will belong to the donor inode |
597 | * @orig_off: block offset of original inode | 572 | * @orig_off: block offset of original inode |
598 | * @donor_off: block offset of donor inode | 573 | * @donor_off: block offset of donor inode |
599 | * @max_count: the maximun length of extents | 574 | * @max_count: the maximum length of extents |
600 | * | 575 | * |
601 | * Return 0 on success, or a negative error value on failure. | 576 | * Return 0 on success, or a negative error value on failure. |
602 | */ | 577 | */ |
@@ -661,6 +636,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
661 | * @donor_inode: donor inode | 636 | * @donor_inode: donor inode |
662 | * @from: block offset of orig_inode | 637 | * @from: block offset of orig_inode |
663 | * @count: block count to be replaced | 638 | * @count: block count to be replaced |
639 | * @err: pointer to save return value | ||
664 | * | 640 | * |
665 | * Replace original inode extents and donor inode extents page by page. | 641 | * Replace original inode extents and donor inode extents page by page. |
666 | * We implement this replacement in the following three steps: | 642 | * We implement this replacement in the following three steps: |
@@ -671,33 +647,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, | |||
671 | * 3. Change the block information of donor inode to point at the saved | 647 | * 3. Change the block information of donor inode to point at the saved |
672 | * original inode blocks in the dummy extents. | 648 | * original inode blocks in the dummy extents. |
673 | * | 649 | * |
674 | * Return 0 on success, or a negative error value on failure. | 650 | * Return replaced block count. |
675 | */ | 651 | */ |
676 | static int | 652 | static int |
677 | mext_replace_branches(handle_t *handle, struct inode *orig_inode, | 653 | mext_replace_branches(handle_t *handle, struct inode *orig_inode, |
678 | struct inode *donor_inode, ext4_lblk_t from, | 654 | struct inode *donor_inode, ext4_lblk_t from, |
679 | ext4_lblk_t count) | 655 | ext4_lblk_t count, int *err) |
680 | { | 656 | { |
681 | struct ext4_ext_path *orig_path = NULL; | 657 | struct ext4_ext_path *orig_path = NULL; |
682 | struct ext4_ext_path *donor_path = NULL; | 658 | struct ext4_ext_path *donor_path = NULL; |
683 | struct ext4_extent *oext, *dext; | 659 | struct ext4_extent *oext, *dext; |
684 | struct ext4_extent tmp_dext, tmp_oext; | 660 | struct ext4_extent tmp_dext, tmp_oext; |
685 | ext4_lblk_t orig_off = from, donor_off = from; | 661 | ext4_lblk_t orig_off = from, donor_off = from; |
686 | int err = 0; | ||
687 | int depth; | 662 | int depth; |
688 | int replaced_count = 0; | 663 | int replaced_count = 0; |
689 | int dext_alen; | 664 | int dext_alen; |
690 | 665 | ||
691 | mext_double_down_write(orig_inode, donor_inode); | 666 | /* Protect extent trees against block allocations via delalloc */ |
667 | double_down_write_data_sem(orig_inode, donor_inode); | ||
692 | 668 | ||
693 | /* Get the original extent for the block "orig_off" */ | 669 | /* Get the original extent for the block "orig_off" */ |
694 | err = get_ext_path(orig_inode, orig_off, &orig_path); | 670 | *err = get_ext_path(orig_inode, orig_off, &orig_path); |
695 | if (err) | 671 | if (*err) |
696 | goto out; | 672 | goto out; |
697 | 673 | ||
698 | /* Get the donor extent for the head */ | 674 | /* Get the donor extent for the head */ |
699 | err = get_ext_path(donor_inode, donor_off, &donor_path); | 675 | *err = get_ext_path(donor_inode, donor_off, &donor_path); |
700 | if (err) | 676 | if (*err) |
701 | goto out; | 677 | goto out; |
702 | depth = ext_depth(orig_inode); | 678 | depth = ext_depth(orig_inode); |
703 | oext = orig_path[depth].p_ext; | 679 | oext = orig_path[depth].p_ext; |
@@ -707,39 +683,39 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
707 | dext = donor_path[depth].p_ext; | 683 | dext = donor_path[depth].p_ext; |
708 | tmp_dext = *dext; | 684 | tmp_dext = *dext; |
709 | 685 | ||
710 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 686 | *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
711 | donor_off, count); | 687 | donor_off, count); |
712 | if (err) | 688 | if (*err) |
713 | goto out; | 689 | goto out; |
714 | 690 | ||
715 | /* Loop for the donor extents */ | 691 | /* Loop for the donor extents */ |
716 | while (1) { | 692 | while (1) { |
717 | /* The extent for donor must be found. */ | 693 | /* The extent for donor must be found. */ |
718 | if (!dext) { | 694 | if (!dext) { |
719 | ext4_error(donor_inode->i_sb, __func__, | 695 | ext4_error(donor_inode->i_sb, |
720 | "The extent for donor must be found"); | 696 | "The extent for donor must be found"); |
721 | err = -EIO; | 697 | *err = -EIO; |
722 | goto out; | 698 | goto out; |
723 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { | 699 | } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { |
724 | ext4_error(donor_inode->i_sb, __func__, | 700 | ext4_error(donor_inode->i_sb, |
725 | "Donor offset(%u) and the first block of donor " | 701 | "Donor offset(%u) and the first block of donor " |
726 | "extent(%u) should be equal", | 702 | "extent(%u) should be equal", |
727 | donor_off, | 703 | donor_off, |
728 | le32_to_cpu(tmp_dext.ee_block)); | 704 | le32_to_cpu(tmp_dext.ee_block)); |
729 | err = -EIO; | 705 | *err = -EIO; |
730 | goto out; | 706 | goto out; |
731 | } | 707 | } |
732 | 708 | ||
733 | /* Set donor extent to orig extent */ | 709 | /* Set donor extent to orig extent */ |
734 | err = mext_leaf_block(handle, orig_inode, | 710 | *err = mext_leaf_block(handle, orig_inode, |
735 | orig_path, &tmp_dext, &orig_off); | 711 | orig_path, &tmp_dext, &orig_off); |
736 | if (err < 0) | 712 | if (*err) |
737 | goto out; | 713 | goto out; |
738 | 714 | ||
739 | /* Set orig extent to donor extent */ | 715 | /* Set orig extent to donor extent */ |
740 | err = mext_leaf_block(handle, donor_inode, | 716 | *err = mext_leaf_block(handle, donor_inode, |
741 | donor_path, &tmp_oext, &donor_off); | 717 | donor_path, &tmp_oext, &donor_off); |
742 | if (err < 0) | 718 | if (*err) |
743 | goto out; | 719 | goto out; |
744 | 720 | ||
745 | dext_alen = ext4_ext_get_actual_len(&tmp_dext); | 721 | dext_alen = ext4_ext_get_actual_len(&tmp_dext); |
@@ -753,35 +729,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, | |||
753 | 729 | ||
754 | if (orig_path) | 730 | if (orig_path) |
755 | ext4_ext_drop_refs(orig_path); | 731 | ext4_ext_drop_refs(orig_path); |
756 | err = get_ext_path(orig_inode, orig_off, &orig_path); | 732 | *err = get_ext_path(orig_inode, orig_off, &orig_path); |
757 | if (err) | 733 | if (*err) |
758 | goto out; | 734 | goto out; |
759 | depth = ext_depth(orig_inode); | 735 | depth = ext_depth(orig_inode); |
760 | oext = orig_path[depth].p_ext; | 736 | oext = orig_path[depth].p_ext; |
761 | if (le32_to_cpu(oext->ee_block) + | ||
762 | ext4_ext_get_actual_len(oext) <= orig_off) { | ||
763 | err = 0; | ||
764 | goto out; | ||
765 | } | ||
766 | tmp_oext = *oext; | 737 | tmp_oext = *oext; |
767 | 738 | ||
768 | if (donor_path) | 739 | if (donor_path) |
769 | ext4_ext_drop_refs(donor_path); | 740 | ext4_ext_drop_refs(donor_path); |
770 | err = get_ext_path(donor_inode, donor_off, &donor_path); | 741 | *err = get_ext_path(donor_inode, donor_off, &donor_path); |
771 | if (err) | 742 | if (*err) |
772 | goto out; | 743 | goto out; |
773 | depth = ext_depth(donor_inode); | 744 | depth = ext_depth(donor_inode); |
774 | dext = donor_path[depth].p_ext; | 745 | dext = donor_path[depth].p_ext; |
775 | if (le32_to_cpu(dext->ee_block) + | ||
776 | ext4_ext_get_actual_len(dext) <= donor_off) { | ||
777 | err = 0; | ||
778 | goto out; | ||
779 | } | ||
780 | tmp_dext = *dext; | 746 | tmp_dext = *dext; |
781 | 747 | ||
782 | err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, | 748 | *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, |
783 | donor_off, count - replaced_count); | 749 | donor_off, count - replaced_count); |
784 | if (err) | 750 | if (*err) |
785 | goto out; | 751 | goto out; |
786 | } | 752 | } |
787 | 753 | ||
@@ -795,8 +761,12 @@ out: | |||
795 | kfree(donor_path); | 761 | kfree(donor_path); |
796 | } | 762 | } |
797 | 763 | ||
798 | mext_double_up_write(orig_inode, donor_inode); | 764 | ext4_ext_invalidate_cache(orig_inode); |
799 | return err; | 765 | ext4_ext_invalidate_cache(donor_inode); |
766 | |||
767 | double_up_write_data_sem(orig_inode, donor_inode); | ||
768 | |||
769 | return replaced_count; | ||
800 | } | 770 | } |
801 | 771 | ||
802 | /** | 772 | /** |
@@ -808,16 +778,17 @@ out: | |||
808 | * @data_offset_in_page: block index where data swapping starts | 778 | * @data_offset_in_page: block index where data swapping starts |
809 | * @block_len_in_page: the number of blocks to be swapped | 779 | * @block_len_in_page: the number of blocks to be swapped |
810 | * @uninit: orig extent is uninitialized or not | 780 | * @uninit: orig extent is uninitialized or not |
781 | * @err: pointer to save return value | ||
811 | * | 782 | * |
812 | * Save the data in original inode blocks and replace original inode extents | 783 | * Save the data in original inode blocks and replace original inode extents |
813 | * with donor inode extents by calling mext_replace_branches(). | 784 | * with donor inode extents by calling mext_replace_branches(). |
814 | * Finally, write out the saved data in new original inode blocks. Return 0 | 785 | * Finally, write out the saved data in new original inode blocks. Return |
815 | * on success, or a negative error value on failure. | 786 | * replaced block count. |
816 | */ | 787 | */ |
817 | static int | 788 | static int |
818 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | 789 | move_extent_per_page(struct file *o_filp, struct inode *donor_inode, |
819 | pgoff_t orig_page_offset, int data_offset_in_page, | 790 | pgoff_t orig_page_offset, int data_offset_in_page, |
820 | int block_len_in_page, int uninit) | 791 | int block_len_in_page, int uninit, int *err) |
821 | { | 792 | { |
822 | struct inode *orig_inode = o_filp->f_dentry->d_inode; | 793 | struct inode *orig_inode = o_filp->f_dentry->d_inode; |
823 | struct address_space *mapping = orig_inode->i_mapping; | 794 | struct address_space *mapping = orig_inode->i_mapping; |
@@ -829,9 +800,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
829 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; | 800 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; |
830 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 801 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
831 | unsigned int w_flags = 0; | 802 | unsigned int w_flags = 0; |
832 | unsigned int tmp_data_len, data_len; | 803 | unsigned int tmp_data_size, data_size, replaced_size; |
833 | void *fsdata; | 804 | void *fsdata; |
834 | int ret, i, jblocks; | 805 | int i, jblocks; |
806 | int err2 = 0; | ||
807 | int replaced_count = 0; | ||
835 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 808 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
836 | 809 | ||
837 | /* | 810 | /* |
@@ -841,8 +814,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
841 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; | 814 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; |
842 | handle = ext4_journal_start(orig_inode, jblocks); | 815 | handle = ext4_journal_start(orig_inode, jblocks); |
843 | if (IS_ERR(handle)) { | 816 | if (IS_ERR(handle)) { |
844 | ret = PTR_ERR(handle); | 817 | *err = PTR_ERR(handle); |
845 | return ret; | 818 | return 0; |
846 | } | 819 | } |
847 | 820 | ||
848 | if (segment_eq(get_fs(), KERNEL_DS)) | 821 | if (segment_eq(get_fs(), KERNEL_DS)) |
@@ -858,39 +831,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
858 | * Just swap data blocks between orig and donor. | 831 | * Just swap data blocks between orig and donor. |
859 | */ | 832 | */ |
860 | if (uninit) { | 833 | if (uninit) { |
861 | ret = mext_replace_branches(handle, orig_inode, | 834 | replaced_count = mext_replace_branches(handle, orig_inode, |
862 | donor_inode, orig_blk_offset, | 835 | donor_inode, orig_blk_offset, |
863 | block_len_in_page); | 836 | block_len_in_page, err); |
864 | |||
865 | /* Clear the inode cache not to refer to the old data */ | ||
866 | ext4_ext_invalidate_cache(orig_inode); | ||
867 | ext4_ext_invalidate_cache(donor_inode); | ||
868 | goto out2; | 837 | goto out2; |
869 | } | 838 | } |
870 | 839 | ||
871 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; | 840 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; |
872 | 841 | ||
873 | /* Calculate data_len */ | 842 | /* Calculate data_size */ |
874 | if ((orig_blk_offset + block_len_in_page - 1) == | 843 | if ((orig_blk_offset + block_len_in_page - 1) == |
875 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { | 844 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { |
876 | /* Replace the last block */ | 845 | /* Replace the last block */ |
877 | tmp_data_len = orig_inode->i_size & (blocksize - 1); | 846 | tmp_data_size = orig_inode->i_size & (blocksize - 1); |
878 | /* | 847 | /* |
879 | * If data_len equal zero, it shows data_len is multiples of | 848 | * If data_size equal zero, it shows data_size is multiples of |
880 | * blocksize. So we set appropriate value. | 849 | * blocksize. So we set appropriate value. |
881 | */ | 850 | */ |
882 | if (tmp_data_len == 0) | 851 | if (tmp_data_size == 0) |
883 | tmp_data_len = blocksize; | 852 | tmp_data_size = blocksize; |
884 | 853 | ||
885 | data_len = tmp_data_len + | 854 | data_size = tmp_data_size + |
886 | ((block_len_in_page - 1) << orig_inode->i_blkbits); | 855 | ((block_len_in_page - 1) << orig_inode->i_blkbits); |
887 | } else { | 856 | } else |
888 | data_len = block_len_in_page << orig_inode->i_blkbits; | 857 | data_size = block_len_in_page << orig_inode->i_blkbits; |
889 | } | 858 | |
859 | replaced_size = data_size; | ||
890 | 860 | ||
891 | ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, | 861 | *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, |
892 | &page, &fsdata); | 862 | &page, &fsdata); |
893 | if (unlikely(ret < 0)) | 863 | if (unlikely(*err < 0)) |
894 | goto out; | 864 | goto out; |
895 | 865 | ||
896 | if (!PageUptodate(page)) { | 866 | if (!PageUptodate(page)) { |
@@ -911,14 +881,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
911 | /* Release old bh and drop refs */ | 881 | /* Release old bh and drop refs */ |
912 | try_to_release_page(page, 0); | 882 | try_to_release_page(page, 0); |
913 | 883 | ||
914 | ret = mext_replace_branches(handle, orig_inode, donor_inode, | 884 | replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, |
915 | orig_blk_offset, block_len_in_page); | 885 | orig_blk_offset, block_len_in_page, |
916 | if (ret < 0) | 886 | &err2); |
917 | goto out; | 887 | if (err2) { |
918 | 888 | if (replaced_count) { | |
919 | /* Clear the inode cache not to refer to the old data */ | 889 | block_len_in_page = replaced_count; |
920 | ext4_ext_invalidate_cache(orig_inode); | 890 | replaced_size = |
921 | ext4_ext_invalidate_cache(donor_inode); | 891 | block_len_in_page << orig_inode->i_blkbits; |
892 | } else | ||
893 | goto out; | ||
894 | } | ||
922 | 895 | ||
923 | if (!page_has_buffers(page)) | 896 | if (!page_has_buffers(page)) |
924 | create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); | 897 | create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); |
@@ -928,16 +901,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
928 | bh = bh->b_this_page; | 901 | bh = bh->b_this_page; |
929 | 902 | ||
930 | for (i = 0; i < block_len_in_page; i++) { | 903 | for (i = 0; i < block_len_in_page; i++) { |
931 | ret = ext4_get_block(orig_inode, | 904 | *err = ext4_get_block(orig_inode, |
932 | (sector_t)(orig_blk_offset + i), bh, 0); | 905 | (sector_t)(orig_blk_offset + i), bh, 0); |
933 | if (ret < 0) | 906 | if (*err < 0) |
934 | goto out; | 907 | goto out; |
935 | 908 | ||
936 | if (bh->b_this_page != NULL) | 909 | if (bh->b_this_page != NULL) |
937 | bh = bh->b_this_page; | 910 | bh = bh->b_this_page; |
938 | } | 911 | } |
939 | 912 | ||
940 | ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, | 913 | *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, |
941 | page, fsdata); | 914 | page, fsdata); |
942 | page = NULL; | 915 | page = NULL; |
943 | 916 | ||
@@ -951,18 +924,20 @@ out: | |||
951 | out2: | 924 | out2: |
952 | ext4_journal_stop(handle); | 925 | ext4_journal_stop(handle); |
953 | 926 | ||
954 | return ret < 0 ? ret : 0; | 927 | if (err2) |
928 | *err = err2; | ||
929 | |||
930 | return replaced_count; | ||
955 | } | 931 | } |
956 | 932 | ||
957 | /** | 933 | /** |
958 | * mext_check_argumants - Check whether move extent can be done | 934 | * mext_check_arguments - Check whether move extent can be done |
959 | * | 935 | * |
960 | * @orig_inode: original inode | 936 | * @orig_inode: original inode |
961 | * @donor_inode: donor inode | 937 | * @donor_inode: donor inode |
962 | * @orig_start: logical start offset in block for orig | 938 | * @orig_start: logical start offset in block for orig |
963 | * @donor_start: logical start offset in block for donor | 939 | * @donor_start: logical start offset in block for donor |
964 | * @len: the number of blocks to be moved | 940 | * @len: the number of blocks to be moved |
965 | * @moved_len: moved block length | ||
966 | * | 941 | * |
967 | * Check the arguments of ext4_move_extents() whether the files can be | 942 | * Check the arguments of ext4_move_extents() whether the files can be |
968 | * exchanged with each other. | 943 | * exchanged with each other. |
@@ -970,18 +945,17 @@ out2: | |||
970 | */ | 945 | */ |
971 | static int | 946 | static int |
972 | mext_check_arguments(struct inode *orig_inode, | 947 | mext_check_arguments(struct inode *orig_inode, |
973 | struct inode *donor_inode, __u64 orig_start, | 948 | struct inode *donor_inode, __u64 orig_start, |
974 | __u64 donor_start, __u64 *len, __u64 moved_len) | 949 | __u64 donor_start, __u64 *len) |
975 | { | 950 | { |
976 | ext4_lblk_t orig_blocks, donor_blocks; | 951 | ext4_lblk_t orig_blocks, donor_blocks; |
977 | unsigned int blkbits = orig_inode->i_blkbits; | 952 | unsigned int blkbits = orig_inode->i_blkbits; |
978 | unsigned int blocksize = 1 << blkbits; | 953 | unsigned int blocksize = 1 << blkbits; |
979 | 954 | ||
980 | /* Regular file check */ | 955 | if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { |
981 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { | 956 | ext4_debug("ext4 move extent: suid or sgid is set" |
982 | ext4_debug("ext4 move extent: The argument files should be " | 957 | " to donor file [ino:orig %lu, donor %lu]\n", |
983 | "regular file [ino:orig %lu, donor %lu]\n", | 958 | orig_inode->i_ino, donor_inode->i_ino); |
984 | orig_inode->i_ino, donor_inode->i_ino); | ||
985 | return -EINVAL; | 959 | return -EINVAL; |
986 | } | 960 | } |
987 | 961 | ||
@@ -1025,13 +999,6 @@ mext_check_arguments(struct inode *orig_inode, | |||
1025 | return -EINVAL; | 999 | return -EINVAL; |
1026 | } | 1000 | } |
1027 | 1001 | ||
1028 | if (moved_len) { | ||
1029 | ext4_debug("ext4 move extent: moved_len should be 0 " | ||
1030 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, | ||
1031 | donor_inode->i_ino); | ||
1032 | return -EINVAL; | ||
1033 | } | ||
1034 | |||
1035 | if ((orig_start > EXT_MAX_BLOCK) || | 1002 | if ((orig_start > EXT_MAX_BLOCK) || |
1036 | (donor_start > EXT_MAX_BLOCK) || | 1003 | (donor_start > EXT_MAX_BLOCK) || |
1037 | (*len > EXT_MAX_BLOCK) || | 1004 | (*len > EXT_MAX_BLOCK) || |
@@ -1088,7 +1055,7 @@ mext_check_arguments(struct inode *orig_inode, | |||
1088 | } | 1055 | } |
1089 | 1056 | ||
1090 | if (!*len) { | 1057 | if (!*len) { |
1091 | ext4_debug("ext4 move extent: len shoudld not be 0 " | 1058 | ext4_debug("ext4 move extent: len should not be 0 " |
1092 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, | 1059 | "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, |
1093 | donor_inode->i_ino); | 1060 | donor_inode->i_ino); |
1094 | return -EINVAL; | 1061 | return -EINVAL; |
@@ -1232,16 +1199,24 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1232 | return -EINVAL; | 1199 | return -EINVAL; |
1233 | } | 1200 | } |
1234 | 1201 | ||
1235 | /* protect orig and donor against a truncate */ | 1202 | /* Regular file check */ |
1203 | if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { | ||
1204 | ext4_debug("ext4 move extent: The argument files should be " | ||
1205 | "regular file [ino:orig %lu, donor %lu]\n", | ||
1206 | orig_inode->i_ino, donor_inode->i_ino); | ||
1207 | return -EINVAL; | ||
1208 | } | ||
1209 | |||
1210 | /* Protect orig and donor inodes against a truncate */ | ||
1236 | ret1 = mext_inode_double_lock(orig_inode, donor_inode); | 1211 | ret1 = mext_inode_double_lock(orig_inode, donor_inode); |
1237 | if (ret1 < 0) | 1212 | if (ret1 < 0) |
1238 | return ret1; | 1213 | return ret1; |
1239 | 1214 | ||
1240 | mext_double_down_read(orig_inode, donor_inode); | 1215 | /* Protect extent tree against block allocations via delalloc */ |
1216 | double_down_write_data_sem(orig_inode, donor_inode); | ||
1241 | /* Check the filesystem environment whether move_extent can be done */ | 1217 | /* Check the filesystem environment whether move_extent can be done */ |
1242 | ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, | 1218 | ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, |
1243 | donor_start, &len, *moved_len); | 1219 | donor_start, &len); |
1244 | mext_double_up_read(orig_inode, donor_inode); | ||
1245 | if (ret1) | 1220 | if (ret1) |
1246 | goto out; | 1221 | goto out; |
1247 | 1222 | ||
@@ -1355,36 +1330,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1355 | seq_start = le32_to_cpu(ext_cur->ee_block); | 1330 | seq_start = le32_to_cpu(ext_cur->ee_block); |
1356 | rest_blocks = seq_blocks; | 1331 | rest_blocks = seq_blocks; |
1357 | 1332 | ||
1358 | /* Discard preallocations of two inodes */ | 1333 | /* |
1359 | down_write(&EXT4_I(orig_inode)->i_data_sem); | 1334 | * Up semaphore to avoid following problems: |
1360 | ext4_discard_preallocations(orig_inode); | 1335 | * a. transaction deadlock among ext4_journal_start, |
1361 | up_write(&EXT4_I(orig_inode)->i_data_sem); | 1336 | * ->write_begin via pagefault, and jbd2_journal_commit |
1362 | 1337 | * b. racing with ->readpage, ->write_begin, and ext4_get_block | |
1363 | down_write(&EXT4_I(donor_inode)->i_data_sem); | 1338 | * in move_extent_per_page |
1364 | ext4_discard_preallocations(donor_inode); | 1339 | */ |
1365 | up_write(&EXT4_I(donor_inode)->i_data_sem); | 1340 | double_up_write_data_sem(orig_inode, donor_inode); |
1366 | 1341 | ||
1367 | while (orig_page_offset <= seq_end_page) { | 1342 | while (orig_page_offset <= seq_end_page) { |
1368 | 1343 | ||
1369 | /* Swap original branches with new branches */ | 1344 | /* Swap original branches with new branches */ |
1370 | ret1 = move_extent_per_page(o_filp, donor_inode, | 1345 | block_len_in_page = move_extent_per_page( |
1346 | o_filp, donor_inode, | ||
1371 | orig_page_offset, | 1347 | orig_page_offset, |
1372 | data_offset_in_page, | 1348 | data_offset_in_page, |
1373 | block_len_in_page, uninit); | 1349 | block_len_in_page, uninit, |
1374 | if (ret1 < 0) | 1350 | &ret1); |
1375 | goto out; | 1351 | |
1376 | orig_page_offset++; | ||
1377 | /* Count how many blocks we have exchanged */ | 1352 | /* Count how many blocks we have exchanged */ |
1378 | *moved_len += block_len_in_page; | 1353 | *moved_len += block_len_in_page; |
1354 | if (ret1 < 0) | ||
1355 | break; | ||
1379 | if (*moved_len > len) { | 1356 | if (*moved_len > len) { |
1380 | ext4_error(orig_inode->i_sb, __func__, | 1357 | ext4_error(orig_inode->i_sb, |
1381 | "We replaced blocks too much! " | 1358 | "We replaced blocks too much! " |
1382 | "sum of replaced: %llu requested: %llu", | 1359 | "sum of replaced: %llu requested: %llu", |
1383 | *moved_len, len); | 1360 | *moved_len, len); |
1384 | ret1 = -EIO; | 1361 | ret1 = -EIO; |
1385 | goto out; | 1362 | break; |
1386 | } | 1363 | } |
1387 | 1364 | ||
1365 | orig_page_offset++; | ||
1388 | data_offset_in_page = 0; | 1366 | data_offset_in_page = 0; |
1389 | rest_blocks -= block_len_in_page; | 1367 | rest_blocks -= block_len_in_page; |
1390 | if (rest_blocks > blocks_per_page) | 1368 | if (rest_blocks > blocks_per_page) |
@@ -1393,6 +1371,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1393 | block_len_in_page = rest_blocks; | 1371 | block_len_in_page = rest_blocks; |
1394 | } | 1372 | } |
1395 | 1373 | ||
1374 | double_down_write_data_sem(orig_inode, donor_inode); | ||
1375 | if (ret1 < 0) | ||
1376 | break; | ||
1377 | |||
1396 | /* Decrease buffer counter */ | 1378 | /* Decrease buffer counter */ |
1397 | if (holecheck_path) | 1379 | if (holecheck_path) |
1398 | ext4_ext_drop_refs(holecheck_path); | 1380 | ext4_ext_drop_refs(holecheck_path); |
@@ -1414,6 +1396,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
1414 | 1396 | ||
1415 | } | 1397 | } |
1416 | out: | 1398 | out: |
1399 | if (*moved_len) { | ||
1400 | ext4_discard_preallocations(orig_inode); | ||
1401 | ext4_discard_preallocations(donor_inode); | ||
1402 | } | ||
1403 | |||
1417 | if (orig_path) { | 1404 | if (orig_path) { |
1418 | ext4_ext_drop_refs(orig_path); | 1405 | ext4_ext_drop_refs(orig_path); |
1419 | kfree(orig_path); | 1406 | kfree(orig_path); |
@@ -1422,7 +1409,7 @@ out: | |||
1422 | ext4_ext_drop_refs(holecheck_path); | 1409 | ext4_ext_drop_refs(holecheck_path); |
1423 | kfree(holecheck_path); | 1410 | kfree(holecheck_path); |
1424 | } | 1411 | } |
1425 | 1412 | double_up_write_data_sem(orig_inode, donor_inode); | |
1426 | ret2 = mext_inode_double_unlock(orig_inode, donor_inode); | 1413 | ret2 = mext_inode_double_unlock(orig_inode, donor_inode); |
1427 | 1414 | ||
1428 | if (ret1) | 1415 | if (ret1) |