aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/ext4/move_extent.c253
1 files changed, 178 insertions, 75 deletions
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index e2016f34b58..c87a746450e 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -736,6 +736,118 @@ out:
736} 736}
737 737
738/** 738/**
739 * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
740 *
741 * @inode1: the inode structure
742 * @inode2: the inode structure
743 * @index: page index
744 * @page: result page vector
745 *
746 * Grab two locked pages for inode's by inode order
747 */
748static int
749mext_page_double_lock(struct inode *inode1, struct inode *inode2,
750 pgoff_t index, struct page *page[2])
751{
752 struct address_space *mapping[2];
753 unsigned fl = AOP_FLAG_NOFS;
754
755 BUG_ON(!inode1 || !inode2);
756 if (inode1 < inode2) {
757 mapping[0] = inode1->i_mapping;
758 mapping[1] = inode2->i_mapping;
759 } else {
760 mapping[0] = inode2->i_mapping;
761 mapping[1] = inode1->i_mapping;
762 }
763
764 page[0] = grab_cache_page_write_begin(mapping[0], index, fl);
765 if (!page[0])
766 return -ENOMEM;
767
768 page[1] = grab_cache_page_write_begin(mapping[1], index, fl);
769 if (!page[1]) {
770 unlock_page(page[0]);
771 page_cache_release(page[0]);
772 return -ENOMEM;
773 }
774
775 if (inode1 > inode2) {
776 struct page *tmp;
777 tmp = page[0];
778 page[0] = page[1];
779 page[1] = tmp;
780 }
781 return 0;
782}
783
784/* Force page buffers uptodate w/o dropping page's lock */
785static int
786mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
787{
788 struct inode *inode = page->mapping->host;
789 sector_t block;
790 struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE];
791 unsigned int blocksize, block_start, block_end;
792 int i, err, nr = 0, partial = 0;
793 BUG_ON(!PageLocked(page));
794 BUG_ON(PageWriteback(page));
795
796 if (PageUptodate(page))
797 return 0;
798
799 blocksize = 1 << inode->i_blkbits;
800 if (!page_has_buffers(page))
801 create_empty_buffers(page, blocksize, 0);
802
803 head = page_buffers(page);
804 block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
805 for (bh = head, block_start = 0; bh != head || !block_start;
806 block++, block_start = block_end, bh = bh->b_this_page) {
807 block_end = block_start + blocksize;
808 if (block_end <= from || block_start >= to) {
809 if (!buffer_uptodate(bh))
810 partial = 1;
811 continue;
812 }
813 if (buffer_uptodate(bh))
814 continue;
815 if (!buffer_mapped(bh)) {
816 int err = 0;
817 err = ext4_get_block(inode, block, bh, 0);
818 if (err) {
819 SetPageError(page);
820 return err;
821 }
822 if (!buffer_mapped(bh)) {
823 zero_user(page, block_start, blocksize);
824 if (!err)
825 set_buffer_uptodate(bh);
826 continue;
827 }
828 }
829 BUG_ON(nr >= MAX_BUF_PER_PAGE);
830 arr[nr++] = bh;
831 }
832 /* No io required */
833 if (!nr)
834 goto out;
835
836 for (i = 0; i < nr; i++) {
837 bh = arr[i];
838 if (!bh_uptodate_or_lock(bh)) {
839 err = bh_submit_read(bh);
840 if (err)
841 return err;
842 }
843 }
844out:
845 if (!partial)
846 SetPageUptodate(page);
847 return 0;
848}
849
850/**
739 * move_extent_per_page - Move extent data per page 851 * move_extent_per_page - Move extent data per page
740 * 852 *
741 * @o_filp: file structure of original file 853 * @o_filp: file structure of original file
@@ -757,26 +869,24 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
757 int block_len_in_page, int uninit, int *err) 869 int block_len_in_page, int uninit, int *err)
758{ 870{
759 struct inode *orig_inode = o_filp->f_dentry->d_inode; 871 struct inode *orig_inode = o_filp->f_dentry->d_inode;
760 struct address_space *mapping = orig_inode->i_mapping; 872 struct page *pagep[2] = {NULL, NULL};
761 struct buffer_head *bh;
762 struct page *page = NULL;
763 const struct address_space_operations *a_ops = mapping->a_ops;
764 handle_t *handle; 873 handle_t *handle;
765 ext4_lblk_t orig_blk_offset; 874 ext4_lblk_t orig_blk_offset;
766 long long offs = orig_page_offset << PAGE_CACHE_SHIFT; 875 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
767 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 876 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
768 unsigned int w_flags = 0; 877 unsigned int w_flags = 0;
769 unsigned int tmp_data_size, data_size, replaced_size; 878 unsigned int tmp_data_size, data_size, replaced_size;
770 void *fsdata; 879 int err2, jblocks, retries = 0;
771 int i, jblocks;
772 int err2 = 0;
773 int replaced_count = 0; 880 int replaced_count = 0;
881 int from = data_offset_in_page << orig_inode->i_blkbits;
774 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 882 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
775 883
776 /* 884 /*
777 * It needs twice the amount of ordinary journal buffers because 885 * It needs twice the amount of ordinary journal buffers because
778 * inode and donor_inode may change each different metadata blocks. 886 * inode and donor_inode may change each different metadata blocks.
779 */ 887 */
888again:
889 *err = 0;
780 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 890 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
781 handle = ext4_journal_start(orig_inode, jblocks); 891 handle = ext4_journal_start(orig_inode, jblocks);
782 if (IS_ERR(handle)) { 892 if (IS_ERR(handle)) {
@@ -790,19 +900,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
790 orig_blk_offset = orig_page_offset * blocks_per_page + 900 orig_blk_offset = orig_page_offset * blocks_per_page +
791 data_offset_in_page; 901 data_offset_in_page;
792 902
793 /*
794 * If orig extent is uninitialized one,
795 * it's not necessary force the page into memory
796 * and then force it to be written out again.
797 * Just swap data blocks between orig and donor.
798 */
799 if (uninit) {
800 replaced_count = mext_replace_branches(handle, orig_inode,
801 donor_inode, orig_blk_offset,
802 block_len_in_page, err);
803 goto out2;
804 }
805
806 offs = (long long)orig_blk_offset << orig_inode->i_blkbits; 903 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
807 904
808 /* Calculate data_size */ 905 /* Calculate data_size */
@@ -824,75 +921,81 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
824 921
825 replaced_size = data_size; 922 replaced_size = data_size;
826 923
827 *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, 924 *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
828 &page, &fsdata); 925 pagep);
829 if (unlikely(*err < 0)) 926 if (unlikely(*err < 0))
830 goto out; 927 goto stop_journal;
831 928
832 if (!PageUptodate(page)) { 929 *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
833 mapping->a_ops->readpage(o_filp, page); 930 if (*err)
834 lock_page(page); 931 goto unlock_pages;
932
933 /* At this point all buffers in range are uptodate, old mapping layout
934 * is no longer required, try to drop it now. */
935 if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
936 (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
937 *err = -EBUSY;
938 goto unlock_pages;
835 } 939 }
836 940
837 /*
838 * try_to_release_page() doesn't call releasepage in writeback mode.
839 * We should care about the order of writing to the same file
840 * by multiple move extent processes.
841 * It needs to call wait_on_page_writeback() to wait for the
842 * writeback of the page.
843 */
844 wait_on_page_writeback(page);
845
846 /* Release old bh and drop refs */
847 try_to_release_page(page, 0);
848
849 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, 941 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
850 orig_blk_offset, block_len_in_page, 942 orig_blk_offset,
851 &err2); 943 block_len_in_page, err);
852 if (err2) { 944 if (*err) {
853 if (replaced_count) { 945 if (replaced_count) {
854 block_len_in_page = replaced_count; 946 block_len_in_page = replaced_count;
855 replaced_size = 947 replaced_size =
856 block_len_in_page << orig_inode->i_blkbits; 948 block_len_in_page << orig_inode->i_blkbits;
857 } else 949 } else
858 goto out; 950 goto unlock_pages;
859 } 951 }
952 /* Perform all necessary steps similar write_begin()/write_end()
953 * but keeping in mind that i_size will not change */
954 *err = __block_write_begin(pagep[0], from, from + replaced_size,
955 ext4_get_block);
956 if (!*err)
957 *err = block_commit_write(pagep[0], from, from + replaced_size);
860 958
861 if (!page_has_buffers(page)) 959 if (unlikely(*err < 0))
862 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); 960 goto repair_branches;
863 961
864 bh = page_buffers(page); 962 /* Even in case of data=writeback it is reasonable to pin
865 for (i = 0; i < data_offset_in_page; i++) 963 * inode to transaction, to prevent unexpected data loss */
866 bh = bh->b_this_page; 964 *err = ext4_jbd2_file_inode(handle, orig_inode);
867 965
868 for (i = 0; i < block_len_in_page; i++) { 966unlock_pages:
869 *err = ext4_get_block(orig_inode, 967 unlock_page(pagep[0]);
870 (sector_t)(orig_blk_offset + i), bh, 0); 968 page_cache_release(pagep[0]);
871 if (*err < 0) 969 unlock_page(pagep[1]);
872 goto out; 970 page_cache_release(pagep[1]);
873 971stop_journal:
874 if (bh->b_this_page != NULL)
875 bh = bh->b_this_page;
876 }
877
878 *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
879 page, fsdata);
880 page = NULL;
881
882out:
883 if (unlikely(page)) {
884 if (PageLocked(page))
885 unlock_page(page);
886 page_cache_release(page);
887 ext4_journal_stop(handle);
888 }
889out2:
890 ext4_journal_stop(handle); 972 ext4_journal_stop(handle);
891 973 /* Buffer was busy because probably is pinned to journal transaction,
892 if (err2) 974 * force transaction commit may help to free it. */
893 *err = err2; 975 if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb,
894 976 &retries))
977 goto again;
895 return replaced_count; 978 return replaced_count;
979
980repair_branches:
981 /*
982 * This should never ever happen!
983 * Extents are swapped already, but we are not able to copy data.
984 * Try to swap extents to it's original places
985 */
986 double_down_write_data_sem(orig_inode, donor_inode);
987 replaced_count = mext_replace_branches(handle, donor_inode, orig_inode,
988 orig_blk_offset,
989 block_len_in_page, &err2);
990 double_up_write_data_sem(orig_inode, donor_inode);
991 if (replaced_count != block_len_in_page) {
992 EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
993 "Unable to copy data block,"
994 " data will be lost.");
995 *err = -EIO;
996 }
997 replaced_count = 0;
998 goto unlock_pages;
896} 999}
897 1000
898/** 1001/**