diff options
-rw-r--r-- | fs/ext4/move_extent.c | 253 |
1 files changed, 178 insertions, 75 deletions
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index e2016f34b58..c87a746450e 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -736,6 +736,118 @@ out: | |||
736 | } | 736 | } |
737 | 737 | ||
738 | /** | 738 | /** |
739 | * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 | ||
740 | * | ||
741 | * @inode1: the inode structure | ||
742 | * @inode2: the inode structure | ||
743 | * @index: page index | ||
744 | * @page: result page vector | ||
745 | * | ||
746 | * Grab two locked pages for inode's by inode order | ||
747 | */ | ||
748 | static int | ||
749 | mext_page_double_lock(struct inode *inode1, struct inode *inode2, | ||
750 | pgoff_t index, struct page *page[2]) | ||
751 | { | ||
752 | struct address_space *mapping[2]; | ||
753 | unsigned fl = AOP_FLAG_NOFS; | ||
754 | |||
755 | BUG_ON(!inode1 || !inode2); | ||
756 | if (inode1 < inode2) { | ||
757 | mapping[0] = inode1->i_mapping; | ||
758 | mapping[1] = inode2->i_mapping; | ||
759 | } else { | ||
760 | mapping[0] = inode2->i_mapping; | ||
761 | mapping[1] = inode1->i_mapping; | ||
762 | } | ||
763 | |||
764 | page[0] = grab_cache_page_write_begin(mapping[0], index, fl); | ||
765 | if (!page[0]) | ||
766 | return -ENOMEM; | ||
767 | |||
768 | page[1] = grab_cache_page_write_begin(mapping[1], index, fl); | ||
769 | if (!page[1]) { | ||
770 | unlock_page(page[0]); | ||
771 | page_cache_release(page[0]); | ||
772 | return -ENOMEM; | ||
773 | } | ||
774 | |||
775 | if (inode1 > inode2) { | ||
776 | struct page *tmp; | ||
777 | tmp = page[0]; | ||
778 | page[0] = page[1]; | ||
779 | page[1] = tmp; | ||
780 | } | ||
781 | return 0; | ||
782 | } | ||
783 | |||
784 | /* Force page buffers uptodate w/o dropping page's lock */ | ||
785 | static int | ||
786 | mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) | ||
787 | { | ||
788 | struct inode *inode = page->mapping->host; | ||
789 | sector_t block; | ||
790 | struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; | ||
791 | unsigned int blocksize, block_start, block_end; | ||
792 | int i, err, nr = 0, partial = 0; | ||
793 | BUG_ON(!PageLocked(page)); | ||
794 | BUG_ON(PageWriteback(page)); | ||
795 | |||
796 | if (PageUptodate(page)) | ||
797 | return 0; | ||
798 | |||
799 | blocksize = 1 << inode->i_blkbits; | ||
800 | if (!page_has_buffers(page)) | ||
801 | create_empty_buffers(page, blocksize, 0); | ||
802 | |||
803 | head = page_buffers(page); | ||
804 | block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
805 | for (bh = head, block_start = 0; bh != head || !block_start; | ||
806 | block++, block_start = block_end, bh = bh->b_this_page) { | ||
807 | block_end = block_start + blocksize; | ||
808 | if (block_end <= from || block_start >= to) { | ||
809 | if (!buffer_uptodate(bh)) | ||
810 | partial = 1; | ||
811 | continue; | ||
812 | } | ||
813 | if (buffer_uptodate(bh)) | ||
814 | continue; | ||
815 | if (!buffer_mapped(bh)) { | ||
816 | int err = 0; | ||
817 | err = ext4_get_block(inode, block, bh, 0); | ||
818 | if (err) { | ||
819 | SetPageError(page); | ||
820 | return err; | ||
821 | } | ||
822 | if (!buffer_mapped(bh)) { | ||
823 | zero_user(page, block_start, blocksize); | ||
824 | if (!err) | ||
825 | set_buffer_uptodate(bh); | ||
826 | continue; | ||
827 | } | ||
828 | } | ||
829 | BUG_ON(nr >= MAX_BUF_PER_PAGE); | ||
830 | arr[nr++] = bh; | ||
831 | } | ||
832 | /* No io required */ | ||
833 | if (!nr) | ||
834 | goto out; | ||
835 | |||
836 | for (i = 0; i < nr; i++) { | ||
837 | bh = arr[i]; | ||
838 | if (!bh_uptodate_or_lock(bh)) { | ||
839 | err = bh_submit_read(bh); | ||
840 | if (err) | ||
841 | return err; | ||
842 | } | ||
843 | } | ||
844 | out: | ||
845 | if (!partial) | ||
846 | SetPageUptodate(page); | ||
847 | return 0; | ||
848 | } | ||
849 | |||
850 | /** | ||
739 | * move_extent_per_page - Move extent data per page | 851 | * move_extent_per_page - Move extent data per page |
740 | * | 852 | * |
741 | * @o_filp: file structure of original file | 853 | * @o_filp: file structure of original file |
@@ -757,26 +869,24 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
757 | int block_len_in_page, int uninit, int *err) | 869 | int block_len_in_page, int uninit, int *err) |
758 | { | 870 | { |
759 | struct inode *orig_inode = o_filp->f_dentry->d_inode; | 871 | struct inode *orig_inode = o_filp->f_dentry->d_inode; |
760 | struct address_space *mapping = orig_inode->i_mapping; | 872 | struct page *pagep[2] = {NULL, NULL}; |
761 | struct buffer_head *bh; | ||
762 | struct page *page = NULL; | ||
763 | const struct address_space_operations *a_ops = mapping->a_ops; | ||
764 | handle_t *handle; | 873 | handle_t *handle; |
765 | ext4_lblk_t orig_blk_offset; | 874 | ext4_lblk_t orig_blk_offset; |
766 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; | 875 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; |
767 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 876 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
768 | unsigned int w_flags = 0; | 877 | unsigned int w_flags = 0; |
769 | unsigned int tmp_data_size, data_size, replaced_size; | 878 | unsigned int tmp_data_size, data_size, replaced_size; |
770 | void *fsdata; | 879 | int err2, jblocks, retries = 0; |
771 | int i, jblocks; | ||
772 | int err2 = 0; | ||
773 | int replaced_count = 0; | 880 | int replaced_count = 0; |
881 | int from = data_offset_in_page << orig_inode->i_blkbits; | ||
774 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; | 882 | int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; |
775 | 883 | ||
776 | /* | 884 | /* |
777 | * It needs twice the amount of ordinary journal buffers because | 885 | * It needs twice the amount of ordinary journal buffers because |
778 | * inode and donor_inode may change each different metadata blocks. | 886 | * inode and donor_inode may change each different metadata blocks. |
779 | */ | 887 | */ |
888 | again: | ||
889 | *err = 0; | ||
780 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; | 890 | jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; |
781 | handle = ext4_journal_start(orig_inode, jblocks); | 891 | handle = ext4_journal_start(orig_inode, jblocks); |
782 | if (IS_ERR(handle)) { | 892 | if (IS_ERR(handle)) { |
@@ -790,19 +900,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
790 | orig_blk_offset = orig_page_offset * blocks_per_page + | 900 | orig_blk_offset = orig_page_offset * blocks_per_page + |
791 | data_offset_in_page; | 901 | data_offset_in_page; |
792 | 902 | ||
793 | /* | ||
794 | * If orig extent is uninitialized one, | ||
795 | * it's not necessary force the page into memory | ||
796 | * and then force it to be written out again. | ||
797 | * Just swap data blocks between orig and donor. | ||
798 | */ | ||
799 | if (uninit) { | ||
800 | replaced_count = mext_replace_branches(handle, orig_inode, | ||
801 | donor_inode, orig_blk_offset, | ||
802 | block_len_in_page, err); | ||
803 | goto out2; | ||
804 | } | ||
805 | |||
806 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; | 903 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; |
807 | 904 | ||
808 | /* Calculate data_size */ | 905 | /* Calculate data_size */ |
@@ -824,75 +921,81 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
824 | 921 | ||
825 | replaced_size = data_size; | 922 | replaced_size = data_size; |
826 | 923 | ||
827 | *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, | 924 | *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, |
828 | &page, &fsdata); | 925 | pagep); |
829 | if (unlikely(*err < 0)) | 926 | if (unlikely(*err < 0)) |
830 | goto out; | 927 | goto stop_journal; |
831 | 928 | ||
832 | if (!PageUptodate(page)) { | 929 | *err = mext_page_mkuptodate(pagep[0], from, from + replaced_size); |
833 | mapping->a_ops->readpage(o_filp, page); | 930 | if (*err) |
834 | lock_page(page); | 931 | goto unlock_pages; |
932 | |||
933 | /* At this point all buffers in range are uptodate, old mapping layout | ||
934 | * is no longer required, try to drop it now. */ | ||
935 | if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) || | ||
936 | (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) { | ||
937 | *err = -EBUSY; | ||
938 | goto unlock_pages; | ||
835 | } | 939 | } |
836 | 940 | ||
837 | /* | ||
838 | * try_to_release_page() doesn't call releasepage in writeback mode. | ||
839 | * We should care about the order of writing to the same file | ||
840 | * by multiple move extent processes. | ||
841 | * It needs to call wait_on_page_writeback() to wait for the | ||
842 | * writeback of the page. | ||
843 | */ | ||
844 | wait_on_page_writeback(page); | ||
845 | |||
846 | /* Release old bh and drop refs */ | ||
847 | try_to_release_page(page, 0); | ||
848 | |||
849 | replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, | 941 | replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, |
850 | orig_blk_offset, block_len_in_page, | 942 | orig_blk_offset, |
851 | &err2); | 943 | block_len_in_page, err); |
852 | if (err2) { | 944 | if (*err) { |
853 | if (replaced_count) { | 945 | if (replaced_count) { |
854 | block_len_in_page = replaced_count; | 946 | block_len_in_page = replaced_count; |
855 | replaced_size = | 947 | replaced_size = |
856 | block_len_in_page << orig_inode->i_blkbits; | 948 | block_len_in_page << orig_inode->i_blkbits; |
857 | } else | 949 | } else |
858 | goto out; | 950 | goto unlock_pages; |
859 | } | 951 | } |
952 | /* Perform all the necessary steps, similar to write_begin()/write_end(), | |
953 | * but keep in mind that i_size will not change */ | |
954 | *err = __block_write_begin(pagep[0], from, from + replaced_size, | ||
955 | ext4_get_block); | ||
956 | if (!*err) | ||
957 | *err = block_commit_write(pagep[0], from, from + replaced_size); | ||
860 | 958 | ||
861 | if (!page_has_buffers(page)) | 959 | if (unlikely(*err < 0)) |
862 | create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); | 960 | goto repair_branches; |
863 | 961 | ||
864 | bh = page_buffers(page); | 962 | /* Even in case of data=writeback it is reasonable to pin |
865 | for (i = 0; i < data_offset_in_page; i++) | 963 | * inode to transaction, to prevent unexpected data loss */ |
866 | bh = bh->b_this_page; | 964 | *err = ext4_jbd2_file_inode(handle, orig_inode); |
867 | 965 | ||
868 | for (i = 0; i < block_len_in_page; i++) { | 966 | unlock_pages: |
869 | *err = ext4_get_block(orig_inode, | 967 | unlock_page(pagep[0]); |
870 | (sector_t)(orig_blk_offset + i), bh, 0); | 968 | page_cache_release(pagep[0]); |
871 | if (*err < 0) | 969 | unlock_page(pagep[1]); |
872 | goto out; | 970 | page_cache_release(pagep[1]); |
873 | 971 | stop_journal: | |
874 | if (bh->b_this_page != NULL) | ||
875 | bh = bh->b_this_page; | ||
876 | } | ||
877 | |||
878 | *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, | ||
879 | page, fsdata); | ||
880 | page = NULL; | ||
881 | |||
882 | out: | ||
883 | if (unlikely(page)) { | ||
884 | if (PageLocked(page)) | ||
885 | unlock_page(page); | ||
886 | page_cache_release(page); | ||
887 | ext4_journal_stop(handle); | ||
888 | } | ||
889 | out2: | ||
890 | ext4_journal_stop(handle); | 972 | ext4_journal_stop(handle); |
891 | 973 | /* The buffer was busy, probably because it is pinned to a journal |
892 | 974 | * transaction; forcing a transaction commit may help to free it. */ |
893 | *err = err2; | 975 | if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb, |
894 | 976 | &retries)) | |
977 | goto again; | ||
895 | return replaced_count; | 978 | return replaced_count; |
979 | |||
980 | repair_branches: | ||
981 | /* | ||
982 | * This should never ever happen! | ||
983 | * Extents are swapped already, but we are not able to copy data. | ||
984 | * Try to swap the extents back to their original places | |
985 | */ | ||
986 | double_down_write_data_sem(orig_inode, donor_inode); | ||
987 | replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, | ||
988 | orig_blk_offset, | ||
989 | block_len_in_page, &err2); | ||
990 | double_up_write_data_sem(orig_inode, donor_inode); | ||
991 | if (replaced_count != block_len_in_page) { | ||
992 | EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), | ||
993 | "Unable to copy data block," | ||
994 | " data will be lost."); | ||
995 | *err = -EIO; | ||
996 | } | ||
997 | replaced_count = 0; | ||
998 | goto unlock_pages; | ||
896 | } | 999 | } |
897 | 1000 | ||
898 | /** | 1001 | /** |