aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r--fs/ocfs2/file.c309
1 files changed, 245 insertions, 64 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c447..2b10b36d1577 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
724 return status; 724 return status;
725} 725}
726 726
727/*
728 * While a write will already be ordering the data, a truncate will not.
729 * Thus, we need to explicitly order the zeroed pages.
730 */
731static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
732{
733 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
734 handle_t *handle = NULL;
735 int ret = 0;
736
737 if (!ocfs2_should_order_data(inode))
738 goto out;
739
740 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
741 if (IS_ERR(handle)) {
742 ret = -ENOMEM;
743 mlog_errno(ret);
744 goto out;
745 }
746
747 ret = ocfs2_jbd2_file_inode(handle, inode);
748 if (ret < 0)
749 mlog_errno(ret);
750
751out:
752 if (ret) {
753 if (!IS_ERR(handle))
754 ocfs2_commit_trans(osb, handle);
755 handle = ERR_PTR(ret);
756 }
757 return handle;
758}
759
727/* Some parts of this taken from generic_cont_expand, which turned out 760/* Some parts of this taken from generic_cont_expand, which turned out
728 * to be too fragile to do exactly what we need without us having to 761 * to be too fragile to do exactly what we need without us having to
729 * worry about recursive locking in ->write_begin() and ->write_end(). */ 762 * worry about recursive locking in ->write_begin() and ->write_end(). */
730static int ocfs2_write_zero_page(struct inode *inode, 763static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
731 u64 size) 764 u64 abs_to)
732{ 765{
733 struct address_space *mapping = inode->i_mapping; 766 struct address_space *mapping = inode->i_mapping;
734 struct page *page; 767 struct page *page;
735 unsigned long index; 768 unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
736 unsigned int offset;
737 handle_t *handle = NULL; 769 handle_t *handle = NULL;
738 int ret; 770 int ret = 0;
771 unsigned zero_from, zero_to, block_start, block_end;
739 772
740 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ 773 BUG_ON(abs_from >= abs_to);
741 /* ugh. in prepare/commit_write, if from==to==start of block, we 774 BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
742 ** skip the prepare. make sure we never send an offset for the start 775 BUG_ON(abs_from & (inode->i_blkbits - 1));
743 ** of a block
744 */
745 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
746 offset++;
747 }
748 index = size >> PAGE_CACHE_SHIFT;
749 776
750 page = grab_cache_page(mapping, index); 777 page = grab_cache_page(mapping, index);
751 if (!page) { 778 if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
754 goto out; 781 goto out;
755 } 782 }
756 783
757 ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); 784 /* Get the offsets within the page that we want to zero */
758 if (ret < 0) { 785 zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
759 mlog_errno(ret); 786 zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
760 goto out_unlock; 787 if (!zero_to)
761 } 788 zero_to = PAGE_CACHE_SIZE;
762 789
763 if (ocfs2_should_order_data(inode)) { 790 mlog(0,
764 handle = ocfs2_start_walk_page_trans(inode, page, offset, 791 "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
765 offset); 792 (unsigned long long)abs_from, (unsigned long long)abs_to,
766 if (IS_ERR(handle)) { 793 index, zero_from, zero_to);
767 ret = PTR_ERR(handle); 794
768 handle = NULL; 795 /* We know that zero_from is block aligned */
796 for (block_start = zero_from; block_start < zero_to;
797 block_start = block_end) {
798 block_end = block_start + (1 << inode->i_blkbits);
799
800 /*
801 * block_start is block-aligned. Bump it by one to
802 * force ocfs2_{prepare,commit}_write() to zero the
803 * whole block.
804 */
805 ret = ocfs2_prepare_write_nolock(inode, page,
806 block_start + 1,
807 block_start + 1);
808 if (ret < 0) {
809 mlog_errno(ret);
769 goto out_unlock; 810 goto out_unlock;
770 } 811 }
771 }
772 812
773 /* must not update i_size! */ 813 if (!handle) {
774 ret = block_commit_write(page, offset, offset); 814 handle = ocfs2_zero_start_ordered_transaction(inode);
775 if (ret < 0) 815 if (IS_ERR(handle)) {
776 mlog_errno(ret); 816 ret = PTR_ERR(handle);
777 else 817 handle = NULL;
778 ret = 0; 818 break;
819 }
820 }
821
822 /* must not update i_size! */
823 ret = block_commit_write(page, block_start + 1,
824 block_start + 1);
825 if (ret < 0)
826 mlog_errno(ret);
827 else
828 ret = 0;
829 }
779 830
780 if (handle) 831 if (handle)
781 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 832 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
833
782out_unlock: 834out_unlock:
783 unlock_page(page); 835 unlock_page(page);
784 page_cache_release(page); 836 page_cache_release(page);
@@ -786,22 +838,114 @@ out:
786 return ret; 838 return ret;
787} 839}
788 840
789static int ocfs2_zero_extend(struct inode *inode, 841/*
790 u64 zero_to_size) 842 * Find the next range to zero. We do this in terms of bytes because
843 * that's what ocfs2_zero_extend() wants, and it is dealing with the
844 * pagecache. We may return multiple extents.
845 *
846 * zero_start and zero_end are ocfs2_zero_extend()s current idea of what
847 * needs to be zeroed. range_start and range_end return the next zeroing
848 * range. A subsequent call should pass the previous range_end as its
849 * zero_start. If range_end is 0, there's nothing to do.
850 *
851 * Unwritten extents are skipped over. Refcounted extents are CoWd.
852 */
853static int ocfs2_zero_extend_get_range(struct inode *inode,
854 struct buffer_head *di_bh,
855 u64 zero_start, u64 zero_end,
856 u64 *range_start, u64 *range_end)
791{ 857{
792 int ret = 0; 858 int rc = 0, needs_cow = 0;
793 u64 start_off; 859 u32 p_cpos, zero_clusters = 0;
794 struct super_block *sb = inode->i_sb; 860 u32 zero_cpos =
861 zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
862 u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
863 unsigned int num_clusters = 0;
864 unsigned int ext_flags = 0;
795 865
796 start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); 866 while (zero_cpos < last_cpos) {
797 while (start_off < zero_to_size) { 867 rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
798 ret = ocfs2_write_zero_page(inode, start_off); 868 &num_clusters, &ext_flags);
799 if (ret < 0) { 869 if (rc) {
800 mlog_errno(ret); 870 mlog_errno(rc);
871 goto out;
872 }
873
874 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
875 zero_clusters = num_clusters;
876 if (ext_flags & OCFS2_EXT_REFCOUNTED)
877 needs_cow = 1;
878 break;
879 }
880
881 zero_cpos += num_clusters;
882 }
883 if (!zero_clusters) {
884 *range_end = 0;
885 goto out;
886 }
887
888 while ((zero_cpos + zero_clusters) < last_cpos) {
889 rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
890 &p_cpos, &num_clusters,
891 &ext_flags);
892 if (rc) {
893 mlog_errno(rc);
801 goto out; 894 goto out;
802 } 895 }
803 896
804 start_off += sb->s_blocksize; 897 if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
898 break;
899 if (ext_flags & OCFS2_EXT_REFCOUNTED)
900 needs_cow = 1;
901 zero_clusters += num_clusters;
902 }
903 if ((zero_cpos + zero_clusters) > last_cpos)
904 zero_clusters = last_cpos - zero_cpos;
905
906 if (needs_cow) {
907 rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
908 UINT_MAX);
909 if (rc) {
910 mlog_errno(rc);
911 goto out;
912 }
913 }
914
915 *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
916 *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
917 zero_cpos + zero_clusters);
918
919out:
920 return rc;
921}
922
923/*
924 * Zero one range returned from ocfs2_zero_extend_get_range(). The caller
925 * has made sure that the entire range needs zeroing.
926 */
927static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
928 u64 range_end)
929{
930 int rc = 0;
931 u64 next_pos;
932 u64 zero_pos = range_start;
933
934 mlog(0, "range_start = %llu, range_end = %llu\n",
935 (unsigned long long)range_start,
936 (unsigned long long)range_end);
937 BUG_ON(range_start >= range_end);
938
939 while (zero_pos < range_end) {
940 next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
941 if (next_pos > range_end)
942 next_pos = range_end;
943 rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
944 if (rc < 0) {
945 mlog_errno(rc);
946 break;
947 }
948 zero_pos = next_pos;
805 949
806 /* 950 /*
807 * Very large extends have the potential to lock up 951 * Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
810 cond_resched(); 954 cond_resched();
811 } 955 }
812 956
813out: 957 return rc;
958}
959
960int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
961 loff_t zero_to_size)
962{
963 int ret = 0;
964 u64 zero_start, range_start = 0, range_end = 0;
965 struct super_block *sb = inode->i_sb;
966
967 zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
968 mlog(0, "zero_start %llu for i_size %llu\n",
969 (unsigned long long)zero_start,
970 (unsigned long long)i_size_read(inode));
971 while (zero_start < zero_to_size) {
972 ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
973 zero_to_size,
974 &range_start,
975 &range_end);
976 if (ret) {
977 mlog_errno(ret);
978 break;
979 }
980 if (!range_end)
981 break;
982 /* Trim the ends */
983 if (range_start < zero_start)
984 range_start = zero_start;
985 if (range_end > zero_to_size)
986 range_end = zero_to_size;
987
988 ret = ocfs2_zero_extend_range(inode, range_start,
989 range_end);
990 if (ret) {
991 mlog_errno(ret);
992 break;
993 }
994 zero_start = range_end;
995 }
996
814 return ret; 997 return ret;
815} 998}
816 999
817int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) 1000int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
1001 u64 new_i_size, u64 zero_to)
818{ 1002{
819 int ret; 1003 int ret;
820 u32 clusters_to_add; 1004 u32 clusters_to_add;
821 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1005 struct ocfs2_inode_info *oi = OCFS2_I(inode);
822 1006
1007 /*
1008 * Only quota files call this without a bh, and they can't be
1009 * refcounted.
1010 */
1011 BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
1012 BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
1013
823 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); 1014 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
824 if (clusters_to_add < oi->ip_clusters) 1015 if (clusters_to_add < oi->ip_clusters)
825 clusters_to_add = 0; 1016 clusters_to_add = 0;
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
840 * still need to zero the area between the old i_size and the 1031 * still need to zero the area between the old i_size and the
841 * new i_size. 1032 * new i_size.
842 */ 1033 */
843 ret = ocfs2_zero_extend(inode, zero_to); 1034 ret = ocfs2_zero_extend(inode, di_bh, zero_to);
844 if (ret < 0) 1035 if (ret < 0)
845 mlog_errno(ret); 1036 mlog_errno(ret);
846 1037
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
862 goto out; 1053 goto out;
863 1054
864 if (i_size_read(inode) == new_i_size) 1055 if (i_size_read(inode) == new_i_size)
865 goto out; 1056 goto out;
866 BUG_ON(new_i_size < i_size_read(inode)); 1057 BUG_ON(new_i_size < i_size_read(inode));
867 1058
868 /* 1059 /*
869 * Fall through for converting inline data, even if the fs
870 * supports sparse files.
871 *
872 * The check for inline data here is legal - nobody can add
873 * the feature since we have i_mutex. We must check it again
874 * after acquiring ip_alloc_sem though, as paths like mmap
875 * might have raced us to converting the inode to extents.
876 */
877 if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
878 && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
879 goto out_update_size;
880
881 /*
882 * The alloc sem blocks people in read/write from reading our 1060 * The alloc sem blocks people in read/write from reading our
883 * allocation until we're done changing it. We depend on 1061 * allocation until we're done changing it. We depend on
884 * i_mutex to block other extend/truncate calls while we're 1062 * i_mutex to block other extend/truncate calls while we're
885 * here. 1063 * here. We even have to hold it for sparse files because there
1064 * might be some tail zeroing.
886 */ 1065 */
887 down_write(&oi->ip_alloc_sem); 1066 down_write(&oi->ip_alloc_sem);
888 1067
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
899 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); 1078 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
900 if (ret) { 1079 if (ret) {
901 up_write(&oi->ip_alloc_sem); 1080 up_write(&oi->ip_alloc_sem);
902
903 mlog_errno(ret); 1081 mlog_errno(ret);
904 goto out; 1082 goto out;
905 } 1083 }
906 } 1084 }
907 1085
908 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 1086 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
909 ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); 1087 ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
1088 else
1089 ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
1090 new_i_size);
910 1091
911 up_write(&oi->ip_alloc_sem); 1092 up_write(&oi->ip_alloc_sem);
912 1093