aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ocfs2/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r--fs/ocfs2/file.c576
1 files changed, 443 insertions, 133 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index a5fbd9cea968..81296b4e3646 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -175,13 +175,12 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file)
175 return 0; 175 return 0;
176} 176}
177 177
178static int ocfs2_sync_file(struct file *file, 178static int ocfs2_sync_file(struct file *file, int datasync)
179 struct dentry *dentry,
180 int datasync)
181{ 179{
182 int err = 0; 180 int err = 0;
183 journal_t *journal; 181 journal_t *journal;
184 struct inode *inode = dentry->d_inode; 182 struct dentry *dentry = file->f_path.dentry;
183 struct inode *inode = file->f_mapping->host;
185 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 184 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
186 185
187 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, 186 mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync,
@@ -278,10 +277,7 @@ int ocfs2_update_inode_atime(struct inode *inode,
278 inode->i_atime = CURRENT_TIME; 277 inode->i_atime = CURRENT_TIME;
279 di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); 278 di->i_atime = cpu_to_le64(inode->i_atime.tv_sec);
280 di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); 279 di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec);
281 280 ocfs2_journal_dirty(handle, bh);
282 ret = ocfs2_journal_dirty(handle, bh);
283 if (ret < 0)
284 mlog_errno(ret);
285 281
286out_commit: 282out_commit:
287 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 283 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
@@ -430,9 +426,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb,
430 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); 426 di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec);
431 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); 427 di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
432 428
433 status = ocfs2_journal_dirty(handle, fe_bh); 429 ocfs2_journal_dirty(handle, fe_bh);
434 if (status < 0)
435 mlog_errno(status);
436 430
437out_commit: 431out_commit:
438 ocfs2_commit_trans(osb, handle); 432 ocfs2_commit_trans(osb, handle);
@@ -449,7 +443,6 @@ static int ocfs2_truncate_file(struct inode *inode,
449 int status = 0; 443 int status = 0;
450 struct ocfs2_dinode *fe = NULL; 444 struct ocfs2_dinode *fe = NULL;
451 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 445 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
452 struct ocfs2_truncate_context *tc = NULL;
453 446
454 mlog_entry("(inode = %llu, new_i_size = %llu\n", 447 mlog_entry("(inode = %llu, new_i_size = %llu\n",
455 (unsigned long long)OCFS2_I(inode)->ip_blkno, 448 (unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -488,6 +481,9 @@ static int ocfs2_truncate_file(struct inode *inode,
488 481
489 down_write(&OCFS2_I(inode)->ip_alloc_sem); 482 down_write(&OCFS2_I(inode)->ip_alloc_sem);
490 483
484 ocfs2_resv_discard(&osb->osb_la_resmap,
485 &OCFS2_I(inode)->ip_la_data_resv);
486
491 /* 487 /*
492 * The inode lock forced other nodes to sync and drop their 488 * The inode lock forced other nodes to sync and drop their
493 * pages, which (correctly) happens even if we have a truncate 489 * pages, which (correctly) happens even if we have a truncate
@@ -517,13 +513,7 @@ static int ocfs2_truncate_file(struct inode *inode,
517 goto bail_unlock_sem; 513 goto bail_unlock_sem;
518 } 514 }
519 515
520 status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); 516 status = ocfs2_commit_truncate(osb, inode, di_bh);
521 if (status < 0) {
522 mlog_errno(status);
523 goto bail_unlock_sem;
524 }
525
526 status = ocfs2_commit_truncate(osb, inode, di_bh, tc);
527 if (status < 0) { 517 if (status < 0) {
528 mlog_errno(status); 518 mlog_errno(status);
529 goto bail_unlock_sem; 519 goto bail_unlock_sem;
@@ -666,11 +656,7 @@ restarted_transaction:
666 goto leave; 656 goto leave;
667 } 657 }
668 658
669 status = ocfs2_journal_dirty(handle, bh); 659 ocfs2_journal_dirty(handle, bh);
670 if (status < 0) {
671 mlog_errno(status);
672 goto leave;
673 }
674 660
675 spin_lock(&OCFS2_I(inode)->ip_lock); 661 spin_lock(&OCFS2_I(inode)->ip_lock);
676 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); 662 clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters);
@@ -738,28 +724,55 @@ leave:
738 return status; 724 return status;
739} 725}
740 726
727/*
728 * While a write will already be ordering the data, a truncate will not.
729 * Thus, we need to explicitly order the zeroed pages.
730 */
731static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
732{
733 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
734 handle_t *handle = NULL;
735 int ret = 0;
736
737 if (!ocfs2_should_order_data(inode))
738 goto out;
739
740 handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
741 if (IS_ERR(handle)) {
742 ret = -ENOMEM;
743 mlog_errno(ret);
744 goto out;
745 }
746
747 ret = ocfs2_jbd2_file_inode(handle, inode);
748 if (ret < 0)
749 mlog_errno(ret);
750
751out:
752 if (ret) {
753 if (!IS_ERR(handle))
754 ocfs2_commit_trans(osb, handle);
755 handle = ERR_PTR(ret);
756 }
757 return handle;
758}
759
741/* Some parts of this taken from generic_cont_expand, which turned out 760/* Some parts of this taken from generic_cont_expand, which turned out
742 * to be too fragile to do exactly what we need without us having to 761 * to be too fragile to do exactly what we need without us having to
743 * worry about recursive locking in ->write_begin() and ->write_end(). */ 762 * worry about recursive locking in ->write_begin() and ->write_end(). */
744static int ocfs2_write_zero_page(struct inode *inode, 763static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
745 u64 size) 764 u64 abs_to)
746{ 765{
747 struct address_space *mapping = inode->i_mapping; 766 struct address_space *mapping = inode->i_mapping;
748 struct page *page; 767 struct page *page;
749 unsigned long index; 768 unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
750 unsigned int offset;
751 handle_t *handle = NULL; 769 handle_t *handle = NULL;
752 int ret; 770 int ret = 0;
771 unsigned zero_from, zero_to, block_start, block_end;
753 772
754 offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ 773 BUG_ON(abs_from >= abs_to);
755 /* ugh. in prepare/commit_write, if from==to==start of block, we 774 BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
756 ** skip the prepare. make sure we never send an offset for the start 775 BUG_ON(abs_from & (inode->i_blkbits - 1));
757 ** of a block
758 */
759 if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
760 offset++;
761 }
762 index = size >> PAGE_CACHE_SHIFT;
763 776
764 page = grab_cache_page(mapping, index); 777 page = grab_cache_page(mapping, index);
765 if (!page) { 778 if (!page) {
@@ -768,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
768 goto out; 781 goto out;
769 } 782 }
770 783
771 ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); 784 /* Get the offsets within the page that we want to zero */
772 if (ret < 0) { 785 zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
773 mlog_errno(ret); 786 zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
774 goto out_unlock; 787 if (!zero_to)
775 } 788 zero_to = PAGE_CACHE_SIZE;
776 789
777 if (ocfs2_should_order_data(inode)) { 790 mlog(0,
778 handle = ocfs2_start_walk_page_trans(inode, page, offset, 791 "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
779 offset); 792 (unsigned long long)abs_from, (unsigned long long)abs_to,
780 if (IS_ERR(handle)) { 793 index, zero_from, zero_to);
781 ret = PTR_ERR(handle); 794
782 handle = NULL; 795 /* We know that zero_from is block aligned */
796 for (block_start = zero_from; block_start < zero_to;
797 block_start = block_end) {
798 block_end = block_start + (1 << inode->i_blkbits);
799
800 /*
801 * block_start is block-aligned. Bump it by one to
802 * force ocfs2_{prepare,commit}_write() to zero the
803 * whole block.
804 */
805 ret = ocfs2_prepare_write_nolock(inode, page,
806 block_start + 1,
807 block_start + 1);
808 if (ret < 0) {
809 mlog_errno(ret);
783 goto out_unlock; 810 goto out_unlock;
784 } 811 }
785 }
786 812
787 /* must not update i_size! */ 813 if (!handle) {
788 ret = block_commit_write(page, offset, offset); 814 handle = ocfs2_zero_start_ordered_transaction(inode);
789 if (ret < 0) 815 if (IS_ERR(handle)) {
790 mlog_errno(ret); 816 ret = PTR_ERR(handle);
791 else 817 handle = NULL;
792 ret = 0; 818 break;
819 }
820 }
821
822 /* must not update i_size! */
823 ret = block_commit_write(page, block_start + 1,
824 block_start + 1);
825 if (ret < 0)
826 mlog_errno(ret);
827 else
828 ret = 0;
829 }
793 830
794 if (handle) 831 if (handle)
795 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); 832 ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
833
796out_unlock: 834out_unlock:
797 unlock_page(page); 835 unlock_page(page);
798 page_cache_release(page); 836 page_cache_release(page);
@@ -800,22 +838,114 @@ out:
800 return ret; 838 return ret;
801} 839}
802 840
803static int ocfs2_zero_extend(struct inode *inode, 841/*
804 u64 zero_to_size) 842 * Find the next range to zero. We do this in terms of bytes because
843 * that's what ocfs2_zero_extend() wants, and it is dealing with the
844 * pagecache. We may return multiple extents.
845 *
846 * zero_start and zero_end are ocfs2_zero_extend()s current idea of what
847 * needs to be zeroed. range_start and range_end return the next zeroing
848 * range. A subsequent call should pass the previous range_end as its
849 * zero_start. If range_end is 0, there's nothing to do.
850 *
851 * Unwritten extents are skipped over. Refcounted extents are CoWd.
852 */
853static int ocfs2_zero_extend_get_range(struct inode *inode,
854 struct buffer_head *di_bh,
855 u64 zero_start, u64 zero_end,
856 u64 *range_start, u64 *range_end)
805{ 857{
806 int ret = 0; 858 int rc = 0, needs_cow = 0;
807 u64 start_off; 859 u32 p_cpos, zero_clusters = 0;
808 struct super_block *sb = inode->i_sb; 860 u32 zero_cpos =
861 zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
862 u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
863 unsigned int num_clusters = 0;
864 unsigned int ext_flags = 0;
809 865
810 start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); 866 while (zero_cpos < last_cpos) {
811 while (start_off < zero_to_size) { 867 rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
812 ret = ocfs2_write_zero_page(inode, start_off); 868 &num_clusters, &ext_flags);
813 if (ret < 0) { 869 if (rc) {
814 mlog_errno(ret); 870 mlog_errno(rc);
815 goto out; 871 goto out;
816 } 872 }
817 873
818 start_off += sb->s_blocksize; 874 if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
875 zero_clusters = num_clusters;
876 if (ext_flags & OCFS2_EXT_REFCOUNTED)
877 needs_cow = 1;
878 break;
879 }
880
881 zero_cpos += num_clusters;
882 }
883 if (!zero_clusters) {
884 *range_end = 0;
885 goto out;
886 }
887
888 while ((zero_cpos + zero_clusters) < last_cpos) {
889 rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
890 &p_cpos, &num_clusters,
891 &ext_flags);
892 if (rc) {
893 mlog_errno(rc);
894 goto out;
895 }
896
897 if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
898 break;
899 if (ext_flags & OCFS2_EXT_REFCOUNTED)
900 needs_cow = 1;
901 zero_clusters += num_clusters;
902 }
903 if ((zero_cpos + zero_clusters) > last_cpos)
904 zero_clusters = last_cpos - zero_cpos;
905
906 if (needs_cow) {
907 rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
908 UINT_MAX);
909 if (rc) {
910 mlog_errno(rc);
911 goto out;
912 }
913 }
914
915 *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
916 *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
917 zero_cpos + zero_clusters);
918
919out:
920 return rc;
921}
922
923/*
924 * Zero one range returned from ocfs2_zero_extend_get_range(). The caller
925 * has made sure that the entire range needs zeroing.
926 */
927static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
928 u64 range_end)
929{
930 int rc = 0;
931 u64 next_pos;
932 u64 zero_pos = range_start;
933
934 mlog(0, "range_start = %llu, range_end = %llu\n",
935 (unsigned long long)range_start,
936 (unsigned long long)range_end);
937 BUG_ON(range_start >= range_end);
938
939 while (zero_pos < range_end) {
940 next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
941 if (next_pos > range_end)
942 next_pos = range_end;
943 rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
944 if (rc < 0) {
945 mlog_errno(rc);
946 break;
947 }
948 zero_pos = next_pos;
819 949
820 /* 950 /*
821 * Very large extends have the potential to lock up 951 * Very large extends have the potential to lock up
@@ -824,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
824 cond_resched(); 954 cond_resched();
825 } 955 }
826 956
827out: 957 return rc;
958}
959
960int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
961 loff_t zero_to_size)
962{
963 int ret = 0;
964 u64 zero_start, range_start = 0, range_end = 0;
965 struct super_block *sb = inode->i_sb;
966
967 zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
968 mlog(0, "zero_start %llu for i_size %llu\n",
969 (unsigned long long)zero_start,
970 (unsigned long long)i_size_read(inode));
971 while (zero_start < zero_to_size) {
972 ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
973 zero_to_size,
974 &range_start,
975 &range_end);
976 if (ret) {
977 mlog_errno(ret);
978 break;
979 }
980 if (!range_end)
981 break;
982 /* Trim the ends */
983 if (range_start < zero_start)
984 range_start = zero_start;
985 if (range_end > zero_to_size)
986 range_end = zero_to_size;
987
988 ret = ocfs2_zero_extend_range(inode, range_start,
989 range_end);
990 if (ret) {
991 mlog_errno(ret);
992 break;
993 }
994 zero_start = range_end;
995 }
996
828 return ret; 997 return ret;
829} 998}
830 999
831int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) 1000int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
1001 u64 new_i_size, u64 zero_to)
832{ 1002{
833 int ret; 1003 int ret;
834 u32 clusters_to_add; 1004 u32 clusters_to_add;
835 struct ocfs2_inode_info *oi = OCFS2_I(inode); 1005 struct ocfs2_inode_info *oi = OCFS2_I(inode);
836 1006
1007 /*
1008 * Only quota files call this without a bh, and they can't be
1009 * refcounted.
1010 */
1011 BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
1012 BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
1013
837 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); 1014 clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
838 if (clusters_to_add < oi->ip_clusters) 1015 if (clusters_to_add < oi->ip_clusters)
839 clusters_to_add = 0; 1016 clusters_to_add = 0;
@@ -854,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
854 * still need to zero the area between the old i_size and the 1031 * still need to zero the area between the old i_size and the
855 * new i_size. 1032 * new i_size.
856 */ 1033 */
857 ret = ocfs2_zero_extend(inode, zero_to); 1034 ret = ocfs2_zero_extend(inode, di_bh, zero_to);
858 if (ret < 0) 1035 if (ret < 0)
859 mlog_errno(ret); 1036 mlog_errno(ret);
860 1037
@@ -876,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
876 goto out; 1053 goto out;
877 1054
878 if (i_size_read(inode) == new_i_size) 1055 if (i_size_read(inode) == new_i_size)
879 goto out; 1056 goto out;
880 BUG_ON(new_i_size < i_size_read(inode)); 1057 BUG_ON(new_i_size < i_size_read(inode));
881 1058
882 /* 1059 /*
883 * Fall through for converting inline data, even if the fs
884 * supports sparse files.
885 *
886 * The check for inline data here is legal - nobody can add
887 * the feature since we have i_mutex. We must check it again
888 * after acquiring ip_alloc_sem though, as paths like mmap
889 * might have raced us to converting the inode to extents.
890 */
891 if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
892 && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
893 goto out_update_size;
894
895 /*
896 * The alloc sem blocks people in read/write from reading our 1060 * The alloc sem blocks people in read/write from reading our
897 * allocation until we're done changing it. We depend on 1061 * allocation until we're done changing it. We depend on
898 * i_mutex to block other extend/truncate calls while we're 1062 * i_mutex to block other extend/truncate calls while we're
899 * here. 1063 * here. We even have to hold it for sparse files because there
1064 * might be some tail zeroing.
900 */ 1065 */
901 down_write(&oi->ip_alloc_sem); 1066 down_write(&oi->ip_alloc_sem);
902 1067
@@ -913,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
913 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); 1078 ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
914 if (ret) { 1079 if (ret) {
915 up_write(&oi->ip_alloc_sem); 1080 up_write(&oi->ip_alloc_sem);
916
917 mlog_errno(ret); 1081 mlog_errno(ret);
918 goto out; 1082 goto out;
919 } 1083 }
920 } 1084 }
921 1085
922 if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) 1086 if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
923 ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); 1087 ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
1088 else
1089 ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
1090 new_i_size);
924 1091
925 up_write(&oi->ip_alloc_sem); 1092 up_write(&oi->ip_alloc_sem);
926 1093
@@ -946,9 +1113,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
946 struct ocfs2_super *osb = OCFS2_SB(sb); 1113 struct ocfs2_super *osb = OCFS2_SB(sb);
947 struct buffer_head *bh = NULL; 1114 struct buffer_head *bh = NULL;
948 handle_t *handle = NULL; 1115 handle_t *handle = NULL;
949 int qtype;
950 struct dquot *transfer_from[MAXQUOTAS] = { };
951 struct dquot *transfer_to[MAXQUOTAS] = { }; 1116 struct dquot *transfer_to[MAXQUOTAS] = { };
1117 int qtype;
952 1118
953 mlog_entry("(0x%p, '%.*s')\n", dentry, 1119 mlog_entry("(0x%p, '%.*s')\n", dentry,
954 dentry->d_name.len, dentry->d_name.name); 1120 dentry->d_name.len, dentry->d_name.name);
@@ -979,10 +1145,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
979 if (status) 1145 if (status)
980 return status; 1146 return status;
981 1147
1148 if (is_quota_modification(inode, attr))
1149 dquot_initialize(inode);
982 size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; 1150 size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
983 if (size_change) { 1151 if (size_change) {
984 dquot_initialize(inode);
985
986 status = ocfs2_rw_lock(inode, 1); 1152 status = ocfs2_rw_lock(inode, 1);
987 if (status < 0) { 1153 if (status < 0) {
988 mlog_errno(status); 1154 mlog_errno(status);
@@ -1032,9 +1198,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1032 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { 1198 OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
1033 transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, 1199 transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid,
1034 USRQUOTA); 1200 USRQUOTA);
1035 transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, 1201 if (!transfer_to[USRQUOTA]) {
1036 USRQUOTA);
1037 if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) {
1038 status = -ESRCH; 1202 status = -ESRCH;
1039 goto bail_unlock; 1203 goto bail_unlock;
1040 } 1204 }
@@ -1044,9 +1208,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1044 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { 1208 OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
1045 transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, 1209 transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid,
1046 GRPQUOTA); 1210 GRPQUOTA);
1047 transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, 1211 if (!transfer_to[GRPQUOTA]) {
1048 GRPQUOTA);
1049 if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) {
1050 status = -ESRCH; 1212 status = -ESRCH;
1051 goto bail_unlock; 1213 goto bail_unlock;
1052 } 1214 }
@@ -1058,7 +1220,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1058 mlog_errno(status); 1220 mlog_errno(status);
1059 goto bail_unlock; 1221 goto bail_unlock;
1060 } 1222 }
1061 status = dquot_transfer(inode, attr); 1223 status = __dquot_transfer(inode, transfer_to);
1062 if (status < 0) 1224 if (status < 0)
1063 goto bail_commit; 1225 goto bail_commit;
1064 } else { 1226 } else {
@@ -1071,18 +1233,26 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
1071 } 1233 }
1072 1234
1073 /* 1235 /*
1074 * This will intentionally not wind up calling vmtruncate(), 1236 * This will intentionally not wind up calling truncate_setsize(),
1075 * since all the work for a size change has been done above. 1237 * since all the work for a size change has been done above.
1076 * Otherwise, we could get into problems with truncate as 1238 * Otherwise, we could get into problems with truncate as
1077 * ip_alloc_sem is used there to protect against i_size 1239 * ip_alloc_sem is used there to protect against i_size
1078 * changes. 1240 * changes.
1241 *
1242 * XXX: this means the conditional below can probably be removed.
1079 */ 1243 */
1080 status = inode_setattr(inode, attr); 1244 if ((attr->ia_valid & ATTR_SIZE) &&
1081 if (status < 0) { 1245 attr->ia_size != i_size_read(inode)) {
1082 mlog_errno(status); 1246 status = vmtruncate(inode, attr->ia_size);
1083 goto bail_commit; 1247 if (status) {
1248 mlog_errno(status);
1249 goto bail_commit;
1250 }
1084 } 1251 }
1085 1252
1253 setattr_copy(inode, attr);
1254 mark_inode_dirty(inode);
1255
1086 status = ocfs2_mark_inode_dirty(handle, inode, bh); 1256 status = ocfs2_mark_inode_dirty(handle, inode, bh);
1087 if (status < 0) 1257 if (status < 0)
1088 mlog_errno(status); 1258 mlog_errno(status);
@@ -1098,10 +1268,8 @@ bail:
1098 brelse(bh); 1268 brelse(bh);
1099 1269
1100 /* Release quota pointers in case we acquired them */ 1270 /* Release quota pointers in case we acquired them */
1101 for (qtype = 0; qtype < MAXQUOTAS; qtype++) { 1271 for (qtype = 0; qtype < MAXQUOTAS; qtype++)
1102 dqput(transfer_to[qtype]); 1272 dqput(transfer_to[qtype]);
1103 dqput(transfer_from[qtype]);
1104 }
1105 1273
1106 if (!status && attr->ia_valid & ATTR_MODE) { 1274 if (!status && attr->ia_valid & ATTR_MODE) {
1107 status = ocfs2_acl_chmod(inode); 1275 status = ocfs2_acl_chmod(inode);
@@ -1195,9 +1363,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode,
1195 di = (struct ocfs2_dinode *) bh->b_data; 1363 di = (struct ocfs2_dinode *) bh->b_data;
1196 di->i_mode = cpu_to_le16(inode->i_mode); 1364 di->i_mode = cpu_to_le16(inode->i_mode);
1197 1365
1198 ret = ocfs2_journal_dirty(handle, bh); 1366 ocfs2_journal_dirty(handle, bh);
1199 if (ret < 0)
1200 mlog_errno(ret);
1201 1367
1202out_trans: 1368out_trans:
1203 ocfs2_commit_trans(osb, handle); 1369 ocfs2_commit_trans(osb, handle);
@@ -1434,16 +1600,90 @@ out:
1434 return ret; 1600 return ret;
1435} 1601}
1436 1602
1603static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos)
1604{
1605 int i;
1606 struct ocfs2_extent_rec *rec = NULL;
1607
1608 for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
1609
1610 rec = &el->l_recs[i];
1611
1612 if (le32_to_cpu(rec->e_cpos) < pos)
1613 break;
1614 }
1615
1616 return i;
1617}
1618
1619/*
1620 * Helper to calculate the punching pos and length in one run, we handle the
1621 * following three cases in order:
1622 *
1623 * - remove the entire record
1624 * - remove a partial record
1625 * - no record needs to be removed (hole-punching completed)
1626*/
1627static void ocfs2_calc_trunc_pos(struct inode *inode,
1628 struct ocfs2_extent_list *el,
1629 struct ocfs2_extent_rec *rec,
1630 u32 trunc_start, u32 *trunc_cpos,
1631 u32 *trunc_len, u32 *trunc_end,
1632 u64 *blkno, int *done)
1633{
1634 int ret = 0;
1635 u32 coff, range;
1636
1637 range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec);
1638
1639 if (le32_to_cpu(rec->e_cpos) >= trunc_start) {
1640 *trunc_cpos = le32_to_cpu(rec->e_cpos);
1641 /*
1642 * Skip holes if any.
1643 */
1644 if (range < *trunc_end)
1645 *trunc_end = range;
1646 *trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos);
1647 *blkno = le64_to_cpu(rec->e_blkno);
1648 *trunc_end = le32_to_cpu(rec->e_cpos);
1649 } else if (range > trunc_start) {
1650 *trunc_cpos = trunc_start;
1651 *trunc_len = *trunc_end - trunc_start;
1652 coff = trunc_start - le32_to_cpu(rec->e_cpos);
1653 *blkno = le64_to_cpu(rec->e_blkno) +
1654 ocfs2_clusters_to_blocks(inode->i_sb, coff);
1655 *trunc_end = trunc_start;
1656 } else {
1657 /*
1658 * It may have two following possibilities:
1659 *
1660 * - last record has been removed
1661 * - trunc_start was within a hole
1662 *
1663 * both two cases mean the completion of hole punching.
1664 */
1665 ret = 1;
1666 }
1667
1668 *done = ret;
1669}
1670
1437static int ocfs2_remove_inode_range(struct inode *inode, 1671static int ocfs2_remove_inode_range(struct inode *inode,
1438 struct buffer_head *di_bh, u64 byte_start, 1672 struct buffer_head *di_bh, u64 byte_start,
1439 u64 byte_len) 1673 u64 byte_len)
1440{ 1674{
1441 int ret = 0; 1675 int ret = 0, flags = 0, done = 0, i;
1442 u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; 1676 u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos;
1677 u32 cluster_in_el;
1443 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); 1678 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1444 struct ocfs2_cached_dealloc_ctxt dealloc; 1679 struct ocfs2_cached_dealloc_ctxt dealloc;
1445 struct address_space *mapping = inode->i_mapping; 1680 struct address_space *mapping = inode->i_mapping;
1446 struct ocfs2_extent_tree et; 1681 struct ocfs2_extent_tree et;
1682 struct ocfs2_path *path = NULL;
1683 struct ocfs2_extent_list *el = NULL;
1684 struct ocfs2_extent_rec *rec = NULL;
1685 struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
1686 u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc);
1447 1687
1448 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); 1688 ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
1449 ocfs2_init_dealloc_ctxt(&dealloc); 1689 ocfs2_init_dealloc_ctxt(&dealloc);
@@ -1469,17 +1709,35 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1469 goto out; 1709 goto out;
1470 } 1710 }
1471 1711
1712 /*
1713 * For reflinks, we may need to CoW 2 clusters which might be
1714 * partially zero'd later, if hole's start and end offset were
1715 * within one cluster(means is not exactly aligned to clustersize).
1716 */
1717
1718 if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
1719
1720 ret = ocfs2_cow_file_pos(inode, di_bh, byte_start);
1721 if (ret) {
1722 mlog_errno(ret);
1723 goto out;
1724 }
1725
1726 ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len);
1727 if (ret) {
1728 mlog_errno(ret);
1729 goto out;
1730 }
1731 }
1732
1472 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); 1733 trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start);
1473 trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; 1734 trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits;
1474 if (trunc_len >= trunc_start) 1735 cluster_in_el = trunc_end;
1475 trunc_len -= trunc_start;
1476 else
1477 trunc_len = 0;
1478 1736
1479 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", 1737 mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n",
1480 (unsigned long long)OCFS2_I(inode)->ip_blkno, 1738 (unsigned long long)OCFS2_I(inode)->ip_blkno,
1481 (unsigned long long)byte_start, 1739 (unsigned long long)byte_start,
1482 (unsigned long long)byte_len, trunc_start, trunc_len); 1740 (unsigned long long)byte_len, trunc_start, trunc_end);
1483 1741
1484 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); 1742 ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len);
1485 if (ret) { 1743 if (ret) {
@@ -1487,31 +1745,79 @@ static int ocfs2_remove_inode_range(struct inode *inode,
1487 goto out; 1745 goto out;
1488 } 1746 }
1489 1747
1490 cpos = trunc_start; 1748 path = ocfs2_new_path_from_et(&et);
1491 while (trunc_len) { 1749 if (!path) {
1492 ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, 1750 ret = -ENOMEM;
1493 &alloc_size, NULL); 1751 mlog_errno(ret);
1752 goto out;
1753 }
1754
1755 while (trunc_end > trunc_start) {
1756
1757 ret = ocfs2_find_path(INODE_CACHE(inode), path,
1758 cluster_in_el);
1494 if (ret) { 1759 if (ret) {
1495 mlog_errno(ret); 1760 mlog_errno(ret);
1496 goto out; 1761 goto out;
1497 } 1762 }
1498 1763
1499 if (alloc_size > trunc_len) 1764 el = path_leaf_el(path);
1500 alloc_size = trunc_len; 1765
1766 i = ocfs2_find_rec(el, trunc_end);
1767 /*
1768 * Need to go to previous extent block.
1769 */
1770 if (i < 0) {
1771 if (path->p_tree_depth == 0)
1772 break;
1501 1773
1502 /* Only do work for non-holes */ 1774 ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb,
1503 if (phys_cpos != 0) { 1775 path,
1504 ret = ocfs2_remove_btree_range(inode, &et, cpos, 1776 &cluster_in_el);
1505 phys_cpos, alloc_size,
1506 &dealloc);
1507 if (ret) { 1777 if (ret) {
1508 mlog_errno(ret); 1778 mlog_errno(ret);
1509 goto out; 1779 goto out;
1510 } 1780 }
1781
1782 /*
1783 * We've reached the leftmost extent block,
1784 * it's safe to leave.
1785 */
1786 if (cluster_in_el == 0)
1787 break;
1788
1789 /*
1790 * The 'pos' searched for previous extent block is
1791 * always one cluster less than actual trunc_end.
1792 */
1793 trunc_end = cluster_in_el + 1;
1794
1795 ocfs2_reinit_path(path, 1);
1796
1797 continue;
1798
1799 } else
1800 rec = &el->l_recs[i];
1801
1802 ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos,
1803 &trunc_len, &trunc_end, &blkno, &done);
1804 if (done)
1805 break;
1806
1807 flags = rec->e_flags;
1808 phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
1809
1810 ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
1811 phys_cpos, trunc_len, flags,
1812 &dealloc, refcount_loc);
1813 if (ret < 0) {
1814 mlog_errno(ret);
1815 goto out;
1511 } 1816 }
1512 1817
1513 cpos += alloc_size; 1818 cluster_in_el = trunc_end;
1514 trunc_len -= alloc_size; 1819
1820 ocfs2_reinit_path(path, 1);
1515 } 1821 }
1516 1822
1517 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); 1823 ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
@@ -2001,9 +2307,13 @@ relock:
2001 * direct write may have instantiated a few 2307 * direct write may have instantiated a few
2002 * blocks outside i_size. Trim these off again. 2308 * blocks outside i_size. Trim these off again.
2003 * Don't need i_size_read because we hold i_mutex. 2309 * Don't need i_size_read because we hold i_mutex.
2310 *
2311 * XXX(truncate): this looks buggy because ocfs2 did not
2312 * actually implement ->truncate. Take a look at
2313 * the new truncate sequence and update this accordingly
2004 */ 2314 */
2005 if (*ppos + count > inode->i_size) 2315 if (*ppos + count > inode->i_size)
2006 vmtruncate(inode, inode->i_size); 2316 truncate_setsize(inode, inode->i_size);
2007 ret = written; 2317 ret = written;
2008 goto out_dio; 2318 goto out_dio;
2009 } 2319 }