diff options
Diffstat (limited to 'fs/ocfs2/file.c')
-rw-r--r-- | fs/ocfs2/file.c | 576 |
1 files changed, 443 insertions, 133 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index a5fbd9cea968..81296b4e3646 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -175,13 +175,12 @@ static int ocfs2_dir_release(struct inode *inode, struct file *file) | |||
175 | return 0; | 175 | return 0; |
176 | } | 176 | } |
177 | 177 | ||
178 | static int ocfs2_sync_file(struct file *file, | 178 | static int ocfs2_sync_file(struct file *file, int datasync) |
179 | struct dentry *dentry, | ||
180 | int datasync) | ||
181 | { | 179 | { |
182 | int err = 0; | 180 | int err = 0; |
183 | journal_t *journal; | 181 | journal_t *journal; |
184 | struct inode *inode = dentry->d_inode; | 182 | struct dentry *dentry = file->f_path.dentry; |
183 | struct inode *inode = file->f_mapping->host; | ||
185 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 184 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
186 | 185 | ||
187 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, | 186 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, |
@@ -278,10 +277,7 @@ int ocfs2_update_inode_atime(struct inode *inode, | |||
278 | inode->i_atime = CURRENT_TIME; | 277 | inode->i_atime = CURRENT_TIME; |
279 | di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); | 278 | di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); |
280 | di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); | 279 | di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); |
281 | 280 | ocfs2_journal_dirty(handle, bh); | |
282 | ret = ocfs2_journal_dirty(handle, bh); | ||
283 | if (ret < 0) | ||
284 | mlog_errno(ret); | ||
285 | 281 | ||
286 | out_commit: | 282 | out_commit: |
287 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | 283 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |
@@ -430,9 +426,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
430 | di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); | 426 | di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); |
431 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 427 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
432 | 428 | ||
433 | status = ocfs2_journal_dirty(handle, fe_bh); | 429 | ocfs2_journal_dirty(handle, fe_bh); |
434 | if (status < 0) | ||
435 | mlog_errno(status); | ||
436 | 430 | ||
437 | out_commit: | 431 | out_commit: |
438 | ocfs2_commit_trans(osb, handle); | 432 | ocfs2_commit_trans(osb, handle); |
@@ -449,7 +443,6 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
449 | int status = 0; | 443 | int status = 0; |
450 | struct ocfs2_dinode *fe = NULL; | 444 | struct ocfs2_dinode *fe = NULL; |
451 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 445 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
452 | struct ocfs2_truncate_context *tc = NULL; | ||
453 | 446 | ||
454 | mlog_entry("(inode = %llu, new_i_size = %llu\n", | 447 | mlog_entry("(inode = %llu, new_i_size = %llu\n", |
455 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 448 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
@@ -488,6 +481,9 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
488 | 481 | ||
489 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 482 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
490 | 483 | ||
484 | ocfs2_resv_discard(&osb->osb_la_resmap, | ||
485 | &OCFS2_I(inode)->ip_la_data_resv); | ||
486 | |||
491 | /* | 487 | /* |
492 | * The inode lock forced other nodes to sync and drop their | 488 | * The inode lock forced other nodes to sync and drop their |
493 | * pages, which (correctly) happens even if we have a truncate | 489 | * pages, which (correctly) happens even if we have a truncate |
@@ -517,13 +513,7 @@ static int ocfs2_truncate_file(struct inode *inode, | |||
517 | goto bail_unlock_sem; | 513 | goto bail_unlock_sem; |
518 | } | 514 | } |
519 | 515 | ||
520 | status = ocfs2_prepare_truncate(osb, inode, di_bh, &tc); | 516 | status = ocfs2_commit_truncate(osb, inode, di_bh); |
521 | if (status < 0) { | ||
522 | mlog_errno(status); | ||
523 | goto bail_unlock_sem; | ||
524 | } | ||
525 | |||
526 | status = ocfs2_commit_truncate(osb, inode, di_bh, tc); | ||
527 | if (status < 0) { | 517 | if (status < 0) { |
528 | mlog_errno(status); | 518 | mlog_errno(status); |
529 | goto bail_unlock_sem; | 519 | goto bail_unlock_sem; |
@@ -666,11 +656,7 @@ restarted_transaction: | |||
666 | goto leave; | 656 | goto leave; |
667 | } | 657 | } |
668 | 658 | ||
669 | status = ocfs2_journal_dirty(handle, bh); | 659 | ocfs2_journal_dirty(handle, bh); |
670 | if (status < 0) { | ||
671 | mlog_errno(status); | ||
672 | goto leave; | ||
673 | } | ||
674 | 660 | ||
675 | spin_lock(&OCFS2_I(inode)->ip_lock); | 661 | spin_lock(&OCFS2_I(inode)->ip_lock); |
676 | clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); | 662 | clusters_to_add -= (OCFS2_I(inode)->ip_clusters - prev_clusters); |
@@ -738,28 +724,55 @@ leave: | |||
738 | return status; | 724 | return status; |
739 | } | 725 | } |
740 | 726 | ||
727 | /* | ||
728 | * While a write will already be ordering the data, a truncate will not. | ||
729 | * Thus, we need to explicitly order the zeroed pages. | ||
730 | */ | ||
731 | static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode) | ||
732 | { | ||
733 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
734 | handle_t *handle = NULL; | ||
735 | int ret = 0; | ||
736 | |||
737 | if (!ocfs2_should_order_data(inode)) | ||
738 | goto out; | ||
739 | |||
740 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | ||
741 | if (IS_ERR(handle)) { | ||
742 | ret = -ENOMEM; | ||
743 | mlog_errno(ret); | ||
744 | goto out; | ||
745 | } | ||
746 | |||
747 | ret = ocfs2_jbd2_file_inode(handle, inode); | ||
748 | if (ret < 0) | ||
749 | mlog_errno(ret); | ||
750 | |||
751 | out: | ||
752 | if (ret) { | ||
753 | if (!IS_ERR(handle)) | ||
754 | ocfs2_commit_trans(osb, handle); | ||
755 | handle = ERR_PTR(ret); | ||
756 | } | ||
757 | return handle; | ||
758 | } | ||
759 | |||
741 | /* Some parts of this taken from generic_cont_expand, which turned out | 760 | /* Some parts of this taken from generic_cont_expand, which turned out |
742 | * to be too fragile to do exactly what we need without us having to | 761 | * to be too fragile to do exactly what we need without us having to |
743 | * worry about recursive locking in ->write_begin() and ->write_end(). */ | 762 | * worry about recursive locking in ->write_begin() and ->write_end(). */ |
744 | static int ocfs2_write_zero_page(struct inode *inode, | 763 | static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, |
745 | u64 size) | 764 | u64 abs_to) |
746 | { | 765 | { |
747 | struct address_space *mapping = inode->i_mapping; | 766 | struct address_space *mapping = inode->i_mapping; |
748 | struct page *page; | 767 | struct page *page; |
749 | unsigned long index; | 768 | unsigned long index = abs_from >> PAGE_CACHE_SHIFT; |
750 | unsigned int offset; | ||
751 | handle_t *handle = NULL; | 769 | handle_t *handle = NULL; |
752 | int ret; | 770 | int ret = 0; |
771 | unsigned zero_from, zero_to, block_start, block_end; | ||
753 | 772 | ||
754 | offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */ | 773 | BUG_ON(abs_from >= abs_to); |
755 | /* ugh. in prepare/commit_write, if from==to==start of block, we | 774 | BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT)); |
756 | ** skip the prepare. make sure we never send an offset for the start | 775 | BUG_ON(abs_from & (inode->i_blkbits - 1)); |
757 | ** of a block | ||
758 | */ | ||
759 | if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) { | ||
760 | offset++; | ||
761 | } | ||
762 | index = size >> PAGE_CACHE_SHIFT; | ||
763 | 776 | ||
764 | page = grab_cache_page(mapping, index); | 777 | page = grab_cache_page(mapping, index); |
765 | if (!page) { | 778 | if (!page) { |
@@ -768,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode, | |||
768 | goto out; | 781 | goto out; |
769 | } | 782 | } |
770 | 783 | ||
771 | ret = ocfs2_prepare_write_nolock(inode, page, offset, offset); | 784 | /* Get the offsets within the page that we want to zero */ |
772 | if (ret < 0) { | 785 | zero_from = abs_from & (PAGE_CACHE_SIZE - 1); |
773 | mlog_errno(ret); | 786 | zero_to = abs_to & (PAGE_CACHE_SIZE - 1); |
774 | goto out_unlock; | 787 | if (!zero_to) |
775 | } | 788 | zero_to = PAGE_CACHE_SIZE; |
776 | 789 | ||
777 | if (ocfs2_should_order_data(inode)) { | 790 | mlog(0, |
778 | handle = ocfs2_start_walk_page_trans(inode, page, offset, | 791 | "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n", |
779 | offset); | 792 | (unsigned long long)abs_from, (unsigned long long)abs_to, |
780 | if (IS_ERR(handle)) { | 793 | index, zero_from, zero_to); |
781 | ret = PTR_ERR(handle); | 794 | |
782 | handle = NULL; | 795 | /* We know that zero_from is block aligned */ |
796 | for (block_start = zero_from; block_start < zero_to; | ||
797 | block_start = block_end) { | ||
798 | block_end = block_start + (1 << inode->i_blkbits); | ||
799 | |||
800 | /* | ||
801 | * block_start is block-aligned. Bump it by one to | ||
802 | * force ocfs2_{prepare,commit}_write() to zero the | ||
803 | * whole block. | ||
804 | */ | ||
805 | ret = ocfs2_prepare_write_nolock(inode, page, | ||
806 | block_start + 1, | ||
807 | block_start + 1); | ||
808 | if (ret < 0) { | ||
809 | mlog_errno(ret); | ||
783 | goto out_unlock; | 810 | goto out_unlock; |
784 | } | 811 | } |
785 | } | ||
786 | 812 | ||
787 | /* must not update i_size! */ | 813 | if (!handle) { |
788 | ret = block_commit_write(page, offset, offset); | 814 | handle = ocfs2_zero_start_ordered_transaction(inode); |
789 | if (ret < 0) | 815 | if (IS_ERR(handle)) { |
790 | mlog_errno(ret); | 816 | ret = PTR_ERR(handle); |
791 | else | 817 | handle = NULL; |
792 | ret = 0; | 818 | break; |
819 | } | ||
820 | } | ||
821 | |||
822 | /* must not update i_size! */ | ||
823 | ret = block_commit_write(page, block_start + 1, | ||
824 | block_start + 1); | ||
825 | if (ret < 0) | ||
826 | mlog_errno(ret); | ||
827 | else | ||
828 | ret = 0; | ||
829 | } | ||
793 | 830 | ||
794 | if (handle) | 831 | if (handle) |
795 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | 832 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |
833 | |||
796 | out_unlock: | 834 | out_unlock: |
797 | unlock_page(page); | 835 | unlock_page(page); |
798 | page_cache_release(page); | 836 | page_cache_release(page); |
@@ -800,22 +838,114 @@ out: | |||
800 | return ret; | 838 | return ret; |
801 | } | 839 | } |
802 | 840 | ||
803 | static int ocfs2_zero_extend(struct inode *inode, | 841 | /* |
804 | u64 zero_to_size) | 842 | * Find the next range to zero. We do this in terms of bytes because |
843 | * that's what ocfs2_zero_extend() wants, and it is dealing with the | ||
844 | * pagecache. We may return multiple extents. | ||
845 | * | ||
846 | * zero_start and zero_end are ocfs2_zero_extend()'s current idea of what | ||
847 | * needs to be zeroed. range_start and range_end return the next zeroing | ||
848 | * range. A subsequent call should pass the previous range_end as its | ||
849 | * zero_start. If range_end is 0, there's nothing to do. | ||
850 | * | ||
851 | * Unwritten extents are skipped over. Refcounted extents are CoW'd. | ||
852 | */ | ||
853 | static int ocfs2_zero_extend_get_range(struct inode *inode, | ||
854 | struct buffer_head *di_bh, | ||
855 | u64 zero_start, u64 zero_end, | ||
856 | u64 *range_start, u64 *range_end) | ||
805 | { | 857 | { |
806 | int ret = 0; | 858 | int rc = 0, needs_cow = 0; |
807 | u64 start_off; | 859 | u32 p_cpos, zero_clusters = 0; |
808 | struct super_block *sb = inode->i_sb; | 860 | u32 zero_cpos = |
861 | zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
862 | u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end); | ||
863 | unsigned int num_clusters = 0; | ||
864 | unsigned int ext_flags = 0; | ||
809 | 865 | ||
810 | start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); | 866 | while (zero_cpos < last_cpos) { |
811 | while (start_off < zero_to_size) { | 867 | rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos, |
812 | ret = ocfs2_write_zero_page(inode, start_off); | 868 | &num_clusters, &ext_flags); |
813 | if (ret < 0) { | 869 | if (rc) { |
814 | mlog_errno(ret); | 870 | mlog_errno(rc); |
815 | goto out; | 871 | goto out; |
816 | } | 872 | } |
817 | 873 | ||
818 | start_off += sb->s_blocksize; | 874 | if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) { |
875 | zero_clusters = num_clusters; | ||
876 | if (ext_flags & OCFS2_EXT_REFCOUNTED) | ||
877 | needs_cow = 1; | ||
878 | break; | ||
879 | } | ||
880 | |||
881 | zero_cpos += num_clusters; | ||
882 | } | ||
883 | if (!zero_clusters) { | ||
884 | *range_end = 0; | ||
885 | goto out; | ||
886 | } | ||
887 | |||
888 | while ((zero_cpos + zero_clusters) < last_cpos) { | ||
889 | rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters, | ||
890 | &p_cpos, &num_clusters, | ||
891 | &ext_flags); | ||
892 | if (rc) { | ||
893 | mlog_errno(rc); | ||
894 | goto out; | ||
895 | } | ||
896 | |||
897 | if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN)) | ||
898 | break; | ||
899 | if (ext_flags & OCFS2_EXT_REFCOUNTED) | ||
900 | needs_cow = 1; | ||
901 | zero_clusters += num_clusters; | ||
902 | } | ||
903 | if ((zero_cpos + zero_clusters) > last_cpos) | ||
904 | zero_clusters = last_cpos - zero_cpos; | ||
905 | |||
906 | if (needs_cow) { | ||
907 | rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, | ||
908 | UINT_MAX); | ||
909 | if (rc) { | ||
910 | mlog_errno(rc); | ||
911 | goto out; | ||
912 | } | ||
913 | } | ||
914 | |||
915 | *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos); | ||
916 | *range_end = ocfs2_clusters_to_bytes(inode->i_sb, | ||
917 | zero_cpos + zero_clusters); | ||
918 | |||
919 | out: | ||
920 | return rc; | ||
921 | } | ||
922 | |||
923 | /* | ||
924 | * Zero one range returned from ocfs2_zero_extend_get_range(). The caller | ||
925 | * has made sure that the entire range needs zeroing. | ||
926 | */ | ||
927 | static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start, | ||
928 | u64 range_end) | ||
929 | { | ||
930 | int rc = 0; | ||
931 | u64 next_pos; | ||
932 | u64 zero_pos = range_start; | ||
933 | |||
934 | mlog(0, "range_start = %llu, range_end = %llu\n", | ||
935 | (unsigned long long)range_start, | ||
936 | (unsigned long long)range_end); | ||
937 | BUG_ON(range_start >= range_end); | ||
938 | |||
939 | while (zero_pos < range_end) { | ||
940 | next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE; | ||
941 | if (next_pos > range_end) | ||
942 | next_pos = range_end; | ||
943 | rc = ocfs2_write_zero_page(inode, zero_pos, next_pos); | ||
944 | if (rc < 0) { | ||
945 | mlog_errno(rc); | ||
946 | break; | ||
947 | } | ||
948 | zero_pos = next_pos; | ||
819 | 949 | ||
820 | /* | 950 | /* |
821 | * Very large extends have the potential to lock up | 951 | * Very large extends have the potential to lock up |
@@ -824,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode, | |||
824 | cond_resched(); | 954 | cond_resched(); |
825 | } | 955 | } |
826 | 956 | ||
827 | out: | 957 | return rc; |
958 | } | ||
959 | |||
960 | int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, | ||
961 | loff_t zero_to_size) | ||
962 | { | ||
963 | int ret = 0; | ||
964 | u64 zero_start, range_start = 0, range_end = 0; | ||
965 | struct super_block *sb = inode->i_sb; | ||
966 | |||
967 | zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode)); | ||
968 | mlog(0, "zero_start %llu for i_size %llu\n", | ||
969 | (unsigned long long)zero_start, | ||
970 | (unsigned long long)i_size_read(inode)); | ||
971 | while (zero_start < zero_to_size) { | ||
972 | ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start, | ||
973 | zero_to_size, | ||
974 | &range_start, | ||
975 | &range_end); | ||
976 | if (ret) { | ||
977 | mlog_errno(ret); | ||
978 | break; | ||
979 | } | ||
980 | if (!range_end) | ||
981 | break; | ||
982 | /* Trim the ends */ | ||
983 | if (range_start < zero_start) | ||
984 | range_start = zero_start; | ||
985 | if (range_end > zero_to_size) | ||
986 | range_end = zero_to_size; | ||
987 | |||
988 | ret = ocfs2_zero_extend_range(inode, range_start, | ||
989 | range_end); | ||
990 | if (ret) { | ||
991 | mlog_errno(ret); | ||
992 | break; | ||
993 | } | ||
994 | zero_start = range_end; | ||
995 | } | ||
996 | |||
828 | return ret; | 997 | return ret; |
829 | } | 998 | } |
830 | 999 | ||
831 | int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) | 1000 | int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh, |
1001 | u64 new_i_size, u64 zero_to) | ||
832 | { | 1002 | { |
833 | int ret; | 1003 | int ret; |
834 | u32 clusters_to_add; | 1004 | u32 clusters_to_add; |
835 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1005 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
836 | 1006 | ||
1007 | /* | ||
1008 | * Only quota files call this without a bh, and they can't be | ||
1009 | * refcounted. | ||
1010 | */ | ||
1011 | BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); | ||
1012 | BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE)); | ||
1013 | |||
837 | clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); | 1014 | clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size); |
838 | if (clusters_to_add < oi->ip_clusters) | 1015 | if (clusters_to_add < oi->ip_clusters) |
839 | clusters_to_add = 0; | 1016 | clusters_to_add = 0; |
@@ -854,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to) | |||
854 | * still need to zero the area between the old i_size and the | 1031 | * still need to zero the area between the old i_size and the |
855 | * new i_size. | 1032 | * new i_size. |
856 | */ | 1033 | */ |
857 | ret = ocfs2_zero_extend(inode, zero_to); | 1034 | ret = ocfs2_zero_extend(inode, di_bh, zero_to); |
858 | if (ret < 0) | 1035 | if (ret < 0) |
859 | mlog_errno(ret); | 1036 | mlog_errno(ret); |
860 | 1037 | ||
@@ -876,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode, | |||
876 | goto out; | 1053 | goto out; |
877 | 1054 | ||
878 | if (i_size_read(inode) == new_i_size) | 1055 | if (i_size_read(inode) == new_i_size) |
879 | goto out; | 1056 | goto out; |
880 | BUG_ON(new_i_size < i_size_read(inode)); | 1057 | BUG_ON(new_i_size < i_size_read(inode)); |
881 | 1058 | ||
882 | /* | 1059 | /* |
883 | * Fall through for converting inline data, even if the fs | ||
884 | * supports sparse files. | ||
885 | * | ||
886 | * The check for inline data here is legal - nobody can add | ||
887 | * the feature since we have i_mutex. We must check it again | ||
888 | * after acquiring ip_alloc_sem though, as paths like mmap | ||
889 | * might have raced us to converting the inode to extents. | ||
890 | */ | ||
891 | if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) | ||
892 | && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | ||
893 | goto out_update_size; | ||
894 | |||
895 | /* | ||
896 | * The alloc sem blocks people in read/write from reading our | 1060 | * The alloc sem blocks people in read/write from reading our |
897 | * allocation until we're done changing it. We depend on | 1061 | * allocation until we're done changing it. We depend on |
898 | * i_mutex to block other extend/truncate calls while we're | 1062 | * i_mutex to block other extend/truncate calls while we're |
899 | * here. | 1063 | * here. We even have to hold it for sparse files because there |
1064 | * might be some tail zeroing. | ||
900 | */ | 1065 | */ |
901 | down_write(&oi->ip_alloc_sem); | 1066 | down_write(&oi->ip_alloc_sem); |
902 | 1067 | ||
@@ -913,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode, | |||
913 | ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); | 1078 | ret = ocfs2_convert_inline_data_to_extents(inode, di_bh); |
914 | if (ret) { | 1079 | if (ret) { |
915 | up_write(&oi->ip_alloc_sem); | 1080 | up_write(&oi->ip_alloc_sem); |
916 | |||
917 | mlog_errno(ret); | 1081 | mlog_errno(ret); |
918 | goto out; | 1082 | goto out; |
919 | } | 1083 | } |
920 | } | 1084 | } |
921 | 1085 | ||
922 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) | 1086 | if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) |
923 | ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size); | 1087 | ret = ocfs2_zero_extend(inode, di_bh, new_i_size); |
1088 | else | ||
1089 | ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size, | ||
1090 | new_i_size); | ||
924 | 1091 | ||
925 | up_write(&oi->ip_alloc_sem); | 1092 | up_write(&oi->ip_alloc_sem); |
926 | 1093 | ||
@@ -946,9 +1113,8 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
946 | struct ocfs2_super *osb = OCFS2_SB(sb); | 1113 | struct ocfs2_super *osb = OCFS2_SB(sb); |
947 | struct buffer_head *bh = NULL; | 1114 | struct buffer_head *bh = NULL; |
948 | handle_t *handle = NULL; | 1115 | handle_t *handle = NULL; |
949 | int qtype; | ||
950 | struct dquot *transfer_from[MAXQUOTAS] = { }; | ||
951 | struct dquot *transfer_to[MAXQUOTAS] = { }; | 1116 | struct dquot *transfer_to[MAXQUOTAS] = { }; |
1117 | int qtype; | ||
952 | 1118 | ||
953 | mlog_entry("(0x%p, '%.*s')\n", dentry, | 1119 | mlog_entry("(0x%p, '%.*s')\n", dentry, |
954 | dentry->d_name.len, dentry->d_name.name); | 1120 | dentry->d_name.len, dentry->d_name.name); |
@@ -979,10 +1145,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
979 | if (status) | 1145 | if (status) |
980 | return status; | 1146 | return status; |
981 | 1147 | ||
1148 | if (is_quota_modification(inode, attr)) | ||
1149 | dquot_initialize(inode); | ||
982 | size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; | 1150 | size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE; |
983 | if (size_change) { | 1151 | if (size_change) { |
984 | dquot_initialize(inode); | ||
985 | |||
986 | status = ocfs2_rw_lock(inode, 1); | 1152 | status = ocfs2_rw_lock(inode, 1); |
987 | if (status < 0) { | 1153 | if (status < 0) { |
988 | mlog_errno(status); | 1154 | mlog_errno(status); |
@@ -1032,9 +1198,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1032 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { | 1198 | OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) { |
1033 | transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, | 1199 | transfer_to[USRQUOTA] = dqget(sb, attr->ia_uid, |
1034 | USRQUOTA); | 1200 | USRQUOTA); |
1035 | transfer_from[USRQUOTA] = dqget(sb, inode->i_uid, | 1201 | if (!transfer_to[USRQUOTA]) { |
1036 | USRQUOTA); | ||
1037 | if (!transfer_to[USRQUOTA] || !transfer_from[USRQUOTA]) { | ||
1038 | status = -ESRCH; | 1202 | status = -ESRCH; |
1039 | goto bail_unlock; | 1203 | goto bail_unlock; |
1040 | } | 1204 | } |
@@ -1044,9 +1208,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1044 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { | 1208 | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) { |
1045 | transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, | 1209 | transfer_to[GRPQUOTA] = dqget(sb, attr->ia_gid, |
1046 | GRPQUOTA); | 1210 | GRPQUOTA); |
1047 | transfer_from[GRPQUOTA] = dqget(sb, inode->i_gid, | 1211 | if (!transfer_to[GRPQUOTA]) { |
1048 | GRPQUOTA); | ||
1049 | if (!transfer_to[GRPQUOTA] || !transfer_from[GRPQUOTA]) { | ||
1050 | status = -ESRCH; | 1212 | status = -ESRCH; |
1051 | goto bail_unlock; | 1213 | goto bail_unlock; |
1052 | } | 1214 | } |
@@ -1058,7 +1220,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1058 | mlog_errno(status); | 1220 | mlog_errno(status); |
1059 | goto bail_unlock; | 1221 | goto bail_unlock; |
1060 | } | 1222 | } |
1061 | status = dquot_transfer(inode, attr); | 1223 | status = __dquot_transfer(inode, transfer_to); |
1062 | if (status < 0) | 1224 | if (status < 0) |
1063 | goto bail_commit; | 1225 | goto bail_commit; |
1064 | } else { | 1226 | } else { |
@@ -1071,18 +1233,26 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1071 | } | 1233 | } |
1072 | 1234 | ||
1073 | /* | 1235 | /* |
1074 | * This will intentionally not wind up calling vmtruncate(), | 1236 | * This will intentionally not wind up calling truncate_setsize(), |
1075 | * since all the work for a size change has been done above. | 1237 | * since all the work for a size change has been done above. |
1076 | * Otherwise, we could get into problems with truncate as | 1238 | * Otherwise, we could get into problems with truncate as |
1077 | * ip_alloc_sem is used there to protect against i_size | 1239 | * ip_alloc_sem is used there to protect against i_size |
1078 | * changes. | 1240 | * changes. |
1241 | * | ||
1242 | * XXX: this means the conditional below can probably be removed. | ||
1079 | */ | 1243 | */ |
1080 | status = inode_setattr(inode, attr); | 1244 | if ((attr->ia_valid & ATTR_SIZE) && |
1081 | if (status < 0) { | 1245 | attr->ia_size != i_size_read(inode)) { |
1082 | mlog_errno(status); | 1246 | status = vmtruncate(inode, attr->ia_size); |
1083 | goto bail_commit; | 1247 | if (status) { |
1248 | mlog_errno(status); | ||
1249 | goto bail_commit; | ||
1250 | } | ||
1084 | } | 1251 | } |
1085 | 1252 | ||
1253 | setattr_copy(inode, attr); | ||
1254 | mark_inode_dirty(inode); | ||
1255 | |||
1086 | status = ocfs2_mark_inode_dirty(handle, inode, bh); | 1256 | status = ocfs2_mark_inode_dirty(handle, inode, bh); |
1087 | if (status < 0) | 1257 | if (status < 0) |
1088 | mlog_errno(status); | 1258 | mlog_errno(status); |
@@ -1098,10 +1268,8 @@ bail: | |||
1098 | brelse(bh); | 1268 | brelse(bh); |
1099 | 1269 | ||
1100 | /* Release quota pointers in case we acquired them */ | 1270 | /* Release quota pointers in case we acquired them */ |
1101 | for (qtype = 0; qtype < MAXQUOTAS; qtype++) { | 1271 | for (qtype = 0; qtype < MAXQUOTAS; qtype++) |
1102 | dqput(transfer_to[qtype]); | 1272 | dqput(transfer_to[qtype]); |
1103 | dqput(transfer_from[qtype]); | ||
1104 | } | ||
1105 | 1273 | ||
1106 | if (!status && attr->ia_valid & ATTR_MODE) { | 1274 | if (!status && attr->ia_valid & ATTR_MODE) { |
1107 | status = ocfs2_acl_chmod(inode); | 1275 | status = ocfs2_acl_chmod(inode); |
@@ -1195,9 +1363,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode, | |||
1195 | di = (struct ocfs2_dinode *) bh->b_data; | 1363 | di = (struct ocfs2_dinode *) bh->b_data; |
1196 | di->i_mode = cpu_to_le16(inode->i_mode); | 1364 | di->i_mode = cpu_to_le16(inode->i_mode); |
1197 | 1365 | ||
1198 | ret = ocfs2_journal_dirty(handle, bh); | 1366 | ocfs2_journal_dirty(handle, bh); |
1199 | if (ret < 0) | ||
1200 | mlog_errno(ret); | ||
1201 | 1367 | ||
1202 | out_trans: | 1368 | out_trans: |
1203 | ocfs2_commit_trans(osb, handle); | 1369 | ocfs2_commit_trans(osb, handle); |
@@ -1434,16 +1600,90 @@ out: | |||
1434 | return ret; | 1600 | return ret; |
1435 | } | 1601 | } |
1436 | 1602 | ||
1603 | static int ocfs2_find_rec(struct ocfs2_extent_list *el, u32 pos) | ||
1604 | { | ||
1605 | int i; | ||
1606 | struct ocfs2_extent_rec *rec = NULL; | ||
1607 | |||
1608 | for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { | ||
1609 | |||
1610 | rec = &el->l_recs[i]; | ||
1611 | |||
1612 | if (le32_to_cpu(rec->e_cpos) < pos) | ||
1613 | break; | ||
1614 | } | ||
1615 | |||
1616 | return i; | ||
1617 | } | ||
1618 | |||
1619 | /* | ||
1620 | * Helper to calculate the punching pos and length in one run. We handle the | ||
1621 | * following three cases, in order: | ||
1622 | * | ||
1623 | * - remove the entire record | ||
1624 | * - remove a partial record | ||
1625 | * - no record needs to be removed (hole-punching completed) | ||
1626 | */ | ||
1627 | static void ocfs2_calc_trunc_pos(struct inode *inode, | ||
1628 | struct ocfs2_extent_list *el, | ||
1629 | struct ocfs2_extent_rec *rec, | ||
1630 | u32 trunc_start, u32 *trunc_cpos, | ||
1631 | u32 *trunc_len, u32 *trunc_end, | ||
1632 | u64 *blkno, int *done) | ||
1633 | { | ||
1634 | int ret = 0; | ||
1635 | u32 coff, range; | ||
1636 | |||
1637 | range = le32_to_cpu(rec->e_cpos) + ocfs2_rec_clusters(el, rec); | ||
1638 | |||
1639 | if (le32_to_cpu(rec->e_cpos) >= trunc_start) { | ||
1640 | *trunc_cpos = le32_to_cpu(rec->e_cpos); | ||
1641 | /* | ||
1642 | * Skip holes if any. | ||
1643 | */ | ||
1644 | if (range < *trunc_end) | ||
1645 | *trunc_end = range; | ||
1646 | *trunc_len = *trunc_end - le32_to_cpu(rec->e_cpos); | ||
1647 | *blkno = le64_to_cpu(rec->e_blkno); | ||
1648 | *trunc_end = le32_to_cpu(rec->e_cpos); | ||
1649 | } else if (range > trunc_start) { | ||
1650 | *trunc_cpos = trunc_start; | ||
1651 | *trunc_len = *trunc_end - trunc_start; | ||
1652 | coff = trunc_start - le32_to_cpu(rec->e_cpos); | ||
1653 | *blkno = le64_to_cpu(rec->e_blkno) + | ||
1654 | ocfs2_clusters_to_blocks(inode->i_sb, coff); | ||
1655 | *trunc_end = trunc_start; | ||
1656 | } else { | ||
1657 | /* | ||
1658 | * There are two possibilities: | ||
1659 | * | ||
1660 | * - the last record has been removed | ||
1661 | * - trunc_start was within a hole | ||
1662 | * | ||
1663 | * Both cases mean that hole punching is complete. | ||
1664 | */ | ||
1665 | ret = 1; | ||
1666 | } | ||
1667 | |||
1668 | *done = ret; | ||
1669 | } | ||
1670 | |||
1437 | static int ocfs2_remove_inode_range(struct inode *inode, | 1671 | static int ocfs2_remove_inode_range(struct inode *inode, |
1438 | struct buffer_head *di_bh, u64 byte_start, | 1672 | struct buffer_head *di_bh, u64 byte_start, |
1439 | u64 byte_len) | 1673 | u64 byte_len) |
1440 | { | 1674 | { |
1441 | int ret = 0; | 1675 | int ret = 0, flags = 0, done = 0, i; |
1442 | u32 trunc_start, trunc_len, cpos, phys_cpos, alloc_size; | 1676 | u32 trunc_start, trunc_len, trunc_end, trunc_cpos, phys_cpos; |
1677 | u32 cluster_in_el; | ||
1443 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1678 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
1444 | struct ocfs2_cached_dealloc_ctxt dealloc; | 1679 | struct ocfs2_cached_dealloc_ctxt dealloc; |
1445 | struct address_space *mapping = inode->i_mapping; | 1680 | struct address_space *mapping = inode->i_mapping; |
1446 | struct ocfs2_extent_tree et; | 1681 | struct ocfs2_extent_tree et; |
1682 | struct ocfs2_path *path = NULL; | ||
1683 | struct ocfs2_extent_list *el = NULL; | ||
1684 | struct ocfs2_extent_rec *rec = NULL; | ||
1685 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
1686 | u64 blkno, refcount_loc = le64_to_cpu(di->i_refcount_loc); | ||
1447 | 1687 | ||
1448 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); | 1688 | ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); |
1449 | ocfs2_init_dealloc_ctxt(&dealloc); | 1689 | ocfs2_init_dealloc_ctxt(&dealloc); |
@@ -1469,17 +1709,35 @@ static int ocfs2_remove_inode_range(struct inode *inode, | |||
1469 | goto out; | 1709 | goto out; |
1470 | } | 1710 | } |
1471 | 1711 | ||
1712 | /* | ||
1713 | * For reflinks, we may need to CoW the 2 clusters that contain the | ||
1714 | * hole's start and end offsets, since they might be partially zeroed | ||
1715 | * later when those offsets are not exactly aligned to the cluster size. | ||
1716 | */ | ||
1717 | |||
1718 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) { | ||
1719 | |||
1720 | ret = ocfs2_cow_file_pos(inode, di_bh, byte_start); | ||
1721 | if (ret) { | ||
1722 | mlog_errno(ret); | ||
1723 | goto out; | ||
1724 | } | ||
1725 | |||
1726 | ret = ocfs2_cow_file_pos(inode, di_bh, byte_start + byte_len); | ||
1727 | if (ret) { | ||
1728 | mlog_errno(ret); | ||
1729 | goto out; | ||
1730 | } | ||
1731 | } | ||
1732 | |||
1472 | trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); | 1733 | trunc_start = ocfs2_clusters_for_bytes(osb->sb, byte_start); |
1473 | trunc_len = (byte_start + byte_len) >> osb->s_clustersize_bits; | 1734 | trunc_end = (byte_start + byte_len) >> osb->s_clustersize_bits; |
1474 | if (trunc_len >= trunc_start) | 1735 | cluster_in_el = trunc_end; |
1475 | trunc_len -= trunc_start; | ||
1476 | else | ||
1477 | trunc_len = 0; | ||
1478 | 1736 | ||
1479 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, clen: %u\n", | 1737 | mlog(0, "Inode: %llu, start: %llu, len: %llu, cstart: %u, cend: %u\n", |
1480 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 1738 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
1481 | (unsigned long long)byte_start, | 1739 | (unsigned long long)byte_start, |
1482 | (unsigned long long)byte_len, trunc_start, trunc_len); | 1740 | (unsigned long long)byte_len, trunc_start, trunc_end); |
1483 | 1741 | ||
1484 | ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); | 1742 | ret = ocfs2_zero_partial_clusters(inode, byte_start, byte_len); |
1485 | if (ret) { | 1743 | if (ret) { |
@@ -1487,31 +1745,79 @@ static int ocfs2_remove_inode_range(struct inode *inode, | |||
1487 | goto out; | 1745 | goto out; |
1488 | } | 1746 | } |
1489 | 1747 | ||
1490 | cpos = trunc_start; | 1748 | path = ocfs2_new_path_from_et(&et); |
1491 | while (trunc_len) { | 1749 | if (!path) { |
1492 | ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, | 1750 | ret = -ENOMEM; |
1493 | &alloc_size, NULL); | 1751 | mlog_errno(ret); |
1752 | goto out; | ||
1753 | } | ||
1754 | |||
1755 | while (trunc_end > trunc_start) { | ||
1756 | |||
1757 | ret = ocfs2_find_path(INODE_CACHE(inode), path, | ||
1758 | cluster_in_el); | ||
1494 | if (ret) { | 1759 | if (ret) { |
1495 | mlog_errno(ret); | 1760 | mlog_errno(ret); |
1496 | goto out; | 1761 | goto out; |
1497 | } | 1762 | } |
1498 | 1763 | ||
1499 | if (alloc_size > trunc_len) | 1764 | el = path_leaf_el(path); |
1500 | alloc_size = trunc_len; | 1765 | |
1766 | i = ocfs2_find_rec(el, trunc_end); | ||
1767 | /* | ||
1768 | * Need to go to previous extent block. | ||
1769 | */ | ||
1770 | if (i < 0) { | ||
1771 | if (path->p_tree_depth == 0) | ||
1772 | break; | ||
1501 | 1773 | ||
1502 | /* Only do work for non-holes */ | 1774 | ret = ocfs2_find_cpos_for_left_leaf(inode->i_sb, |
1503 | if (phys_cpos != 0) { | 1775 | path, |
1504 | ret = ocfs2_remove_btree_range(inode, &et, cpos, | 1776 | &cluster_in_el); |
1505 | phys_cpos, alloc_size, | ||
1506 | &dealloc); | ||
1507 | if (ret) { | 1777 | if (ret) { |
1508 | mlog_errno(ret); | 1778 | mlog_errno(ret); |
1509 | goto out; | 1779 | goto out; |
1510 | } | 1780 | } |
1781 | |||
1782 | /* | ||
1783 | * We've reached the leftmost extent block, | ||
1784 | * it's safe to leave. | ||
1785 | */ | ||
1786 | if (cluster_in_el == 0) | ||
1787 | break; | ||
1788 | |||
1789 | /* | ||
1790 | * The 'pos' searched for previous extent block is | ||
1791 | * always one cluster less than actual trunc_end. | ||
1792 | */ | ||
1793 | trunc_end = cluster_in_el + 1; | ||
1794 | |||
1795 | ocfs2_reinit_path(path, 1); | ||
1796 | |||
1797 | continue; | ||
1798 | |||
1799 | } else | ||
1800 | rec = &el->l_recs[i]; | ||
1801 | |||
1802 | ocfs2_calc_trunc_pos(inode, el, rec, trunc_start, &trunc_cpos, | ||
1803 | &trunc_len, &trunc_end, &blkno, &done); | ||
1804 | if (done) | ||
1805 | break; | ||
1806 | |||
1807 | flags = rec->e_flags; | ||
1808 | phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno); | ||
1809 | |||
1810 | ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos, | ||
1811 | phys_cpos, trunc_len, flags, | ||
1812 | &dealloc, refcount_loc); | ||
1813 | if (ret < 0) { | ||
1814 | mlog_errno(ret); | ||
1815 | goto out; | ||
1511 | } | 1816 | } |
1512 | 1817 | ||
1513 | cpos += alloc_size; | 1818 | cluster_in_el = trunc_end; |
1514 | trunc_len -= alloc_size; | 1819 | |
1820 | ocfs2_reinit_path(path, 1); | ||
1515 | } | 1821 | } |
1516 | 1822 | ||
1517 | ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); | 1823 | ocfs2_truncate_cluster_pages(inode, byte_start, byte_len); |
@@ -2001,9 +2307,13 @@ relock: | |||
2001 | * direct write may have instantiated a few | 2307 | * direct write may have instantiated a few |
2002 | * blocks outside i_size. Trim these off again. | 2308 | * blocks outside i_size. Trim these off again. |
2003 | * Don't need i_size_read because we hold i_mutex. | 2309 | * Don't need i_size_read because we hold i_mutex. |
2310 | * | ||
2311 | * XXX(truncate): this looks buggy because ocfs2 did not | ||
2312 | * actually implement ->truncate. Take a look at | ||
2313 | * the new truncate sequence and update this accordingly | ||
2004 | */ | 2314 | */ |
2005 | if (*ppos + count > inode->i_size) | 2315 | if (*ppos + count > inode->i_size) |
2006 | vmtruncate(inode, inode->i_size); | 2316 | truncate_setsize(inode, inode->i_size); |
2007 | ret = written; | 2317 | ret = written; |
2008 | goto out_dio; | 2318 | goto out_dio; |
2009 | } | 2319 | } |