1 files changed, 245 insertions, 64 deletions
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c447..2b10b36d1577 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
        return status;
 }
+/*
+ * While a write will already be ordering the data, a truncate will not.
+ * Thus, we need to explicitly order the zeroed pages.
+ *
+ * Returns NULL when ocfs2_should_order_data() says the inode needs no
+ * ordering, a started handle once ocfs2_jbd2_file_inode() has succeeded,
+ * or an ERR_PTR() carrying the failure.  On failure any handle started
+ * here has already been committed, so the caller owns nothing.
+ */
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+{
+        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+        handle_t *handle = NULL;
+        int ret = 0;
+        if (!ocfs2_should_order_data(inode))
+                goto out;
+        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+        if (IS_ERR(handle)) {
+                /* Propagate the real error instead of assuming -ENOMEM */
+                ret = PTR_ERR(handle);
+                mlog_errno(ret);
+                goto out;
+        }
+        ret = ocfs2_jbd2_file_inode(handle, inode);
+        if (ret < 0)
+                mlog_errno(ret);
+out:
+        if (ret) {
+                /* Only a successfully started handle may be committed */
+                if (!IS_ERR(handle))
+                        ocfs2_commit_trans(osb, handle);
+                handle = ERR_PTR(ret);
+        }
+        return handle;
+}
 /* Some parts of this taken from generic_cont_expand, which turned out
 * to be too fragile to do exactly what we need without us having to
 * worry about recursive locking in ->write_begin() and ->write_end(). */
+/*
+ * Zero the bytes [abs_from, abs_to) of the file through the page cache,
+ * one filesystem block at a time.  abs_from and abs_to are absolute file
+ * offsets; abs_from must be block aligned and the range must not extend
+ * past the end of the page containing abs_from (both are BUG_ON()ed).
+ * i_size is never updated here.
+ *
+ * Returns 0 on success or a negative error code.  An ordered-data
+ * transaction is started at most once per call and is always committed
+ * before returning, including on the error paths.
+ */
-static int ocfs2_write_zero_page(struct inode *inode,
+static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
-                                 u64 size)
+                                 u64 abs_to)
 {
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
-        unsigned long index;
+        unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
-        unsigned int offset;
        handle_t *handle = NULL;
-        int ret;
+        int ret = 0;
+        unsigned zero_from, zero_to, block_start, block_end;
-        offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
+        BUG_ON(abs_from >= abs_to);
-        /* ugh.  in prepare/commit_write, if from==to==start of block, we
+        BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
-        ** skip the prepare.  make sure we never send an offset for the start
+        /* i_blkbits is a shift count; build the block-size mask from it */
+        BUG_ON(abs_from & ((1 << inode->i_blkbits) - 1));
-        ** of a block
-        */
-        if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-                offset++;
-        }
-        index = size >> PAGE_CACHE_SHIFT;
        page = grab_cache_page(mapping, index);
        if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
                goto out;
        }
-        ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
+        /* Get the offsets within the page that we want to zero */
-        if (ret < 0) {
+        zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
-                mlog_errno(ret);
+        zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
-                goto out_unlock;
+        if (!zero_to)
-        }
+                zero_to = PAGE_CACHE_SIZE;
-        if (ocfs2_should_order_data(inode)) {
+        mlog(0,
-                handle = ocfs2_start_walk_page_trans(inode, page, offset,
+             "abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
-                                                     offset);
+             (unsigned long long)abs_from, (unsigned long long)abs_to,
-                if (IS_ERR(handle)) {
+             index, zero_from, zero_to);
-                        ret = PTR_ERR(handle);
-                        handle = NULL;
+        /* We know that zero_from is block aligned */
+        for (block_start = zero_from; block_start < zero_to;
+             block_start = block_end) {
+                block_end = block_start + (1 << inode->i_blkbits);
+                /*
+                 * block_start is block-aligned.  Bump it by one to
+                 * force ocfs2_{prepare,commit}_write() to zero the
+                 * whole block.
+                 */
+                ret = ocfs2_prepare_write_nolock(inode, page,
+                                                 block_start + 1,
+                                                 block_start + 1);
+                if (ret < 0) {
+                        mlog_errno(ret);
-                        goto out_unlock;
+                        /*
+                         * Leave the loop rather than jumping past the
+                         * commit: a handle started for an earlier block
+                         * of this page must still be committed.
+                         */
+                        break;
                }
-        }
-        /* must not update i_size! */
+                if (!handle) {
-        ret = block_commit_write(page, offset, offset);
+                        handle = ocfs2_zero_start_ordered_transaction(inode);
-        if (ret < 0)
+                        if (IS_ERR(handle)) {
-                mlog_errno(ret);
+                                ret = PTR_ERR(handle);
-        else
+                                handle = NULL;
-                ret = 0;
+                                break;
+                        }
+                }
+                /* must not update i_size! */
+                ret = block_commit_write(page, block_start + 1,
+                                         block_start + 1);
+                if (ret < 0)
+                        mlog_errno(ret);
+                else
+                        ret = 0;
+        }
        if (handle)
                ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
-out_unlock:
        unlock_page(page);
        page_cache_release(page);
@@ -786,22 +838,114 @@ out:
        return ret;
 }
-static int ocfs2_zero_extend(struct inode *inode,
+/*
-                             u64 zero_to_size)
+ * Find the next range to zero.  We do this in terms of bytes because
+ * that's what ocfs2_zero_extend() wants, and it is dealing with the
+ * pagecache.  We may return multiple extents.
+ *
+ * zero_start and zero_end are ocfs2_zero_extend()s current idea of what
+ * needs to be zeroed.  range_start and range_end return the next zeroing
+ * range.  A subsequent call should pass the previous range_end as its
+ * zero_start.  If range_end is 0, there's nothing to do.
+ *
+ * Unwritten extents are skipped over.  Refcounted extents are CoWd.
+ *
+ * How the range is built:
+ *  - The first loop walks forward from the cluster containing zero_start
+ *    until ocfs2_get_clusters() reports an extent with a non-zero p_cpos
+ *    that is not flagged OCFS2_EXT_UNWRITTEN.  Extents with p_cpos == 0
+ *    are skipped (presumably holes - confirm against
+ *    ocfs2_get_clusters()).
+ *  - The second loop appends following extents for as long as they also
+ *    have a non-zero p_cpos and are not unwritten.
+ *  - The cluster count is clamped so the range never runs past
+ *    ocfs2_clusters_for_bytes(zero_end).
+ *  - If any extent in the range was flagged OCFS2_EXT_REFCOUNTED, the
+ *    whole range is passed to ocfs2_refcount_cow() before returning.
+ *
+ * The returned offsets come from ocfs2_clusters_to_bytes(), i.e. they are
+ * cluster granular; *range_start can therefore be below zero_start, and
+ * the caller is expected to trim both ends.
+ *
+ * Returns 0 on success, otherwise the error from ocfs2_get_clusters() or
+ * ocfs2_refcount_cow().  On error neither output is written.  In the
+ * "nothing to do" case only *range_end is written (as 0); *range_start is
+ * left untouched.
+ */
+static int ocfs2_zero_extend_get_range(struct inode *inode,
+                                       struct buffer_head *di_bh,
+                                       u64 zero_start, u64 zero_end,
+                                       u64 *range_start, u64 *range_end)
 {
-        int ret = 0;
+        int rc = 0, needs_cow = 0;
-        u64 start_off;
+        u32 p_cpos, zero_clusters = 0;
-        struct super_block *sb = inode->i_sb;
+        u32 zero_cpos =
+                zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
+        u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
+        unsigned int num_clusters = 0;
+        unsigned int ext_flags = 0;
-        start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
+        while (zero_cpos < last_cpos) {
-        while (start_off < zero_to_size) {
+                rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
-                ret = ocfs2_write_zero_page(inode, start_off);
+                                        &num_clusters, &ext_flags);
-                if (ret < 0) {
+                if (rc) {
-                        mlog_errno(ret);
+                        mlog_errno(rc);
+                        goto out;
+                }
+                if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
+                        zero_clusters = num_clusters;
+                        if (ext_flags & OCFS2_EXT_REFCOUNTED)
+                                needs_cow = 1;
+                        break;
+                }
+                zero_cpos += num_clusters;
+        }
+        if (!zero_clusters) {
+                *range_end = 0;
+                goto out;
+        }
+        while ((zero_cpos + zero_clusters) < last_cpos) {
+                rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
+                                        &p_cpos, &num_clusters,
+                                        &ext_flags);
+                if (rc) {
+                        mlog_errno(rc);
                        goto out;
                }
-                start_off += sb->s_blocksize;
+                if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
+                        break;
+                if (ext_flags & OCFS2_EXT_REFCOUNTED)
+                        needs_cow = 1;
+                zero_clusters += num_clusters;
+        }
+        if ((zero_cpos + zero_clusters) > last_cpos)
+                zero_clusters = last_cpos - zero_cpos;
+        if (needs_cow) {
+                rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
+                                        UINT_MAX);
+                if (rc) {
+                        mlog_errno(rc);
+                        goto out;
+                }
+        }
+        *range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
+        *range_end = ocfs2_clusters_to_bytes(inode->i_sb,
+                                             zero_cpos + zero_clusters);
+out:
+        return rc;
+}
+/*
+ * Zero one range returned from ocfs2_zero_extend_get_range().  The caller
+ * has made sure that the entire range needs zeroing.
+ *
+ * The byte range [range_start, range_end) is walked one page at a time:
+ * each ocfs2_write_zero_page() call covers from the current position up
+ * to the next page boundary, or up to range_end if that comes first.
+ * range_start must be strictly below range_end (BUG_ON otherwise).
+ *
+ * Returns 0 on success; on the first ocfs2_write_zero_page() failure the
+ * walk stops and that negative error code is returned.
+ */
+static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
+                                   u64 range_end)
+{
+        int rc = 0;
+        u64 next_pos;
+        u64 zero_pos = range_start;
+        mlog(0, "range_start = %llu, range_end = %llu\n",
+             (unsigned long long)range_start,
+             (unsigned long long)range_end);
+        BUG_ON(range_start >= range_end);
+        while (zero_pos < range_end) {
+                next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
+                if (next_pos > range_end)
+                        next_pos = range_end;
+                rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
+                if (rc < 0) {
+                        mlog_errno(rc);
+                        break;
+                }
+                zero_pos = next_pos;
                /*
                 * Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
                cond_resched();
        }
-out:
+        return rc;
+}
+/*
+ * Zero the parts of the file between the current i_size (aligned with
+ * ocfs2_align_bytes_to_blocks()) and zero_to_size that
+ * ocfs2_zero_extend_get_range() reports as needing it.
+ *
+ * Each pass asks for the next range at or after zero_start, trims it to
+ * [zero_start, zero_to_size), zeroes it with ocfs2_zero_extend_range()
+ * and continues from the end of that range.  The walk stops when the
+ * target is reached, when get_range signals "nothing to do" by returning
+ * a range_end of 0, or on the first error.
+ *
+ * di_bh is only forwarded to ocfs2_zero_extend_get_range().
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
+                      loff_t zero_to_size)
+{
+        int ret = 0;
+        u64 zero_start, range_start = 0, range_end = 0;
+        struct super_block *sb = inode->i_sb;
+        zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
+        mlog(0, "zero_start %llu for i_size %llu\n",
+             (unsigned long long)zero_start,
+             (unsigned long long)i_size_read(inode));
+        while (zero_start < zero_to_size) {
+                ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
+                                                  zero_to_size,
+                                                  &range_start,
+                                                  &range_end);
+                if (ret) {
+                        mlog_errno(ret);
+                        break;
+                }
+                if (!range_end)
+                        break;
+                /* Trim the ends */
+                if (range_start < zero_start)
+                        range_start = zero_start;
+                if (range_end > zero_to_size)
+                        range_end = zero_to_size;
+                ret = ocfs2_zero_extend_range(inode, range_start,
+                                              range_end);
+                if (ret) {
+                        mlog_errno(ret);
+                        break;
+                }
+                zero_start = range_end;
+        }
        return ret;
 }
-int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
+int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
+                          u64 new_i_size, u64 zero_to)
 {
        int ret;
        u32 clusters_to_add;
        struct ocfs2_inode_info *oi = OCFS2_I(inode);
+        /*
+         * Only quota files call this without a bh, and they can't be
+         * refcounted.
+         */
+        BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
+        BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
        clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
        if (clusters_to_add < oi->ip_clusters)
                clusters_to_add = 0;
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
         * still need to zero the area between the old i_size and the
         * new i_size.
         */
-        ret = ocfs2_zero_extend(inode, zero_to);
+        ret = ocfs2_zero_extend(inode, di_bh, zero_to);
        if (ret < 0)
                mlog_errno(ret);
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
                goto out;
        if (i_size_read(inode) == new_i_size)
-                goto out;
+                goto out;
        BUG_ON(new_i_size < i_size_read(inode));
        /*
-         * Fall through for converting inline data, even if the fs
-         * supports sparse files.
-         *
-         * The check for inline data here is legal - nobody can add
-         * the feature since we have i_mutex. We must check it again
-         * after acquiring ip_alloc_sem though, as paths like mmap
-         * might have raced us to converting the inode to extents.
-         */
-        if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
-            && ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
-                goto out_update_size;
-        /*
         * The alloc sem blocks people in read/write from reading our
         * allocation until we're done changing it. We depend on
         * i_mutex to block other extend/truncate calls while we're
-         * here.
+         * here.  We even have to hold it for sparse files because there
+         * might be some tail zeroing.
         */
        down_write(&oi->ip_alloc_sem);
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
                ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
                if (ret) {
                        up_write(&oi->ip_alloc_sem);
                        mlog_errno(ret);
                        goto out;
                }
        }
-        if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+        if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
-                ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);
+                ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
+        else
+                ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
+                                            new_i_size);
        up_write(&oi->ip_alloc_sem);

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6a13ea64c447..2b10b36d1577 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
724	return status;	724	return status;
725	}	725	}
726		726
		727	/*
		728	* While a write will already be ordering the data, a truncate will not.
		729	* Thus, we need to explicitly order the zeroed pages.
		730	*/
		731	static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
		732	{
		733	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
		734	handle_t *handle = NULL;
		735	int ret = 0;
		736
		737	if (!ocfs2_should_order_data(inode))
		738	goto out;
		739
		740	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
		741	if (IS_ERR(handle)) {
		742	ret = -ENOMEM;
		743	mlog_errno(ret);
		744	goto out;
		745	}
		746
		747	ret = ocfs2_jbd2_file_inode(handle, inode);
		748	if (ret < 0)
		749	mlog_errno(ret);
		750
		751	out:
		752	if (ret) {
		753	if (!IS_ERR(handle))
		754	ocfs2_commit_trans(osb, handle);
		755	handle = ERR_PTR(ret);
		756	}
		757	return handle;
		758	}
		759
727	/* Some parts of this taken from generic_cont_expand, which turned out	760	/* Some parts of this taken from generic_cont_expand, which turned out
728	* to be too fragile to do exactly what we need without us having to	761	* to be too fragile to do exactly what we need without us having to
729	* worry about recursive locking in ->write_begin() and ->write_end(). */	762	* worry about recursive locking in ->write_begin() and ->write_end(). */
730	static int ocfs2_write_zero_page(struct inode *inode,	763	static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
731	u64 size)	764	u64 abs_to)
732	{	765	{
733	struct address_space *mapping = inode->i_mapping;	766	struct address_space *mapping = inode->i_mapping;
734	struct page *page;	767	struct page *page;
735	unsigned long index;	768	unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
736	unsigned int offset;
737	handle_t *handle = NULL;	769	handle_t *handle = NULL;
738	int ret;	770	int ret = 0;
		771	unsigned zero_from, zero_to, block_start, block_end;
739		772
740	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */	773	BUG_ON(abs_from >= abs_to);
741	/* ugh. in prepare/commit_write, if from==to==start of block, we	774	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
742	** skip the prepare. make sure we never send an offset for the start	775	BUG_ON(abs_from & (inode->i_blkbits - 1));
743	** of a block
744	*/
745	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
746	offset++;
747	}
748	index = size >> PAGE_CACHE_SHIFT;
749		776
750	page = grab_cache_page(mapping, index);	777	page = grab_cache_page(mapping, index);
751	if (!page) {	778	if (!page) {
@@ -754,31 +781,56 @@ static int ocfs2_write_zero_page(struct inode *inode,
754	goto out;	781	goto out;
755	}	782	}
756		783
757	ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);	784	/* Get the offsets within the page that we want to zero */
758	if (ret < 0) {	785	zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
759	mlog_errno(ret);	786	zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
760	goto out_unlock;	787	if (!zero_to)
761	}	788	zero_to = PAGE_CACHE_SIZE;
762		789
763	if (ocfs2_should_order_data(inode)) {	790	mlog(0,
764	handle = ocfs2_start_walk_page_trans(inode, page, offset,	791	"abs_from = %llu, abs_to = %llu, index = %lu, zero_from = %u, zero_to = %u\n",
765	offset);	792	(unsigned long long)abs_from, (unsigned long long)abs_to,
766	if (IS_ERR(handle)) {	793	index, zero_from, zero_to);
767	ret = PTR_ERR(handle);	794
768	handle = NULL;	795	/* We know that zero_from is block aligned */
		796	for (block_start = zero_from; block_start < zero_to;
		797	block_start = block_end) {
		798	block_end = block_start + (1 << inode->i_blkbits);
		799
		800	/*
		801	* block_start is block-aligned. Bump it by one to
		802	* force ocfs2_{prepare,commit}_write() to zero the
		803	* whole block.
		804	*/
		805	ret = ocfs2_prepare_write_nolock(inode, page,
		806	block_start + 1,
		807	block_start + 1);
		808	if (ret < 0) {
		809	mlog_errno(ret);
769	goto out_unlock;	810	goto out_unlock;
770	}	811	}
771	}
772		812
773	/* must not update i_size! */	813	if (!handle) {
774	ret = block_commit_write(page, offset, offset);	814	handle = ocfs2_zero_start_ordered_transaction(inode);
775	if (ret < 0)	815	if (IS_ERR(handle)) {
776	mlog_errno(ret);	816	ret = PTR_ERR(handle);
777	else	817	handle = NULL;
778	ret = 0;	818	break;
		819	}
		820	}
		821
		822	/* must not update i_size! */
		823	ret = block_commit_write(page, block_start + 1,
		824	block_start + 1);
		825	if (ret < 0)
		826	mlog_errno(ret);
		827	else
		828	ret = 0;
		829	}
779		830
780	if (handle)	831	if (handle)
781	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);	832	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
		833
782	out_unlock:	834	out_unlock:
783	unlock_page(page);	835	unlock_page(page);
784	page_cache_release(page);	836	page_cache_release(page);
@@ -786,22 +838,114 @@ out:
786	return ret;	838	return ret;
787	}	839	}
788		840
789	static int ocfs2_zero_extend(struct inode *inode,	841	/*
790	u64 zero_to_size)	842	* Find the next range to zero. We do this in terms of bytes because
		843	* that's what ocfs2_zero_extend() wants, and it is dealing with the
		844	* pagecache. We may return multiple extents.
		845	*
		846	* zero_start and zero_end are ocfs2_zero_extend()s current idea of what
		847	* needs to be zeroed. range_start and range_end return the next zeroing
		848	* range. A subsequent call should pass the previous range_end as its
		849	* zero_start. If range_end is 0, there's nothing to do.
		850	*
		851	* Unwritten extents are skipped over. Refcounted extents are CoWd.
		852	*/
		853	static int ocfs2_zero_extend_get_range(struct inode *inode,
		854	struct buffer_head *di_bh,
		855	u64 zero_start, u64 zero_end,
		856	u64 *range_start, u64 *range_end)
791	{	857	{
792	int ret = 0;	858	int rc = 0, needs_cow = 0;
793	u64 start_off;	859	u32 p_cpos, zero_clusters = 0;
794	struct super_block *sb = inode->i_sb;	860	u32 zero_cpos =
		861	zero_start >> OCFS2_SB(inode->i_sb)->s_clustersize_bits;
		862	u32 last_cpos = ocfs2_clusters_for_bytes(inode->i_sb, zero_end);
		863	unsigned int num_clusters = 0;
		864	unsigned int ext_flags = 0;
795		865
796	start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));	866	while (zero_cpos < last_cpos) {
797	while (start_off < zero_to_size) {	867	rc = ocfs2_get_clusters(inode, zero_cpos, &p_cpos,
798	ret = ocfs2_write_zero_page(inode, start_off);	868	&num_clusters, &ext_flags);
799	if (ret < 0) {	869	if (rc) {
800	mlog_errno(ret);	870	mlog_errno(rc);
		871	goto out;
		872	}
		873
		874	if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
		875	zero_clusters = num_clusters;
		876	if (ext_flags & OCFS2_EXT_REFCOUNTED)
		877	needs_cow = 1;
		878	break;
		879	}
		880
		881	zero_cpos += num_clusters;
		882	}
		883	if (!zero_clusters) {
		884	*range_end = 0;
		885	goto out;
		886	}
		887
		888	while ((zero_cpos + zero_clusters) < last_cpos) {
		889	rc = ocfs2_get_clusters(inode, zero_cpos + zero_clusters,
		890	&p_cpos, &num_clusters,
		891	&ext_flags);
		892	if (rc) {
		893	mlog_errno(rc);
801	goto out;	894	goto out;
802	}	895	}
803		896
804	start_off += sb->s_blocksize;	897	if (!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN))
		898	break;
		899	if (ext_flags & OCFS2_EXT_REFCOUNTED)
		900	needs_cow = 1;
		901	zero_clusters += num_clusters;
		902	}
		903	if ((zero_cpos + zero_clusters) > last_cpos)
		904	zero_clusters = last_cpos - zero_cpos;
		905
		906	if (needs_cow) {
		907	rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters,
		908	UINT_MAX);
		909	if (rc) {
		910	mlog_errno(rc);
		911	goto out;
		912	}
		913	}
		914
		915	*range_start = ocfs2_clusters_to_bytes(inode->i_sb, zero_cpos);
		916	*range_end = ocfs2_clusters_to_bytes(inode->i_sb,
		917	zero_cpos + zero_clusters);
		918
		919	out:
		920	return rc;
		921	}
		922
		923	/*
		924	* Zero one range returned from ocfs2_zero_extend_get_range(). The caller
		925	* has made sure that the entire range needs zeroing.
		926	*/
		927	static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
		928	u64 range_end)
		929	{
		930	int rc = 0;
		931	u64 next_pos;
		932	u64 zero_pos = range_start;
		933
		934	mlog(0, "range_start = %llu, range_end = %llu\n",
		935	(unsigned long long)range_start,
		936	(unsigned long long)range_end);
		937	BUG_ON(range_start >= range_end);
		938
		939	while (zero_pos < range_end) {
		940	next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
		941	if (next_pos > range_end)
		942	next_pos = range_end;
		943	rc = ocfs2_write_zero_page(inode, zero_pos, next_pos);
		944	if (rc < 0) {
		945	mlog_errno(rc);
		946	break;
		947	}
		948	zero_pos = next_pos;
805		949
806	/*	950	/*
807	* Very large extends have the potential to lock up	951	* Very large extends have the potential to lock up
@@ -810,16 +954,63 @@ static int ocfs2_zero_extend(struct inode *inode,
810	cond_resched();	954	cond_resched();
811	}	955	}
812		956
813	out:	957	return rc;
		958	}
		959
		960	int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
		961	loff_t zero_to_size)
		962	{
		963	int ret = 0;
		964	u64 zero_start, range_start = 0, range_end = 0;
		965	struct super_block *sb = inode->i_sb;
		966
		967	zero_start = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
		968	mlog(0, "zero_start %llu for i_size %llu\n",
		969	(unsigned long long)zero_start,
		970	(unsigned long long)i_size_read(inode));
		971	while (zero_start < zero_to_size) {
		972	ret = ocfs2_zero_extend_get_range(inode, di_bh, zero_start,
		973	zero_to_size,
		974	&range_start,
		975	&range_end);
		976	if (ret) {
		977	mlog_errno(ret);
		978	break;
		979	}
		980	if (!range_end)
		981	break;
		982	/* Trim the ends */
		983	if (range_start < zero_start)
		984	range_start = zero_start;
		985	if (range_end > zero_to_size)
		986	range_end = zero_to_size;
		987
		988	ret = ocfs2_zero_extend_range(inode, range_start,
		989	range_end);
		990	if (ret) {
		991	mlog_errno(ret);
		992	break;
		993	}
		994	zero_start = range_end;
		995	}
		996
814	return ret;	997	return ret;
815	}	998	}
816		999
817	int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)	1000	int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
		1001	u64 new_i_size, u64 zero_to)
818	{	1002	{
819	int ret;	1003	int ret;
820	u32 clusters_to_add;	1004	u32 clusters_to_add;
821	struct ocfs2_inode_info *oi = OCFS2_I(inode);	1005	struct ocfs2_inode_info *oi = OCFS2_I(inode);
822		1006
		1007	/*
		1008	* Only quota files call this without a bh, and they can't be
		1009	* refcounted.
		1010	*/
		1011	BUG_ON(!di_bh && (oi->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL));
		1012	BUG_ON(!di_bh && !(oi->ip_flags & OCFS2_INODE_SYSTEM_FILE));
		1013
823	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);	1014	clusters_to_add = ocfs2_clusters_for_bytes(inode->i_sb, new_i_size);
824	if (clusters_to_add < oi->ip_clusters)	1015	if (clusters_to_add < oi->ip_clusters)
825	clusters_to_add = 0;	1016	clusters_to_add = 0;
@@ -840,7 +1031,7 @@ int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, u64 zero_to)
840	* still need to zero the area between the old i_size and the	1031	* still need to zero the area between the old i_size and the
841	* new i_size.	1032	* new i_size.
842	*/	1033	*/
843	ret = ocfs2_zero_extend(inode, zero_to);	1034	ret = ocfs2_zero_extend(inode, di_bh, zero_to);
844	if (ret < 0)	1035	if (ret < 0)
845	mlog_errno(ret);	1036	mlog_errno(ret);
846		1037
@@ -862,27 +1053,15 @@ static int ocfs2_extend_file(struct inode *inode,
862	goto out;	1053	goto out;
863		1054
864	if (i_size_read(inode) == new_i_size)	1055	if (i_size_read(inode) == new_i_size)
865	goto out;	1056	goto out;
866	BUG_ON(new_i_size < i_size_read(inode));	1057	BUG_ON(new_i_size < i_size_read(inode));
867		1058
868	/*	1059	/*
869	* Fall through for converting inline data, even if the fs
870	* supports sparse files.
871	*
872	* The check for inline data here is legal - nobody can add
873	* the feature since we have i_mutex. We must check it again
874	* after acquiring ip_alloc_sem though, as paths like mmap
875	* might have raced us to converting the inode to extents.
876	*/
877	if (!(oi->ip_dyn_features & OCFS2_INLINE_DATA_FL)
878	&& ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
879	goto out_update_size;
880
881	/*
882	* The alloc sem blocks people in read/write from reading our	1060	* The alloc sem blocks people in read/write from reading our
883	* allocation until we're done changing it. We depend on	1061	* allocation until we're done changing it. We depend on
884	* i_mutex to block other extend/truncate calls while we're	1062	* i_mutex to block other extend/truncate calls while we're
885	* here.	1063	* here. We even have to hold it for sparse files because there
		1064	* might be some tail zeroing.
886	*/	1065	*/
887	down_write(&oi->ip_alloc_sem);	1066	down_write(&oi->ip_alloc_sem);
888		1067
@@ -899,14 +1078,16 @@ static int ocfs2_extend_file(struct inode *inode,
899	ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);	1078	ret = ocfs2_convert_inline_data_to_extents(inode, di_bh);
900	if (ret) {	1079	if (ret) {
901	up_write(&oi->ip_alloc_sem);	1080	up_write(&oi->ip_alloc_sem);
902
903	mlog_errno(ret);	1081	mlog_errno(ret);
904	goto out;	1082	goto out;
905	}	1083	}
906	}	1084	}
907		1085
908	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))	1086	if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
909	ret = ocfs2_extend_no_holes(inode, new_i_size, new_i_size);	1087	ret = ocfs2_zero_extend(inode, di_bh, new_i_size);
		1088	else
		1089	ret = ocfs2_extend_no_holes(inode, di_bh, new_i_size,
		1090	new_i_size);
910		1091
911	up_write(&oi->ip_alloc_sem);	1092	up_write(&oi->ip_alloc_sem);
912		1093