ocfs2: When zero extending, do it by page.

ocfs2_zero_extend() does its zeroing block by block, but it calls a function named ocfs2_write_zero_page(). Let's have ocfs2_write_zero_page() handle the page level. From ocfs2_zero_extend()'s perspective, it is now page-at-a-time.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
Cc: stable@kernel.org
author: Joel Becker <joel.becker@oracle.com> 2010-07-06 17:36:06 -0400
committer: Joel Becker <joel.becker@oracle.com> 2010-07-08 16:24:49 -0400
commit: a4bfb4cf11fd2211b788af59dc8a8b4394bca227 (patch)
tree: c06f45ac5a992575a0e47dbc80bdf187db030cca
parent: 1739da40543ed2129050ccfa8a076a851ab6ed00 (diff)
2 files changed, 84 insertions, 64 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc18..9a5c931439bd 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -459,36 +459,6 @@ int walk_page_buffers(	handle_t *handle,
        return ret;
 }
-handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
-                                                         struct page *page,
-                                                         unsigned from,
-                                                         unsigned to)
-{
-        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
-        handle_t *handle;
-        int ret = 0;
-        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
-        if (IS_ERR(handle)) {
-                ret = -ENOMEM;
-                mlog_errno(ret);
-                goto out;
-        }
-        if (ocfs2_should_order_data(inode)) {
-                ret = ocfs2_jbd2_file_inode(handle, inode);
-                if (ret < 0)
-                        mlog_errno(ret);
-        }
-out:
-        if (ret) {
-                if (!IS_ERR(handle))
-                        ocfs2_commit_trans(osb, handle);
-                handle = ERR_PTR(ret);
-        }
-        return handle;
-}
 static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
 {
        sector_t status;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c447..4cfc976a9067 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
        return status;
 }
+/*
+ * While a write will already be ordering the data, a truncate will not.
+ * Thus, we need to explicitly order the zeroed pages.
+ */
+static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
+{
+        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+        handle_t *handle = NULL;
+        int ret = 0;
+        if (!ocfs2_should_order_data(inode))
+                goto out;
+        handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+        if (IS_ERR(handle)) {
+                ret = -ENOMEM;
+                mlog_errno(ret);
+                goto out;
+        }
+        ret = ocfs2_jbd2_file_inode(handle, inode);
+        if (ret < 0)
+                mlog_errno(ret);
+out:
+        if (ret) {
+                if (!IS_ERR(handle))
+                        ocfs2_commit_trans(osb, handle);
+                handle = ERR_PTR(ret);
+        }
+        return handle;
+}
 /* Some parts of this taken from generic_cont_expand, which turned out
 * to be too fragile to do exactly what we need without us having to
 * worry about recursive locking in ->write_begin() and ->write_end(). */
-static int ocfs2_write_zero_page(struct inode *inode,
+static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
-                                 u64 size)
+                                 u64 abs_to)
 {
        struct address_space *mapping = inode->i_mapping;
        struct page *page;
-        unsigned long index;
+        unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
-        unsigned int offset;
        handle_t *handle = NULL;
        int ret;
+        unsigned zero_from, zero_to, block_start, block_end;
-        offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
+        BUG_ON(abs_from >= abs_to);
-        /* ugh.  in prepare/commit_write, if from==to==start of block, we
+        BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
-        ** skip the prepare.  make sure we never send an offset for the start
+        BUG_ON(abs_from & (inode->i_blkbits - 1));
-        ** of a block
-        */
-        if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
-                offset++;
-        }
-        index = size >> PAGE_CACHE_SHIFT;
        page = grab_cache_page(mapping, index);
        if (!page) {
@@ -754,31 +781,51 @@ static int ocfs2_write_zero_page(struct inode *inode,
                goto out;
        }
-        ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);
+        /* Get the offsets within the page that we want to zero */
-        if (ret < 0) {
+        zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
-                mlog_errno(ret);
+        zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
-                goto out_unlock;
+        if (!zero_to)
-        }
+                zero_to = PAGE_CACHE_SIZE;
-        if (ocfs2_should_order_data(inode)) {
+        /* We know that zero_from is block aligned */
-                handle = ocfs2_start_walk_page_trans(inode, page, offset,
+        for (block_start = zero_from; block_start < zero_to;
-                                                     offset);
+             block_start = block_end) {
-                if (IS_ERR(handle)) {
+                block_end = block_start + (1 << inode->i_blkbits);
-                        ret = PTR_ERR(handle);
-                        handle = NULL;
+                /*
+                 * block_start is block-aligned.  Bump it by one to
+                 * force ocfs2_{prepare,commit}_write() to zero the
+                 * whole block.
+                 */
+                ret = ocfs2_prepare_write_nolock(inode, page,
+                                                 block_start + 1,
+                                                 block_start + 1);
+                if (ret < 0) {
+                        mlog_errno(ret);
                        goto out_unlock;
                }
-        }
-        /* must not update i_size! */
+                if (!handle) {
-        ret = block_commit_write(page, offset, offset);
+                        handle = ocfs2_zero_start_ordered_transaction(inode);
-        if (ret < 0)
+                        if (IS_ERR(handle)) {
-                mlog_errno(ret);
+                                ret = PTR_ERR(handle);
-        else
+                                handle = NULL;
-                ret = 0;
+                                break;
+                        }
+                }
+                /* must not update i_size! */
+                ret = block_commit_write(page, block_start + 1,
+                                         block_start + 1);
+                if (ret < 0)
+                        mlog_errno(ret);
+                else
+                        ret = 0;
+        }
        if (handle)
                ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
 out_unlock:
        unlock_page(page);
        page_cache_release(page);
@@ -790,18 +837,21 @@ static int ocfs2_zero_extend(struct inode *inode,
                             u64 zero_to_size)
 {
        int ret = 0;
-        u64 start_off;
+        u64 start_off, next_off;
        struct super_block *sb = inode->i_sb;
        start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
        while (start_off < zero_to_size) {
-                ret = ocfs2_write_zero_page(inode, start_off);
+                next_off = (start_off & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
+                if (next_off > zero_to_size)
+                        next_off = zero_to_size;
+                ret = ocfs2_write_zero_page(inode, start_off, next_off);
                if (ret < 0) {
                        mlog_errno(ret);
                        goto out;
                }
-                start_off += sb->s_blocksize;
+                start_off = next_off;
                /*
                 * Very large extends have the potential to lock up
author	Joel Becker <joel.becker@oracle.com>	2010-07-06 17:36:06 -0400
committer	Joel Becker <joel.becker@oracle.com>	2010-07-08 16:24:49 -0400
commit	a4bfb4cf11fd2211b788af59dc8a8b4394bca227 (patch)
tree	c06f45ac5a992575a0e47dbc80bdf187db030cca
parent	1739da40543ed2129050ccfa8a076a851ab6ed00 (diff)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3623ca20cc18..9a5c931439bd 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -459,36 +459,6 @@ int walk_page_buffers( handle_t *handle,
459	return ret;	459	return ret;
460	}	460	}
461		461
462	handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
463	struct page *page,
464	unsigned from,
465	unsigned to)
466	{
467	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
468	handle_t *handle;
469	int ret = 0;
470
471	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
472	if (IS_ERR(handle)) {
473	ret = -ENOMEM;
474	mlog_errno(ret);
475	goto out;
476	}
477
478	if (ocfs2_should_order_data(inode)) {
479	ret = ocfs2_jbd2_file_inode(handle, inode);
480	if (ret < 0)
481	mlog_errno(ret);
482	}
483	out:
484	if (ret) {
485	if (!IS_ERR(handle))
486	ocfs2_commit_trans(osb, handle);
487	handle = ERR_PTR(ret);
488	}
489	return handle;
490	}
491
492	static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)	462	static sector_t ocfs2_bmap(struct address_space *mapping, sector_t block)
493	{	463	{
494	sector_t status;	464	sector_t status;


diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6a13ea64c447..4cfc976a9067 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -724,28 +724,55 @@ leave:
724	return status;	724	return status;
725	}	725	}
726		726
		727	/*
		728	* While a write will already be ordering the data, a truncate will not.
		729	* Thus, we need to explicitly order the zeroed pages.
		730	*/
		731	static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode)
		732	{
		733	struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
		734	handle_t *handle = NULL;
		735	int ret = 0;
		736
		737	if (!ocfs2_should_order_data(inode))
		738	goto out;
		739
		740	handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
		741	if (IS_ERR(handle)) {
		742	ret = -ENOMEM;
		743	mlog_errno(ret);
		744	goto out;
		745	}
		746
		747	ret = ocfs2_jbd2_file_inode(handle, inode);
		748	if (ret < 0)
		749	mlog_errno(ret);
		750
		751	out:
		752	if (ret) {
		753	if (!IS_ERR(handle))
		754	ocfs2_commit_trans(osb, handle);
		755	handle = ERR_PTR(ret);
		756	}
		757	return handle;
		758	}
		759
727	/* Some parts of this taken from generic_cont_expand, which turned out	760	/* Some parts of this taken from generic_cont_expand, which turned out
728	* to be too fragile to do exactly what we need without us having to	761	* to be too fragile to do exactly what we need without us having to
729	* worry about recursive locking in ->write_begin() and ->write_end(). */	762	* worry about recursive locking in ->write_begin() and ->write_end(). */
730	static int ocfs2_write_zero_page(struct inode *inode,	763	static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
731	u64 size)	764	u64 abs_to)
732	{	765	{
733	struct address_space *mapping = inode->i_mapping;	766	struct address_space *mapping = inode->i_mapping;
734	struct page *page;	767	struct page *page;
735	unsigned long index;	768	unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
736	unsigned int offset;
737	handle_t *handle = NULL;	769	handle_t *handle = NULL;
738	int ret;	770	int ret;
		771	unsigned zero_from, zero_to, block_start, block_end;
739		772
740	offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */	773	BUG_ON(abs_from >= abs_to);
741	/* ugh. in prepare/commit_write, if from==to==start of block, we	774	BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
742	** skip the prepare. make sure we never send an offset for the start	775	BUG_ON(abs_from & (inode->i_blkbits - 1));
743	** of a block
744	*/
745	if ((offset & (inode->i_sb->s_blocksize - 1)) == 0) {
746	offset++;
747	}
748	index = size >> PAGE_CACHE_SHIFT;
749		776
750	page = grab_cache_page(mapping, index);	777	page = grab_cache_page(mapping, index);
751	if (!page) {	778	if (!page) {
@@ -754,31 +781,51 @@ static int ocfs2_write_zero_page(struct inode *inode,
754	goto out;	781	goto out;
755	}	782	}
756		783
757	ret = ocfs2_prepare_write_nolock(inode, page, offset, offset);	784	/* Get the offsets within the page that we want to zero */
758	if (ret < 0) {	785	zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
759	mlog_errno(ret);	786	zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
760	goto out_unlock;	787	if (!zero_to)
761	}	788	zero_to = PAGE_CACHE_SIZE;
762		789
763	if (ocfs2_should_order_data(inode)) {	790	/* We know that zero_from is block aligned */
764	handle = ocfs2_start_walk_page_trans(inode, page, offset,	791	for (block_start = zero_from; block_start < zero_to;
765	offset);	792	block_start = block_end) {
766	if (IS_ERR(handle)) {	793	block_end = block_start + (1 << inode->i_blkbits);
767	ret = PTR_ERR(handle);	794
768	handle = NULL;	795	/*
		796	* block_start is block-aligned. Bump it by one to
		797	* force ocfs2_{prepare,commit}_write() to zero the
		798	* whole block.
		799	*/
		800	ret = ocfs2_prepare_write_nolock(inode, page,
		801	block_start + 1,
		802	block_start + 1);
		803	if (ret < 0) {
		804	mlog_errno(ret);
769	goto out_unlock;	805	goto out_unlock;
770	}	806	}
771	}
772		807
773	/* must not update i_size! */	808	if (!handle) {
774	ret = block_commit_write(page, offset, offset);	809	handle = ocfs2_zero_start_ordered_transaction(inode);
775	if (ret < 0)	810	if (IS_ERR(handle)) {
776	mlog_errno(ret);	811	ret = PTR_ERR(handle);
777	else	812	handle = NULL;
778	ret = 0;	813	break;
		814	}
		815	}
		816
		817	/* must not update i_size! */
		818	ret = block_commit_write(page, block_start + 1,
		819	block_start + 1);
		820	if (ret < 0)
		821	mlog_errno(ret);
		822	else
		823	ret = 0;
		824	}
779		825
780	if (handle)	826	if (handle)
781	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);	827	ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
		828
782	out_unlock:	829	out_unlock:
783	unlock_page(page);	830	unlock_page(page);
784	page_cache_release(page);	831	page_cache_release(page);
@@ -790,18 +837,21 @@ static int ocfs2_zero_extend(struct inode *inode,
790	u64 zero_to_size)	837	u64 zero_to_size)
791	{	838	{
792	int ret = 0;	839	int ret = 0;
793	u64 start_off;	840	u64 start_off, next_off;
794	struct super_block *sb = inode->i_sb;	841	struct super_block *sb = inode->i_sb;
795		842
796	start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));	843	start_off = ocfs2_align_bytes_to_blocks(sb, i_size_read(inode));
797	while (start_off < zero_to_size) {	844	while (start_off < zero_to_size) {
798	ret = ocfs2_write_zero_page(inode, start_off);	845	next_off = (start_off & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
		846	if (next_off > zero_to_size)
		847	next_off = zero_to_size;
		848	ret = ocfs2_write_zero_page(inode, start_off, next_off);
799	if (ret < 0) {	849	if (ret < 0) {
800	mlog_errno(ret);	850	mlog_errno(ret);
801	goto out;	851	goto out;
802	}	852	}
803		853
804	start_off += sb->s_blocksize;	854	start_off = next_off;
805		855
806	/*	856	/*
807	* Very large extends have the potential to lock up	857	* Very large extends have the potential to lock up