ocfs2: take data locks around extend

We need to take a data lock around extends to protect the pages that ocfs2_zero_extend is going to be pulling into the page cache. Otherwise an extend on one node might populate the page cache with data pages that have no lock coverage.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
author: Mark Fasheh <mark.fasheh@oracle.com> 2006-05-05 22:04:03 -0400
committer: Mark Fasheh <mark.fasheh@oracle.com> 2006-05-17 17:38:47 -0400
commit: 53013cba4118a5cfe8f7c7ea5e5bc1c48b160f76 (patch)
tree: 5170ed12fbe07b5e8557e61952aa27c25034bd7a /fs/ocfs2/aops.c
parent: 0c056c50a6218e0e577817c16ba8851af593d742 (diff)
1 files changed, 39 insertions, 7 deletions
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0d858d0b25be..47152bf9a7f2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -276,13 +276,29 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
        return ret;
 }
+/* This can also be called from ocfs2_write_zero_page() which has done
+ * it's own cluster locking. */
+int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
+                               unsigned from, unsigned to)
+{
+        int ret;
+        down_read(&OCFS2_I(inode)->ip_alloc_sem);
+        ret = block_prepare_write(page, from, to, ocfs2_get_block);
+        up_read(&OCFS2_I(inode)->ip_alloc_sem);
+        return ret;
+}
 /*
 * ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called
 * from loopback.  It must be able to perform its own locking around
 * ocfs2_get_block().
 */
-int ocfs2_prepare_write(struct file *file, struct page *page,
+static int ocfs2_prepare_write(struct file *file, struct page *page,
-                        unsigned from, unsigned to)
+                               unsigned from, unsigned to)
 {
        struct inode *inode = page->mapping->host;
        int ret;
@@ -295,11 +311,7 @@ int ocfs2_prepare_write(struct file *file, struct page *page,
                goto out;
        }
-        down_read(&OCFS2_I(inode)->ip_alloc_sem);
+        ret = ocfs2_prepare_write_nolock(inode, page, from, to);
-        ret = block_prepare_write(page, from, to, ocfs2_get_block);
-        up_read(&OCFS2_I(inode)->ip_alloc_sem);
        ocfs2_meta_unlock(inode, 0);
 out:
@@ -625,11 +637,31 @@ static ssize_t ocfs2_direct_IO(int rw,
        int ret;
        mlog_entry_void();
+        /*
+         * We get PR data locks even for O_DIRECT.  This allows
+         * concurrent O_DIRECT I/O but doesn't let O_DIRECT with
+         * extending and buffered zeroing writes race.  If they did
+         * race then the buffered zeroing could be written back after
+         * the O_DIRECT I/O.  It's one thing to tell people not to mix
+         * buffered and O_DIRECT writes, but expecting them to
+         * understand that file extension is also an implicit buffered
+         * write is too much.  By getting the PR we force writeback of
+         * the buffered zeroing before proceeding.
+         */
+        ret = ocfs2_data_lock(inode, 0);
+        if (ret < 0) {
+                mlog_errno(ret);
+                goto out;
+        }
+        ocfs2_data_unlock(inode, 0);
        ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
                                            inode->i_sb->s_bdev, iov, offset,
                                            nr_segs, 
                                            ocfs2_direct_IO_get_blocks,
                                            ocfs2_dio_end_io);
+out:
        mlog_exit(ret);
        return ret;
 }
author	Mark Fasheh <mark.fasheh@oracle.com>	2006-05-05 22:04:03 -0400
committer	Mark Fasheh <mark.fasheh@oracle.com>	2006-05-17 17:38:47 -0400
commit	53013cba4118a5cfe8f7c7ea5e5bc1c48b160f76 (patch)
tree	5170ed12fbe07b5e8557e61952aa27c25034bd7a /fs/ocfs2/aops.c
parent	0c056c50a6218e0e577817c16ba8851af593d742 (diff)

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 0d858d0b25be..47152bf9a7f2 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -276,13 +276,29 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
276	return ret;	276	return ret;
277	}	277	}
278		278
		279	/* This can also be called from ocfs2_write_zero_page() which has done
		280	* it's own cluster locking. */
		281	int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
		282	unsigned from, unsigned to)
		283	{
		284	int ret;
		285
		286	down_read(&OCFS2_I(inode)->ip_alloc_sem);
		287
		288	ret = block_prepare_write(page, from, to, ocfs2_get_block);
		289
		290	up_read(&OCFS2_I(inode)->ip_alloc_sem);
		291
		292	return ret;
		293	}
		294
279	/*	295	/*
280	* ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called	296	* ocfs2_prepare_write() can be an outer-most ocfs2 call when it is called
281	* from loopback. It must be able to perform its own locking around	297	* from loopback. It must be able to perform its own locking around
282	* ocfs2_get_block().	298	* ocfs2_get_block().
283	*/	299	*/
284	int ocfs2_prepare_write(struct file *file, struct page *page,	300	static int ocfs2_prepare_write(struct file *file, struct page *page,
285	unsigned from, unsigned to)	301	unsigned from, unsigned to)
286	{	302	{
287	struct inode *inode = page->mapping->host;	303	struct inode *inode = page->mapping->host;
288	int ret;	304	int ret;
@@ -295,11 +311,7 @@ int ocfs2_prepare_write(struct file *file, struct page *page,
295	goto out;	311	goto out;
296	}	312	}
297		313
298	down_read(&OCFS2_I(inode)->ip_alloc_sem);	314	ret = ocfs2_prepare_write_nolock(inode, page, from, to);
299
300	ret = block_prepare_write(page, from, to, ocfs2_get_block);
301
302	up_read(&OCFS2_I(inode)->ip_alloc_sem);
303		315
304	ocfs2_meta_unlock(inode, 0);	316	ocfs2_meta_unlock(inode, 0);
305	out:	317	out:
@@ -625,11 +637,31 @@ static ssize_t ocfs2_direct_IO(int rw,
625	int ret;	637	int ret;
626		638
627	mlog_entry_void();	639	mlog_entry_void();
		640
		641	/*
		642	* We get PR data locks even for O_DIRECT. This allows
		643	* concurrent O_DIRECT I/O but doesn't let O_DIRECT with
		644	* extending and buffered zeroing writes race. If they did
		645	* race then the buffered zeroing could be written back after
		646	* the O_DIRECT I/O. It's one thing to tell people not to mix
		647	* buffered and O_DIRECT writes, but expecting them to
		648	* understand that file extension is also an implicit buffered
		649	* write is too much. By getting the PR we force writeback of
		650	* the buffered zeroing before proceeding.
		651	*/
		652	ret = ocfs2_data_lock(inode, 0);
		653	if (ret < 0) {
		654	mlog_errno(ret);
		655	goto out;
		656	}
		657	ocfs2_data_unlock(inode, 0);
		658
628	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,	659	ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
629	inode->i_sb->s_bdev, iov, offset,	660	inode->i_sb->s_bdev, iov, offset,
630	nr_segs,	661	nr_segs,
631	ocfs2_direct_IO_get_blocks,	662	ocfs2_direct_IO_get_blocks,
632	ocfs2_dio_end_io);	663	ocfs2_dio_end_io);
		664	out:
633	mlog_exit(ret);	665	mlog_exit(ret);
634	return ret;	666	return ret;
635	}	667	}