1 files changed, 79 insertions, 55 deletions
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index d8d69a72a10d..56e33590b656 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -16,6 +16,7 @@
 #include <linux/pagevec.h>
 #include <linux/mpage.h>
 #include <linux/fs.h>
+#include <linux/writeback.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
@@ -157,6 +158,32 @@ out_ignore:
 }
 /**
+ * gfs2_writepages - Write a bunch of dirty pages back to disk
+ * @mapping: The mapping to write
+ * @wbc: Write-back control
+ *
+ * For journaled files and/or ordered writes this just falls back to the
+ * kernel's default writepages path for now. We will probably want to change
+ * that eventually (i.e. when we look at allocate on flush).
+ *
+ * For the data=writeback case though we can already ignore buffer heads
+ * and write whole extents at once. This is a big reduction in the
+ * number of I/O requests we send and the bmap calls we make in this case.
+ *
+ * Returns: the value returned by mpage_writepages() or generic_writepages()
+ */
+static int gfs2_writepages(struct address_space *mapping,
+                           struct writeback_control *wbc)
+{
+        struct inode *inode = mapping->host;
+        struct gfs2_inode *ip = GFS2_I(inode);
+        struct gfs2_sbd *sdp = GFS2_SB(inode);
+        /* data=writeback and not a journaled-data inode: use the mpage path */
+        if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip))
+                return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
+        /* Every other combination goes through the generic writepages path */
+        return generic_writepages(mapping, wbc);
+}
+/**
 * stuffed_readpage - Fill in a Linux page with stuffed file data
 * @ip: the inode
 * @page: the page
@@ -256,7 +283,7 @@ out_unlock:
 *    the page lock and the glock) and return having done no I/O. Its
 *    obviously not something we'd want to do on too regular a basis.
 *    Any I/O we ignore at this time will be done via readpage later.
- * 2. We have to handle stuffed files here too.
+ * 2. We don't handle stuffed files here; we let readpage do the honours.
 * 3. mpage_readpages() does most of the heavy lifting in the common case.
 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
 * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
@@ -269,8 +296,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_holder gh;
-        unsigned page_idx;
+        int ret = 0;
-        int ret;
        int do_unlock = 0;
        if (likely(file != &gfs2_internal_file_sentinel)) {
@@ -289,29 +315,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
                        goto out_unlock;
        }
 skip_lock:
-        if (gfs2_is_stuffed(ip)) {
+        if (!gfs2_is_stuffed(ip))
-                struct pagevec lru_pvec;
-                pagevec_init(&lru_pvec, 0);
-                for (page_idx = 0; page_idx < nr_pages; page_idx++) {
-                        struct page *page = list_entry(pages->prev, struct page, lru);
-                        prefetchw(&page->flags);
-                        list_del(&page->lru);
-                        if (!add_to_page_cache(page, mapping,
-                                               page->index, GFP_KERNEL)) {
-                                ret = stuffed_readpage(ip, page);
-                                unlock_page(page);
-                                if (!pagevec_add(&lru_pvec, page))
-                                         __pagevec_lru_add(&lru_pvec);
-                        } else {
-                                page_cache_release(page);
-                        }
-                }
-                pagevec_lru_add(&lru_pvec);
-                ret = 0;
-        } else {
-                /* What we really want to do .... */
                ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
-        }
        if (do_unlock) {
                gfs2_glock_dq_m(1, &gh);
@@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file *file, struct page *page,
        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|LM_FLAG_TRY_1CB, &ip->i_gh);
        error = gfs2_glock_nq_atime(&ip->i_gh);
        if (unlikely(error)) {
-                if (error == GLR_TRYFAILED)
+                if (error == GLR_TRYFAILED) {
+                        unlock_page(page);
                        error = AOP_TRUNCATED_PAGE;
+                }
                goto out_uninit;
        }
@@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
        return;
 }
+/**
+ * gfs2_ok_for_dio - check that dio is valid on this file
+ * @ip: The inode
+ * @rw: READ or WRITE (accepted but not examined by the checks below)
+ * @offset: The offset at which we are reading or writing
+ *
+ * Direct I/O is refused for journaled-data inodes, for stuffed inodes and
+ * for requests whose offset lies beyond the current file size.
+ *
+ * Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
+ *          1 (to accept the i/o request)
+ */
+static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
+{
+        /*
+         * Should we return an error here? I can't see that O_DIRECT for
+         * a journaled file makes any sense. For now we'll silently fall
+         * back to buffered I/O, likewise we do the same for stuffed
+         * files since they are (a) small and (b) unaligned.
+         */
+        if (gfs2_is_jdata(ip))
+                return 0;
+        if (gfs2_is_stuffed(ip))
+                return 0;
+        /* An offset past the current i_size also falls back to buffered i/o */
+        if (offset > i_size_read(&ip->i_inode))
+                return 0;
+        return 1;
+}
 static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
                              const struct iovec *iov, loff_t offset,
                              unsigned long nr_segs)
@@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
        struct gfs2_holder gh;
        int rv;
-        if (rw == READ)
-                mutex_lock(&inode->i_mutex);
        /*
-         * Shared lock, even if its a write, since we do no allocation
+         * Deferred lock, even if it's a write, since we do no allocation
-         * on this path. All we need change is atime.
+         * on this path. All we need change is atime, and this lock mode
+         * ensures that other nodes have flushed their buffered read caches
+         * (i.e. their page cache entries for this inode). We do not,
+         * unfortunately have the option of only flushing a range like
+         * the VFS does.
         */
-        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+        gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
        rv = gfs2_glock_nq_atime(&gh);
        if (rv)
-                goto out;
+                return rv;
+        rv = gfs2_ok_for_dio(ip, rw, offset);
-        if (offset > i_size_read(inode))
+        if (rv != 1)
-                goto out;
+                goto out; /* dio not valid, fall back to buffered i/o */
-        /*
+        rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
-         * Should we return an error here? I can't see that O_DIRECT for
+                                           iov, offset, nr_segs,
-         * a journaled file makes any sense. For now we'll silently fall
+                                           gfs2_get_block_direct, NULL);
-         * back to buffered I/O, likewise we do the same for stuffed
-         * files since they are (a) small and (b) unaligned.
-         */
-        if (gfs2_is_jdata(ip))
-                goto out;
-        if (gfs2_is_stuffed(ip))
-                goto out;
-        rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
-                                            inode->i_sb->s_bdev,
-                                            iov, offset, nr_segs,
-                                            gfs2_get_block_direct, NULL);
 out:
        gfs2_glock_dq_m(1, &gh);
        gfs2_holder_uninit(&gh);
-        if (rw == READ)
-                mutex_unlock(&inode->i_mutex);
        return rv;
 }
@@ -763,6 +786,7 @@ out:
 const struct address_space_operations gfs2_file_aops = {
        .writepage = gfs2_writepage,
+        .writepages = gfs2_writepages,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
        .sync_page = block_sync_page,

diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index d8d69a72a10d..56e33590b656 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c
@@ -16,6 +16,7 @@
16	#include <linux/pagevec.h>	16	#include <linux/pagevec.h>
17	#include <linux/mpage.h>	17	#include <linux/mpage.h>
18	#include <linux/fs.h>	18	#include <linux/fs.h>
		19	#include <linux/writeback.h>
19	#include <linux/gfs2_ondisk.h>	20	#include <linux/gfs2_ondisk.h>
20	#include <linux/lm_interface.h>	21	#include <linux/lm_interface.h>
21		22
@@ -157,6 +158,32 @@ out_ignore:
157	}	158	}
158		159
159	/**	160	/**
		161	* gfs2_writepages - Write a bunch of dirty pages back to disk
		162	* @mapping: The mapping to write
		163	* @wbc: Write-back control
		164	*
		165	* For journaled files and/or ordered writes this just falls back to the
		166	* kernel's default writepages path for now. We will probably want to change
		167	* that eventually (i.e. when we look at allocate on flush).
		168	*
		169	* For the data=writeback case though we can already ignore buffer heads
		170	* and write whole extents at once. This is a big reduction in the
		171	* number of I/O requests we send and the bmap calls we make in this case.
		172	*/
		173	static int gfs2_writepages(struct address_space *mapping,
		174	struct writeback_control *wbc)
		175	{
		176	struct inode *inode = mapping->host;
		177	struct gfs2_inode *ip = GFS2_I(inode);
		178	struct gfs2_sbd *sdp = GFS2_SB(inode);
		179
		180	if (sdp->sd_args.ar_data == GFS2_DATA_WRITEBACK && !gfs2_is_jdata(ip))
		181	return mpage_writepages(mapping, wbc, gfs2_get_block_noalloc);
		182
		183	return generic_writepages(mapping, wbc);
		184	}
		185
		186	/**
160	* stuffed_readpage - Fill in a Linux page with stuffed file data	187	* stuffed_readpage - Fill in a Linux page with stuffed file data
161	* @ip: the inode	188	* @ip: the inode
162	* @page: the page	189	* @page: the page
@@ -256,7 +283,7 @@ out_unlock:
256	* the page lock and the glock) and return having done no I/O. Its	283	* the page lock and the glock) and return having done no I/O. Its
257	* obviously not something we'd want to do on too regular a basis.	284	* obviously not something we'd want to do on too regular a basis.
258	* Any I/O we ignore at this time will be done via readpage later.	285	* Any I/O we ignore at this time will be done via readpage later.
259	* 2. We have to handle stuffed files here too.	286	* 2. We don't handle stuffed files here we let readpage do the honours.
260	* 3. mpage_readpages() does most of the heavy lifting in the common case.	287	* 3. mpage_readpages() does most of the heavy lifting in the common case.
261	* 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.	288	* 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
262	* 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as	289	* 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
@@ -269,8 +296,7 @@ static int gfs2_readpages(struct file file, struct address_space mapping,
269	struct gfs2_inode *ip = GFS2_I(inode);	296	struct gfs2_inode *ip = GFS2_I(inode);
270	struct gfs2_sbd *sdp = GFS2_SB(inode);	297	struct gfs2_sbd *sdp = GFS2_SB(inode);
271	struct gfs2_holder gh;	298	struct gfs2_holder gh;
272	unsigned page_idx;	299	int ret = 0;
273	int ret;
274	int do_unlock = 0;	300	int do_unlock = 0;
275		301
276	if (likely(file != &gfs2_internal_file_sentinel)) {	302	if (likely(file != &gfs2_internal_file_sentinel)) {
@@ -289,29 +315,8 @@ static int gfs2_readpages(struct file file, struct address_space mapping,
289	goto out_unlock;	315	goto out_unlock;
290	}	316	}
291	skip_lock:	317	skip_lock:
292	if (gfs2_is_stuffed(ip)) {	318	if (!gfs2_is_stuffed(ip))
293	struct pagevec lru_pvec;
294	pagevec_init(&lru_pvec, 0);
295	for (page_idx = 0; page_idx < nr_pages; page_idx++) {
296	struct page *page = list_entry(pages->prev, struct page, lru);
297	prefetchw(&page->flags);
298	list_del(&page->lru);
299	if (!add_to_page_cache(page, mapping,
300	page->index, GFP_KERNEL)) {
301	ret = stuffed_readpage(ip, page);
302	unlock_page(page);
303	if (!pagevec_add(&lru_pvec, page))
304	__pagevec_lru_add(&lru_pvec);
305	} else {
306	page_cache_release(page);
307	}
308	}
309	pagevec_lru_add(&lru_pvec);
310	ret = 0;
311	} else {
312	/* What we really want to do .... */
313	ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);	319	ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
314	}
315		320
316	if (do_unlock) {	321	if (do_unlock) {
317	gfs2_glock_dq_m(1, &gh);	322	gfs2_glock_dq_m(1, &gh);
@@ -356,8 +361,10 @@ static int gfs2_prepare_write(struct file file, struct page page,
356	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME\|LM_FLAG_TRY_1CB, &ip->i_gh);	361	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME\|LM_FLAG_TRY_1CB, &ip->i_gh);
357	error = gfs2_glock_nq_atime(&ip->i_gh);	362	error = gfs2_glock_nq_atime(&ip->i_gh);
358	if (unlikely(error)) {	363	if (unlikely(error)) {
359	if (error == GLR_TRYFAILED)	364	if (error == GLR_TRYFAILED) {
		365	unlock_page(page);
360	error = AOP_TRUNCATED_PAGE;	366	error = AOP_TRUNCATED_PAGE;
		367	}
361	goto out_uninit;	368	goto out_uninit;
362	}	369	}
363		370
@@ -594,6 +601,36 @@ static void gfs2_invalidatepage(struct page *page, unsigned long offset)
594	return;	601	return;
595	}	602	}
596		603
		604	/**
		605	* gfs2_ok_for_dio - check that dio is valid on this file
		606	* @ip: The inode
		607	* @rw: READ or WRITE
		608	* @offset: The offset at which we are reading or writing
		609	*
		610	* Returns: 0 (to ignore the i/o request and thus fall back to buffered i/o)
		611	* 1 (to accept the i/o request)
		612	*/
		613	static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset)
		614	{
		615	/*
		616	* Should we return an error here? I can't see that O_DIRECT for
		617	* a journaled file makes any sense. For now we'll silently fall
		618	* back to buffered I/O, likewise we do the same for stuffed
		619	* files since they are (a) small and (b) unaligned.
		620	*/
		621	if (gfs2_is_jdata(ip))
		622	return 0;
		623
		624	if (gfs2_is_stuffed(ip))
		625	return 0;
		626
		627	if (offset > i_size_read(&ip->i_inode))
		628	return 0;
		629	return 1;
		630	}
		631
		632
		633
597	static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,	634	static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
598	const struct iovec *iov, loff_t offset,	635	const struct iovec *iov, loff_t offset,
599	unsigned long nr_segs)	636	unsigned long nr_segs)
@@ -604,42 +641,28 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
604	struct gfs2_holder gh;	641	struct gfs2_holder gh;
605	int rv;	642	int rv;
606		643
607	if (rw == READ)
608	mutex_lock(&inode->i_mutex);
609	/*	644	/*
610	* Shared lock, even if its a write, since we do no allocation	645	* Deferred lock, even if its a write, since we do no allocation
611	* on this path. All we need change is atime.	646	* on this path. All we need change is atime, and this lock mode
		647	* ensures that other nodes have flushed their buffered read caches
		648	* (i.e. their page cache entries for this inode). We do not,
		649	* unfortunately have the option of only flushing a range like
		650	* the VFS does.
612	*/	651	*/
613	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);	652	gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh);
614	rv = gfs2_glock_nq_atime(&gh);	653	rv = gfs2_glock_nq_atime(&gh);
615	if (rv)	654	if (rv)
616	goto out;	655	return rv;
617		656	rv = gfs2_ok_for_dio(ip, rw, offset);
618	if (offset > i_size_read(inode))	657	if (rv != 1)
619	goto out;	658	goto out; /* dio not valid, fall back to buffered i/o */
620		659
621	/*	660	rv = blockdev_direct_IO_no_locking(rw, iocb, inode, inode->i_sb->s_bdev,
622	* Should we return an error here? I can't see that O_DIRECT for	661	iov, offset, nr_segs,
623	* a journaled file makes any sense. For now we'll silently fall	662	gfs2_get_block_direct, NULL);
624	* back to buffered I/O, likewise we do the same for stuffed
625	* files since they are (a) small and (b) unaligned.
626	*/
627	if (gfs2_is_jdata(ip))
628	goto out;
629
630	if (gfs2_is_stuffed(ip))
631	goto out;
632
633	rv = blockdev_direct_IO_own_locking(rw, iocb, inode,
634	inode->i_sb->s_bdev,
635	iov, offset, nr_segs,
636	gfs2_get_block_direct, NULL);
637	out:	663	out:
638	gfs2_glock_dq_m(1, &gh);	664	gfs2_glock_dq_m(1, &gh);
639	gfs2_holder_uninit(&gh);	665	gfs2_holder_uninit(&gh);
640	if (rw == READ)
641	mutex_unlock(&inode->i_mutex);
642
643	return rv;	666	return rv;
644	}	667	}
645		668
@@ -763,6 +786,7 @@ out:
763		786
764	const struct address_space_operations gfs2_file_aops = {	787	const struct address_space_operations gfs2_file_aops = {
765	.writepage = gfs2_writepage,	788	.writepage = gfs2_writepage,
		789	.writepages = gfs2_writepages,
766	.readpage = gfs2_readpage,	790	.readpage = gfs2_readpage,
767	.readpages = gfs2_readpages,	791	.readpages = gfs2_readpages,
768	.sync_page = block_sync_page,	792	.sync_page = block_sync_page,