author     Steven Whitehouse <swhiteho@redhat.com>   2006-02-14 06:54:42 -0500
committer  Steven Whitehouse <swhiteho@redhat.com>   2006-02-14 06:54:42 -0500
commit     d1665e414297c3a46fd80cb8242ad0c8e82acae7
tree       7cb19fc4cbfc21d6d890dd3b373d3854920862db
parent     fc69d0d336214219abb521d8ff060f786d7f369e
[GFS2] Put back O_DIRECT support
This patch adds back O_DIRECT support with various caveats
attached:
1. Journaled data can be read via O_DIRECT, since it now uses the
same on-disk format as normal data files.
2. Journaled data writes with O_DIRECT will silently fall back to
normal buffered writes (should we really do this, or should we
return an error instead?)
3. Stuffed files will fall back to normal buffered I/O
4. All the usual corner cases (a write beyond the current end of file,
a write to an unallocated block) will also revert to normal buffered I/O.
The I/O path is slightly odd in that reads arrive at the page cache layer
with the lock for the file already held, while writes arrive unlocked.
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
 fs/gfs2/ops_address.c | 72
 fs/gfs2/ops_file.c    | 29
 2 files changed, 75 insertions(+), 26 deletions(-)
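As a rough illustration of what this change re-enables from userspace, here is a minimal, hypothetical sketch (not part of the patch; the file path, 4096-byte buffer size and alignment are illustrative assumptions): it opens a file on a GFS2 mount with O_DIRECT and issues an aligned read. Per the caveats above, journaled-data and stuffed files would still be served through the normal buffered path rather than a true direct transfer.

/* Hypothetical example only -- not part of the patch. */
#define _GNU_SOURCE		/* needed for O_DIRECT on Linux/glibc */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	void *buf;
	ssize_t n;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <file-on-gfs2>\n", argv[0]);
		return 1;
	}

	/* O_DIRECT reads are what this patch puts back for GFS2 */
	fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Buffer, offset and length aligned to 4096 bytes (assumed to
	 * satisfy the underlying device's alignment requirement). */
	if (posix_memalign(&buf, 4096, 4096)) {
		fprintf(stderr, "posix_memalign failed\n");
		close(fd);
		return 1;
	}

	n = pread(fd, buf, 4096, 0);	/* aligned offset and length */
	if (n < 0)
		perror("pread");
	else
		printf("read %zd bytes with O_DIRECT\n", n);

	free(buf);
	close(fd);
	return 0;
}

Run against a journaled-data or stuffed GFS2 file, the same program would still succeed, just via the buffered fallback described in caveats 2-4 above.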
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index b14357e89421..74706f352780 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -14,6 +14,7 @@
 #include <linux/buffer_head.h>
 #include <linux/pagemap.h>
 #include <linux/mpage.h>
+#include <linux/fs.h>
 #include <asm/semaphore.h>
 
 #include "gfs2.h"
@@ -555,30 +556,73 @@ static int gfs2_invalidatepage(struct page *page, unsigned long offset)
 	return ret;
 }
 
-static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
-			      loff_t offset, unsigned long nr_segs)
+static ssize_t gfs2_direct_IO_write(struct kiocb *iocb, const struct iovec *iov,
+				    loff_t offset, unsigned long nr_segs)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file->f_mapping->host;
+	struct gfs2_inode *ip = get_v2ip(inode);
+	struct gfs2_holder gh;
+	int rv;
+
+	/*
+	 * Shared lock, even though its write, since we do no allocation
+	 * on this path. All we need change is atime.
+	 */
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
+	rv = gfs2_glock_nq_m_atime(1, &gh);
+	if (rv)
+		goto out;
+
+	/*
+	 * Should we return an error here? I can't see that O_DIRECT for
+	 * a journaled file makes any sense. For now we'll silently fall
+	 * back to buffered I/O, likewise we do the same for stuffed
+	 * files since they are (a) small and (b) unaligned.
+	 */
+	if (gfs2_is_jdata(ip))
+		goto out;
+
+	if (gfs2_is_stuffed(ip))
+		goto out;
+
+	rv = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
+				  iov, offset, nr_segs, get_blocks_noalloc,
+				  NULL, DIO_OWN_LOCKING);
+out:
+	gfs2_glock_dq_m(1, &gh);
+	gfs2_holder_uninit(&gh);
+
+	return rv;
+}
+
+/**
+ * gfs2_direct_IO
+ *
+ * This is called with a shared lock already held for the read path.
+ * Currently, no locks are held when the write path is called.
+ */
+static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
+			      const struct iovec *iov, loff_t offset,
+			      unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	struct gfs2_inode *ip = get_v2ip(inode);
 	struct gfs2_sbd *sdp = ip->i_sbd;
-	get_blocks_t *gb = get_blocks;
 
 	atomic_inc(&sdp->sd_ops_address);
 
-	if (gfs2_is_jdata(ip))
-		return -EINVAL;
+	if (rw == WRITE)
+		return gfs2_direct_IO_write(iocb, iov, offset, nr_segs);
 
-	if (rw == WRITE) {
-		return -EOPNOTSUPP; /* for now */
-	} else {
-		if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
-		    gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
-			return -EINVAL;
-	}
+	if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
+	    gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
+		return -EINVAL;
 
-	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
-				  offset, nr_segs, gb, NULL);
+	return __blockdev_direct_IO(READ, iocb, inode, inode->i_sb->s_bdev, iov,
+				    offset, nr_segs, get_blocks, NULL,
+				    DIO_OWN_LOCKING);
 }
 
 struct address_space_operations gfs2_file_aops = {
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index 56820b39a993..bcde7a0b76f1 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -176,16 +176,16 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
 		 * If any segment has a negative length, or the cumulative
 		 * length ever wraps negative then return -EINVAL.
 		 */
 		count += iv->iov_len;
 		if (unlikely((ssize_t)(count|iv->iov_len) < 0))
 			return -EINVAL;
 		if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
 			continue;
 		if (seg == 0)
 			return -EFAULT;
 		nr_segs = seg;
 		count -= iv->iov_len;	/* This segment is no good */
 		break;
 	}
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
@@ -204,10 +204,14 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
 	retval = gfs2_glock_nq_m_atime(1, &gh);
 	if (retval)
 		goto out;
-
+	if (gfs2_is_stuffed(ip)) {
+		gfs2_glock_dq_m(1, &gh);
+		gfs2_holder_uninit(&gh);
+		goto fallback_to_normal;
+	}
 	size = i_size_read(inode);
 	if (pos < size) {
 		retval = gfs2_direct_IO_read(iocb, iov, pos, nr_segs);
 		if (retval > 0 && !is_sync_kiocb(iocb))
 			retval = -EIOCBQUEUED;
 		if (retval > 0)
@@ -219,6 +223,7 @@ static ssize_t __gfs2_file_aio_read(struct kiocb *iocb,
 			goto out;
 	}
 
+fallback_to_normal:
 	retval = 0;
 	if (count) {
 		for (seg = 0; seg < nr_segs; seg++) {