author     Christoph Hellwig <hch@lst.de>            2009-11-03 10:44:53 -0500
committer  Al Viro <viro@zeniv.linux.org.uk>         2009-12-16 12:16:49 -0500
commit     1e431f5ce78f3ae8254d725060288b78ff74f086 (patch)
tree       a144fd7b6120ec61958c82023b25620a18aa3d6d /fs
parent     1c7c474c31aea6d5cb2fb35f31d9e9e91ae466b1 (diff)
cleanup blockdev_direct_IO locking
Currently the locking in blockdev_direct_IO is a mess: we have three different locking types and very confusing checks for some of them. The most complicated one is DIO_OWN_LOCKING for reads, which happens to not actually be used.

This patch gets rid of DIO_OWN_LOCKING - as mentioned above the read case is unused anyway, and the write side is almost identical to DIO_NO_LOCKING. The difference is that DIO_NO_LOCKING always sets the create argument for the get_blocks callback to zero, but we can easily move that into the actual get_blocks callbacks. There are four users of the DIO_NO_LOCKING mode: gfs already ignores the create argument and thus is fine with the new version; ocfs2 only errors out if create were ever set, and we can remove this dead code now; the block device code only ever uses create for an error message if we are fully beyond the device, which can never happen; and last but not least XFS will need the new behaviour for writes.

Now we can replace the lock_type variable with a flags one, where no flag means the DIO_NO_LOCKING behaviour and DIO_LOCKING is kept as the first flag. Separate out the check for not allowing to fill holes into a separate flag, although for now both flags always get set at the same time.

Also revamp the documentation of the locking scheme to actually make sense.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
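For readers following along, the last argument to __blockdev_direct_IO becomes a flags word after this patch. A minimal sketch of how callers are expected to pick a behaviour is below; the flag values and the convenience wrappers actually live in include/linux/fs.h, which the fs/-limited diffstat on this page does not show, so the bodies here are illustrative rather than quoted from the patch.

/*
 * Illustrative sketch only: the real definitions are in include/linux/fs.h,
 * outside the fs/-limited view shown on this page.
 */
enum {
	DIO_LOCKING	= 0x01,	/* need i_mutex/i_alloc_sem locking between
				   buffered and direct access */
	DIO_SKIP_HOLES	= 0x02,	/* don't fill holes inside i_size; the caller
				   falls back to buffered I/O instead */
};

/* "Dumb" filesystems: take the fancy locking and refuse to fill holes. */
static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
	loff_t offset, unsigned long nr_segs, get_block_t get_block,
	dio_iodone_t end_io)
{
	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
				    nr_segs, get_block, end_io,
				    DIO_LOCKING | DIO_SKIP_HOLES);
}

/* Filesystems that synchronize direct I/O themselves (XFS, block devices). */
static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
	loff_t offset, unsigned long nr_segs, get_block_t get_block,
	dio_iodone_t end_io)
{
	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
				    nr_segs, get_block, end_io, 0);
}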
Diffstat (limited to 'fs')
-rw-r--r--  fs/direct-io.c                129
-rw-r--r--  fs/ocfs2/aops.c                34
-rw-r--r--  fs/xfs/linux-2.6/xfs_aops.c    20
3 files changed, 63 insertions, 120 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b912270942fa..7dde0df8e8b6 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -53,13 +53,6 @@
  *
  * If blkfactor is zero then the user's request was aligned to the filesystem's
  * blocksize.
- *
- * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
- * This determines whether we need to do the fancy locking which prevents
- * direct-IO from being able to read uninitialised disk blocks.  If its zero
- * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
- * not held for the entire direct write (taken briefly, initially, during a
- * direct read though, but its never held for the duration of a direct-IO).
  */
 
 struct dio {
@@ -68,7 +61,7 @@ struct dio {
 	struct inode *inode;
 	int rw;
 	loff_t i_size;			/* i_size when submitted */
-	int lock_type;			/* doesn't change */
+	int flags;			/* doesn't change */
 	unsigned blkbits;		/* doesn't change */
 	unsigned blkfactor;		/* When we're using an alignment which
 					   is finer than the filesystem's soft
@@ -240,7 +233,8 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
 	if (dio->end_io && dio->result)
 		dio->end_io(dio->iocb, offset, transferred,
 			    dio->map_bh.b_private);
-	if (dio->lock_type == DIO_LOCKING)
+
+	if (dio->flags & DIO_LOCKING)
 		/* lockdep: non-owner release */
 		up_read_non_owner(&dio->inode->i_alloc_sem);
 
@@ -515,21 +509,24 @@ static int get_more_blocks(struct dio *dio)
 		map_bh->b_state = 0;
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
 
+		/*
+		 * For writes inside i_size on a DIO_SKIP_HOLES filesystem we
+		 * forbid block creations: only overwrites are permitted.
+		 * We will return early to the caller once we see an
+		 * unmapped buffer head returned, and the caller will fall
+		 * back to buffered I/O.
+		 *
+		 * Otherwise the decision is left to the get_blocks method,
+		 * which may decide to handle it or also return an unmapped
+		 * buffer head.
+		 */
 		create = dio->rw & WRITE;
-		if (dio->lock_type == DIO_LOCKING) {
+		if (dio->flags & DIO_SKIP_HOLES) {
 			if (dio->block_in_file < (i_size_read(dio->inode) >>
 							dio->blkbits))
 				create = 0;
-		} else if (dio->lock_type == DIO_NO_LOCKING) {
-			create = 0;
 		}
 
-		/*
-		 * For writes inside i_size we forbid block creations: only
-		 * overwrites are permitted.  We fall back to buffered writes
-		 * at a higher level for inside-i_size block-instantiating
-		 * writes.
-		 */
 		ret = (*dio->get_block)(dio->inode, fs_startblk,
 						map_bh, create);
 	}
@@ -1039,7 +1036,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	 * we can let i_mutex go now that its achieved its purpose
 	 * of protecting us from looking up uninitialized blocks.
 	 */
-	if ((rw == READ) && (dio->lock_type == DIO_LOCKING))
+	if (rw == READ && (dio->flags & DIO_LOCKING))
 		mutex_unlock(&dio->inode->i_mutex);
 
 	/*
@@ -1086,30 +1083,28 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 
 /*
  * This is a library function for use by filesystem drivers.
- * The locking rules are governed by the dio_lock_type parameter.
  *
- * DIO_NO_LOCKING (no locking, for raw block device access)
- * For writes, i_mutex is not held on entry; it is never taken.
+ * The locking rules are governed by the flags parameter:
+ *  - if the flags value contains DIO_LOCKING we use a fancy locking
+ *    scheme for dumb filesystems.
+ *    For writes this function is called under i_mutex and returns with
+ *    i_mutex held, for reads, i_mutex is not held on entry, but it is
+ *    taken and dropped again before returning.
+ *    For reads and writes i_alloc_sem is taken in shared mode and released
+ *    on I/O completion (which may happen asynchronously after returning to
+ *    the caller).
  *
- * DIO_LOCKING (simple locking for regular files)
- * For writes we are called under i_mutex and return with i_mutex held, even
- * though it is internally dropped.
- * For reads, i_mutex is not held on entry, but it is taken and dropped before
- * returning.
- *
- * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
- * uninitialised data, allowing parallel direct readers and writers)
- * For writes we are called without i_mutex, return without it, never touch it.
- * For reads we are called under i_mutex and return with i_mutex held, even
- * though it may be internally dropped.
- *
- * Additional i_alloc_sem locking requirements described inline below.
+ *  - if the flags value does NOT contain DIO_LOCKING we don't use any
+ *    internal locking but rather rely on the filesystem to synchronize
+ *    direct I/O reads/writes versus each other and truncate.
+ *    For reads and writes both i_mutex and i_alloc_sem are not held on
+ *    entry and are never taken.
  */
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-	int dio_lock_type)
+	int flags)
 {
 	int seg;
 	size_t size;
@@ -1120,8 +1115,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	ssize_t retval = -EINVAL;
 	loff_t end = offset;
 	struct dio *dio;
-	int release_i_mutex = 0;
-	int acquire_i_mutex = 0;
 
 	if (rw & WRITE)
 		rw = WRITE_ODIRECT_PLUG;
@@ -1156,43 +1149,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	if (!dio)
 		goto out;
 
-	/*
-	 * For block device access DIO_NO_LOCKING is used,
-	 *	neither readers nor writers do any locking at all
-	 * For regular files using DIO_LOCKING,
-	 *	readers need to grab i_mutex and i_alloc_sem
-	 *	writers need to grab i_alloc_sem only (i_mutex is already held)
-	 * For regular files using DIO_OWN_LOCKING,
-	 *	neither readers nor writers take any locks here
-	 */
-	dio->lock_type = dio_lock_type;
-	if (dio_lock_type != DIO_NO_LOCKING) {
+	dio->flags = flags;
+	if (dio->flags & DIO_LOCKING) {
 		/* watch out for a 0 len io from a tricksy fs */
 		if (rw == READ && end > offset) {
-			struct address_space *mapping;
+			struct address_space *mapping =
+					iocb->ki_filp->f_mapping;
 
-			mapping = iocb->ki_filp->f_mapping;
-			if (dio_lock_type != DIO_OWN_LOCKING) {
-				mutex_lock(&inode->i_mutex);
-				release_i_mutex = 1;
-			}
+			/* will be released by direct_io_worker */
+			mutex_lock(&inode->i_mutex);
 
 			retval = filemap_write_and_wait_range(mapping, offset,
 							      end - 1);
 			if (retval) {
+				mutex_unlock(&inode->i_mutex);
 				kfree(dio);
 				goto out;
 			}
-
-			if (dio_lock_type == DIO_OWN_LOCKING) {
-				mutex_unlock(&inode->i_mutex);
-				acquire_i_mutex = 1;
-			}
 		}
 
-		if (dio_lock_type == DIO_LOCKING)
-			/* lockdep: not the owner will release it */
-			down_read_non_owner(&inode->i_alloc_sem);
+		/*
+		 * Will be released at I/O completion, possibly in a
+		 * different thread.
+		 */
+		down_read_non_owner(&inode->i_alloc_sem);
 	}
 
 	/*
@@ -1210,24 +1190,19 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	/*
 	 * In case of error extending write may have instantiated a few
 	 * blocks outside i_size. Trim these off again for DIO_LOCKING.
-	 * NOTE: DIO_NO_LOCK/DIO_OWN_LOCK callers have to handle this by
-	 * it's own meaner.
+	 *
+	 * NOTE: filesystems with their own locking have to handle this
+	 * on their own.
 	 */
-	if (unlikely(retval < 0 && (rw & WRITE))) {
-		loff_t isize = i_size_read(inode);
-
-		if (end > isize && dio_lock_type == DIO_LOCKING)
-			vmtruncate(inode, isize);
+	if (dio->flags & DIO_LOCKING) {
+		if (unlikely((rw & WRITE) && retval < 0)) {
+			loff_t isize = i_size_read(inode);
+			if (end > isize)
+				vmtruncate(inode, isize);
+		}
 	}
 
-	if (rw == READ && dio_lock_type == DIO_LOCKING)
-		release_i_mutex = 0;
-
 out:
-	if (release_i_mutex)
-		mutex_unlock(&inode->i_mutex);
-	else if (acquire_i_mutex)
-		mutex_lock(&inode->i_mutex);
 	return retval;
 }
 EXPORT_SYMBOL(__blockdev_direct_IO);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index deb2b132ae5e..3dae4a13f6e4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -547,6 +547,9 @@ bail:
  *
  * called like this: dio->get_blocks(dio->inode, fs_startblk,
  * 					fs_count, map_bh, dio->rw == WRITE);
+ *
+ * Note that we never bother to allocate blocks here, and thus ignore the
+ * create argument.
  */
 static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 				     struct buffer_head *bh_result, int create)
@@ -563,14 +566,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 
 	inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
 
-	/*
-	 * Any write past EOF is not allowed because we'd be extending.
-	 */
-	if (create && (iblock + max_blocks) > inode_blocks) {
-		ret = -EIO;
-		goto bail;
-	}
-
 	/* This figures out the size of the next contiguous block, and
 	 * our logical offset */
 	ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
@@ -582,15 +577,6 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 		goto bail;
 	}
 
-	if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) {
-		ocfs2_error(inode->i_sb,
-			    "Inode %llu has a hole at block %llu\n",
-			    (unsigned long long)OCFS2_I(inode)->ip_blkno,
-			    (unsigned long long)iblock);
-		ret = -EROFS;
-		goto bail;
-	}
-
 	/* We should already CoW the refcounted extent. */
 	BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
 	/*
@@ -601,20 +587,8 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
 	 */
 	if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
 		map_bh(bh_result, inode->i_sb, p_blkno);
-	else {
-		/*
-		 * ocfs2_prepare_inode_for_write() should have caught
-		 * the case where we'd be filling a hole and triggered
-		 * a buffered write instead.
-		 */
-		if (create) {
-			ret = -EIO;
-			mlog_errno(ret);
-			goto bail;
-		}
-
+	else
 		clear_buffer_mapped(bh_result);
-	}
 
 	/* make sure we don't map more than max_blocks blocks here as
 	 * that's all the kernel will handle at this point. */
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index d798c54296eb..66abe36c1213 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1474,19 +1474,13 @@ xfs_vm_direct_IO(
 
 	bdev = xfs_find_bdev_for_inode(XFS_I(inode));
 
-	if (rw == WRITE) {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_UNWRITTEN);
-		ret = blockdev_direct_IO_own_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	} else {
-		iocb->private = xfs_alloc_ioend(inode, IOMAP_READ);
-		ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
-			bdev, iov, offset, nr_segs,
-			xfs_get_blocks_direct,
-			xfs_end_io_direct);
-	}
+	iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
+					IOMAP_UNWRITTEN : IOMAP_READ);
+
+	ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+					    offset, nr_segs,
+					    xfs_get_blocks_direct,
+					    xfs_end_io_direct);
 
 	if (unlikely(ret != -EIOCBQUEUED && iocb->private))
 		xfs_destroy_ioend(iocb->private);