about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- fs/direct-io.c 90
-rw-r--r-- fs/xfs/linux-2.6/xfs_aops.c 2
-rw-r--r-- mm/filemap.c 9
3 files changed, 39 insertions, 62 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f11f05dc9e61..71f4aeac7632 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -226,6 +226,15 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
226{ 226{
227 ssize_t transferred = 0; 227 ssize_t transferred = 0;
228 228
229 /*
230 * AIO submission can race with bio completion to get here while
231 * expecting to have the last io completed by bio completion.
232 * In that case -EIOCBQUEUED is in fact not an error we want
233 * to preserve through this call.
234 */
235 if (ret == -EIOCBQUEUED)
236 ret = 0;
237
229 if (dio->result) { 238 if (dio->result) {
230 transferred = dio->result; 239 transferred = dio->result;
231 240
@@ -251,24 +260,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
251 return ret; 260 return ret;
252} 261}
253 262
254/*
255 * Called when a BIO has been processed. If the count goes to zero then IO is
256 * complete and we can signal this to the AIO layer.
257 */
258static void dio_complete_aio(struct dio *dio)
259{
260 int ret;
261
262 ret = dio_complete(dio, dio->iocb->ki_pos, 0);
263
264 /* Complete AIO later if falling back to buffered i/o */
265 if (dio->result == dio->size ||
266 ((dio->rw == READ) && dio->result)) {
267 aio_complete(dio->iocb, ret, 0);
268 kfree(dio);
269 }
270}
271
272static int dio_bio_complete(struct dio *dio, struct bio *bio); 263static int dio_bio_complete(struct dio *dio, struct bio *bio);
273/* 264/*
274 * Asynchronous IO callback. 265 * Asynchronous IO callback.
@@ -290,8 +281,11 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
290 if (remaining == 1 && waiter_holds_ref) 281 if (remaining == 1 && waiter_holds_ref)
291 wake_up_process(dio->waiter); 282 wake_up_process(dio->waiter);
292 283
293 if (remaining == 0) 284 if (remaining == 0) {
294 dio_complete_aio(dio); 285 int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
286 aio_complete(dio->iocb, ret, 0);
287 kfree(dio);
288 }
295 289
296 return 0; 290 return 0;
297} 291}
@@ -1082,47 +1076,33 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1082 mutex_unlock(&dio->inode->i_mutex); 1076 mutex_unlock(&dio->inode->i_mutex);
1083 1077
1084 /* 1078 /*
1085 * OK, all BIOs are submitted, so we can decrement bio_count to truly 1079 * The only time we want to leave bios in flight is when a successful
1086 * reflect the number of to-be-processed BIOs. 1080 * partial aio read or full aio write have been setup. In that case
1081 * bio completion will call aio_complete. The only time it's safe to
1082 * call aio_complete is when we return -EIOCBQUEUED, so we key on that.
1083 * This had *better* be the only place that raises -EIOCBQUEUED.
1087 */ 1084 */
1088 if (dio->is_async) { 1085 BUG_ON(ret == -EIOCBQUEUED);
1089 int should_wait = 0; 1086 if (dio->is_async && ret == 0 && dio->result &&
1090 1087 ((rw & READ) || (dio->result == dio->size)))
1091 if (dio->result < dio->size && (rw & WRITE)) { 1088 ret = -EIOCBQUEUED;
1092 dio->waiter = current;
1093 should_wait = 1;
1094 }
1095 if (ret == 0)
1096 ret = dio->result;
1097
1098 if (should_wait)
1099 dio_await_completion(dio);
1100
1101 /* this can free the dio */
1102 if (atomic_dec_and_test(&dio->refcount))
1103 dio_complete_aio(dio);
1104 1089
1105 if (should_wait) 1090 if (ret != -EIOCBQUEUED)
1106 kfree(dio);
1107 } else {
1108 dio_await_completion(dio); 1091 dio_await_completion(dio);
1109 1092
1093 /*
1094 * Sync will always be dropping the final ref and completing the
1095 * operation. AIO can if it was a broken operation described above
1096 * or in fact if all the bios race to complete before we get here.
1097 * In that case dio_complete() translates the EIOCBQUEUED into
1098 * the proper return code that the caller will hand to aio_complete().
1099 */
1100 if (atomic_dec_and_test(&dio->refcount)) {
1110 ret = dio_complete(dio, offset, ret); 1101 ret = dio_complete(dio, offset, ret);
1102 kfree(dio);
1103 } else
1104 BUG_ON(ret != -EIOCBQUEUED);
1111 1105
1112 /* We could have also come here on an AIO file extend */
1113 if (!is_sync_kiocb(iocb) && (rw & WRITE) &&
1114 ret >= 0 && dio->result == dio->size)
1115 /*
1116 * For AIO writes where we have completed the
1117 * i/o, we have to mark the aio complete.
1118 */
1119 aio_complete(iocb, ret, 0);
1120
1121 if (atomic_dec_and_test(&dio->refcount))
1122 kfree(dio);
1123 else
1124 BUG();
1125 }
1126 return ret; 1106 return ret;
1127} 1107}
1128 1108
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 8e6b56fc1cad..b56eb754e2d2 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -1406,7 +1406,7 @@ xfs_vm_direct_IO(
1406 xfs_end_io_direct); 1406 xfs_end_io_direct);
1407 } 1407 }
1408 1408
1409 if (unlikely(ret <= 0 && iocb->private)) 1409 if (unlikely(ret != -EIOCBQUEUED && iocb->private))
1410 xfs_destroy_ioend(iocb->private); 1410 xfs_destroy_ioend(iocb->private);
1411 return ret; 1411 return ret;
1412} 1412}
diff --git a/mm/filemap.c b/mm/filemap.c
index 606432f71b3a..8332c77b1bd1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1181,8 +1181,6 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
1181 if (pos < size) { 1181 if (pos < size) {
1182 retval = generic_file_direct_IO(READ, iocb, 1182 retval = generic_file_direct_IO(READ, iocb,
1183 iov, pos, nr_segs); 1183 iov, pos, nr_segs);
1184 if (retval > 0 && !is_sync_kiocb(iocb))
1185 retval = -EIOCBQUEUED;
1186 if (retval > 0) 1184 if (retval > 0)
1187 *ppos = pos + retval; 1185 *ppos = pos + retval;
1188 } 1186 }
@@ -2047,15 +2045,14 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
2047 * Sync the fs metadata but not the minor inode changes and 2045 * Sync the fs metadata but not the minor inode changes and
2048 * of course not the data as we did direct DMA for the IO. 2046 * of course not the data as we did direct DMA for the IO.
2049 * i_mutex is held, which protects generic_osync_inode() from 2047 * i_mutex is held, which protects generic_osync_inode() from
2050 * livelocking. 2048 * livelocking. AIO O_DIRECT ops attempt to sync metadata here.
2051 */ 2049 */
2052 if (written >= 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { 2050 if ((written >= 0 || written == -EIOCBQUEUED) &&
2051 ((file->f_flags & O_SYNC) || IS_SYNC(inode))) {
2053 int err = generic_osync_inode(inode, mapping, OSYNC_METADATA); 2052 int err = generic_osync_inode(inode, mapping, OSYNC_METADATA);
2054 if (err < 0) 2053 if (err < 0)
2055 written = err; 2054 written = err;
2056 } 2055 }
2057 if (written == count && !is_sync_kiocb(iocb))
2058 written = -EIOCBQUEUED;
2059 return written; 2056 return written;
2060} 2057}
2061EXPORT_SYMBOL(generic_file_direct_write); 2058EXPORT_SYMBOL(generic_file_direct_write);