diff options
-rw-r--r-- | fs/direct-io.c | 90 | ||||
-rw-r--r-- | fs/xfs/linux-2.6/xfs_aops.c | 2 | ||||
-rw-r--r-- | mm/filemap.c | 9 |
3 files changed, 39 insertions, 62 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index f11f05dc9e61..71f4aeac7632 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -226,6 +226,15 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) | |||
226 | { | 226 | { |
227 | ssize_t transferred = 0; | 227 | ssize_t transferred = 0; |
228 | 228 | ||
229 | /* | ||
230 | * AIO submission can race with bio completion to get here while | ||
231 | * expecting to have the last io completed by bio completion. | ||
232 | * In that case -EIOCBQUEUED is in fact not an error we want | ||
233 | * to preserve through this call. | ||
234 | */ | ||
235 | if (ret == -EIOCBQUEUED) | ||
236 | ret = 0; | ||
237 | |||
229 | if (dio->result) { | 238 | if (dio->result) { |
230 | transferred = dio->result; | 239 | transferred = dio->result; |
231 | 240 | ||
@@ -251,24 +260,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) | |||
251 | return ret; | 260 | return ret; |
252 | } | 261 | } |
253 | 262 | ||
254 | /* | ||
255 | * Called when a BIO has been processed. If the count goes to zero then IO is | ||
256 | * complete and we can signal this to the AIO layer. | ||
257 | */ | ||
258 | static void dio_complete_aio(struct dio *dio) | ||
259 | { | ||
260 | int ret; | ||
261 | |||
262 | ret = dio_complete(dio, dio->iocb->ki_pos, 0); | ||
263 | |||
264 | /* Complete AIO later if falling back to buffered i/o */ | ||
265 | if (dio->result == dio->size || | ||
266 | ((dio->rw == READ) && dio->result)) { | ||
267 | aio_complete(dio->iocb, ret, 0); | ||
268 | kfree(dio); | ||
269 | } | ||
270 | } | ||
271 | |||
272 | static int dio_bio_complete(struct dio *dio, struct bio *bio); | 263 | static int dio_bio_complete(struct dio *dio, struct bio *bio); |
273 | /* | 264 | /* |
274 | * Asynchronous IO callback. | 265 | * Asynchronous IO callback. |
@@ -290,8 +281,11 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error) | |||
290 | if (remaining == 1 && waiter_holds_ref) | 281 | if (remaining == 1 && waiter_holds_ref) |
291 | wake_up_process(dio->waiter); | 282 | wake_up_process(dio->waiter); |
292 | 283 | ||
293 | if (remaining == 0) | 284 | if (remaining == 0) { |
294 | dio_complete_aio(dio); | 285 | int ret = dio_complete(dio, dio->iocb->ki_pos, 0); |
286 | aio_complete(dio->iocb, ret, 0); | ||
287 | kfree(dio); | ||
288 | } | ||
295 | 289 | ||
296 | return 0; | 290 | return 0; |
297 | } | 291 | } |
@@ -1082,47 +1076,33 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1082 | mutex_unlock(&dio->inode->i_mutex); | 1076 | mutex_unlock(&dio->inode->i_mutex); |
1083 | 1077 | ||
1084 | /* | 1078 | /* |
1085 | * OK, all BIOs are submitted, so we can decrement bio_count to truly | 1079 | * The only time we want to leave bios in flight is when a successful |
1086 | * reflect the number of to-be-processed BIOs. | 1080 | * partial aio read or full aio write have been setup. In that case |
1081 | * bio completion will call aio_complete. The only time it's safe to | ||
1082 | * call aio_complete is when we return -EIOCBQUEUED, so we key on that. | ||
1083 | * This had *better* be the only place that raises -EIOCBQUEUED. | ||
1087 | */ | 1084 | */ |
1088 | if (dio->is_async) { | 1085 | BUG_ON(ret == -EIOCBQUEUED); |
1089 | int should_wait = 0; | 1086 | if (dio->is_async && ret == 0 && dio->result && |
1090 | 1087 | ((rw & READ) || (dio->result == dio->size))) | |
1091 | if (dio->result < dio->size && (rw & WRITE)) { | 1088 | ret = -EIOCBQUEUED; |
1092 | dio->waiter = current; | ||
1093 | should_wait = 1; | ||
1094 | } | ||
1095 | if (ret == 0) | ||
1096 | ret = dio->result; | ||
1097 | |||
1098 | if (should_wait) | ||
1099 | dio_await_completion(dio); | ||
1100 | |||
1101 | /* this can free the dio */ | ||
1102 | if (atomic_dec_and_test(&dio->refcount)) | ||
1103 | dio_complete_aio(dio); | ||
1104 | 1089 | ||
1105 | if (should_wait) | 1090 | if (ret != -EIOCBQUEUED) |
1106 | kfree(dio); | ||
1107 | } else { | ||
1108 | dio_await_completion(dio); | 1091 | dio_await_completion(dio); |
1109 | 1092 | ||
1093 | /* | ||
1094 | * Sync will always be dropping the final ref and completing the | ||
1095 | * operation. AIO can if it was a broken operation described above | ||
1096 | * or in fact if all the bios race to complete before we get here. | ||
1097 | * In that case dio_complete() translates the EIOCBQUEUED into | ||
1098 | * the proper return code that the caller will hand to aio_complete(). | ||
1099 | */ | ||
1100 | if (atomic_dec_and_test(&dio->refcount)) { | ||
1110 | ret = dio_complete(dio, offset, ret); | 1101 | ret = dio_complete(dio, offset, ret); |
1102 | kfree(dio); | ||
1103 | } else | ||
1104 | BUG_ON(ret != -EIOCBQUEUED); | ||
1111 | 1105 | ||
1112 | /* We could have also come here on an AIO file extend */ | ||
1113 | if (!is_sync_kiocb(iocb) && (rw & WRITE) && | ||
1114 | ret >= 0 && dio->result == dio->size) | ||
1115 | /* | ||
1116 | * For AIO writes where we have completed the | ||
1117 | * i/o, we have to mark the the aio complete. | ||
1118 | */ | ||
1119 | aio_complete(iocb, ret, 0); | ||
1120 | |||
1121 | if (atomic_dec_and_test(&dio->refcount)) | ||
1122 | kfree(dio); | ||
1123 | else | ||
1124 | BUG(); | ||
1125 | } | ||
1126 | return ret; | 1106 | return ret; |
1127 | } | 1107 | } |
1128 | 1108 | ||
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 8e6b56fc1cad..b56eb754e2d2 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1406,7 +1406,7 @@ xfs_vm_direct_IO( | |||
1406 | xfs_end_io_direct); | 1406 | xfs_end_io_direct); |
1407 | } | 1407 | } |
1408 | 1408 | ||
1409 | if (unlikely(ret <= 0 && iocb->private)) | 1409 | if (unlikely(ret != -EIOCBQUEUED && iocb->private)) |
1410 | xfs_destroy_ioend(iocb->private); | 1410 | xfs_destroy_ioend(iocb->private); |
1411 | return ret; | 1411 | return ret; |
1412 | } | 1412 | } |
diff --git a/mm/filemap.c b/mm/filemap.c index 606432f71b3a..8332c77b1bd1 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -1181,8 +1181,6 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1181 | if (pos < size) { | 1181 | if (pos < size) { |
1182 | retval = generic_file_direct_IO(READ, iocb, | 1182 | retval = generic_file_direct_IO(READ, iocb, |
1183 | iov, pos, nr_segs); | 1183 | iov, pos, nr_segs); |
1184 | if (retval > 0 && !is_sync_kiocb(iocb)) | ||
1185 | retval = -EIOCBQUEUED; | ||
1186 | if (retval > 0) | 1184 | if (retval > 0) |
1187 | *ppos = pos + retval; | 1185 | *ppos = pos + retval; |
1188 | } | 1186 | } |
@@ -2047,15 +2045,14 @@ generic_file_direct_write(struct kiocb *iocb, const struct iovec *iov, | |||
2047 | * Sync the fs metadata but not the minor inode changes and | 2045 | * Sync the fs metadata but not the minor inode changes and |
2048 | * of course not the data as we did direct DMA for the IO. | 2046 | * of course not the data as we did direct DMA for the IO. |
2049 | * i_mutex is held, which protects generic_osync_inode() from | 2047 | * i_mutex is held, which protects generic_osync_inode() from |
2050 | * livelocking. | 2048 | * livelocking. AIO O_DIRECT ops attempt to sync metadata here. |
2051 | */ | 2049 | */ |
2052 | if (written >= 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | 2050 | if ((written >= 0 || written == -EIOCBQUEUED) && |
2051 | ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { | ||
2053 | int err = generic_osync_inode(inode, mapping, OSYNC_METADATA); | 2052 | int err = generic_osync_inode(inode, mapping, OSYNC_METADATA); |
2054 | if (err < 0) | 2053 | if (err < 0) |
2055 | written = err; | 2054 | written = err; |
2056 | } | 2055 | } |
2057 | if (written == count && !is_sync_kiocb(iocb)) | ||
2058 | written = -EIOCBQUEUED; | ||
2059 | return written; | 2056 | return written; |
2060 | } | 2057 | } |
2061 | EXPORT_SYMBOL(generic_file_direct_write); | 2058 | EXPORT_SYMBOL(generic_file_direct_write); |