author		Andreas Gruenbacher <agruenba@redhat.com>	2018-06-19 18:10:55 -0400
committer	Darrick J. Wong <darrick.wong@oracle.com>	2018-06-19 18:10:55 -0400
commit		ebf00be37de35788cad72f4f20b4a39e30c0be4a
tree		da2cb75db298aea59b6b9f0051abe2d75153fb70
parent		3d7b6b21f6c590c4d70b311bbdd78a214637c9c7
iomap: complete partial direct I/O writes synchronously
According to xfstest generic/240, applications seem to expect direct I/O writes to either complete as a whole or to fail; short direct I/O writes are apparently not appreciated. This means that when only part of an asynchronous direct I/O write succeeds, we can either fail the entire write, or we can wait for the partial write to complete and retry the remaining write as buffered I/O. The old __blockdev_direct_IO helper has code for waiting for partial writes to complete; the new iomap_dio_rw iomap helper does not.

The above mentioned fallback mode is needed for gfs2, which doesn't allow block allocations under direct I/O to avoid taking cluster-wide exclusive locks. As a consequence, an asynchronous direct I/O write to a file range that contains a hole will result in a short write. In that case, wait for the short write to complete to allow gfs2 to recover.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
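For context, a hedged caller-side sketch of the fallback described above: if the direct write only partially completes (as gfs2 sees when the written range contains a hole), the caller retries the remainder as buffered I/O. Only iomap_dio_rw() and iov_iter_count() are real kernel interfaces here; my_iomap_ops, my_dio_end_io and my_buffered_write() are made-up placeholders, and the exact bookkeeping varies per filesystem.

static ssize_t my_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
	ssize_t written, buffered;

	/* Submit the direct write; with this patch, a partial (short)
	 * write is waited for and completed synchronously before
	 * iomap_dio_rw() returns, instead of completing asynchronously. */
	written = iomap_dio_rw(iocb, from, &my_iomap_ops, my_dio_end_io);
	if (written < 0)
		return written;

	/* Anything still left in the iterator was not written directly
	 * (e.g. a hole on gfs2); retry it as buffered I/O from the
	 * updated iocb->ki_pos. */
	if (iov_iter_count(from)) {
		buffered = my_buffered_write(iocb, from);
		if (buffered > 0)
			written += buffered;
		else if (!written)
			written = buffered;
	}
	return written;
}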
-rw-r--r--	fs/iomap.c	21
1 file changed, 11 insertions(+), 10 deletions(-)
diff --git a/fs/iomap.c b/fs/iomap.c
index 77397b5a96ef..9c454459a1e9 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -811,6 +811,7 @@ struct iomap_dio {
 	atomic_t		ref;
 	unsigned		flags;
 	int			error;
+	bool			wait_for_completion;
 
 	union {
 		/* used during submission and for synchronous completion: */
@@ -914,9 +915,8 @@ static void iomap_dio_bio_end_io(struct bio *bio)
 		iomap_dio_set_error(dio, blk_status_to_errno(bio->bi_status));
 
 	if (atomic_dec_and_test(&dio->ref)) {
-		if (is_sync_kiocb(dio->iocb)) {
+		if (dio->wait_for_completion) {
 			struct task_struct *waiter = dio->submit.waiter;
-
 			WRITE_ONCE(dio->submit.waiter, NULL);
 			wake_up_process(waiter);
 		} else if (dio->flags & IOMAP_DIO_WRITE) {
@@ -1131,13 +1131,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->end_io = end_io;
 	dio->error = 0;
 	dio->flags = 0;
+	dio->wait_for_completion = is_sync_kiocb(iocb);
 
 	dio->submit.iter = iter;
-	if (is_sync_kiocb(iocb)) {
-		dio->submit.waiter = current;
-		dio->submit.cookie = BLK_QC_T_NONE;
-		dio->submit.last_queue = NULL;
-	}
+	dio->submit.waiter = current;
+	dio->submit.cookie = BLK_QC_T_NONE;
+	dio->submit.last_queue = NULL;
 
 	if (iov_iter_rw(iter) == READ) {
 		if (pos >= dio->i_size)
@@ -1187,7 +1186,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		dio_warn_stale_pagecache(iocb->ki_filp);
 	ret = 0;
 
-	if (iov_iter_rw(iter) == WRITE && !is_sync_kiocb(iocb) &&
+	if (iov_iter_rw(iter) == WRITE && !dio->wait_for_completion &&
 	    !inode->i_sb->s_dio_done_wq) {
 		ret = sb_init_dio_done_wq(inode->i_sb);
 		if (ret < 0)
@@ -1202,8 +1201,10 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 				iomap_dio_actor);
 		if (ret <= 0) {
 			/* magic error code to fall back to buffered I/O */
-			if (ret == -ENOTBLK)
+			if (ret == -ENOTBLK) {
+				dio->wait_for_completion = true;
 				ret = 0;
+			}
 			break;
 		}
 		pos += ret;
@@ -1224,7 +1225,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		dio->flags &= ~IOMAP_DIO_NEED_SYNC;
 
 	if (!atomic_dec_and_test(&dio->ref)) {
-		if (!is_sync_kiocb(iocb))
+		if (!dio->wait_for_completion)
 			return -EIOCBQUEUED;
 
 		for (;;) {