diff options
-rw-r--r-- | fs/direct-io.c | 49 | ||||
-rw-r--r-- | fs/iomap.c | 29 | ||||
-rw-r--r-- | mm/filemap.c | 10 |
3 files changed, 67 insertions, 21 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index 5fa2211e49ae..62cf812ed0e5 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) | |||
229 | { | 229 | { |
230 | loff_t offset = dio->iocb->ki_pos; | 230 | loff_t offset = dio->iocb->ki_pos; |
231 | ssize_t transferred = 0; | 231 | ssize_t transferred = 0; |
232 | int err; | ||
232 | 233 | ||
233 | /* | 234 | /* |
234 | * AIO submission can race with bio completion to get here while | 235 | * AIO submission can race with bio completion to get here while |
@@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async) | |||
258 | if (ret == 0) | 259 | if (ret == 0) |
259 | ret = transferred; | 260 | ret = transferred; |
260 | 261 | ||
262 | /* | ||
263 | * Try again to invalidate clean pages which might have been cached by | ||
264 | * non-direct readahead, or faulted in by get_user_pages() if the source | ||
265 | * of the write was an mmap'ed region of the file we're writing. Either | ||
266 | * one is a pretty crazy thing to do, so we don't support it 100%. If | ||
267 | * this invalidation fails, tough, the write still worked... | ||
268 | */ | ||
269 | if (ret > 0 && dio->op == REQ_OP_WRITE && | ||
270 | dio->inode->i_mapping->nrpages) { | ||
271 | err = invalidate_inode_pages2_range(dio->inode->i_mapping, | ||
272 | offset >> PAGE_SHIFT, | ||
273 | (offset + ret - 1) >> PAGE_SHIFT); | ||
274 | WARN_ON_ONCE(err); | ||
275 | } | ||
276 | |||
261 | if (dio->end_io) { | 277 | if (dio->end_io) { |
262 | int err; | ||
263 | 278 | ||
264 | // XXX: ki_pos?? | 279 | // XXX: ki_pos?? |
265 | err = dio->end_io(dio->iocb, offset, ret, dio->private); | 280 | err = dio->end_io(dio->iocb, offset, ret, dio->private); |
@@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio) | |||
304 | struct dio *dio = bio->bi_private; | 319 | struct dio *dio = bio->bi_private; |
305 | unsigned long remaining; | 320 | unsigned long remaining; |
306 | unsigned long flags; | 321 | unsigned long flags; |
322 | bool defer_completion = false; | ||
307 | 323 | ||
308 | /* cleanup the bio */ | 324 | /* cleanup the bio */ |
309 | dio_bio_complete(dio, bio); | 325 | dio_bio_complete(dio, bio); |
@@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio) | |||
315 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 331 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
316 | 332 | ||
317 | if (remaining == 0) { | 333 | if (remaining == 0) { |
318 | if (dio->result && dio->defer_completion) { | 334 | /* |
335 | * Defer completion when defer_completion is set or | ||
336 | * when the inode has pages mapped and this is an AIO write. | ||
337 | * We need to invalidate those pages because there is a | ||
338 | * chance they contain stale data if buffered IO to the | ||
339 | * same region went in between AIO submission and | ||
340 | * completion. | ||
341 | */ | ||
342 | if (dio->result) | ||
343 | defer_completion = dio->defer_completion || | ||
344 | (dio->op == REQ_OP_WRITE && | ||
345 | dio->inode->i_mapping->nrpages); | ||
346 | if (defer_completion) { | ||
319 | INIT_WORK(&dio->complete_work, dio_aio_complete_work); | 347 | INIT_WORK(&dio->complete_work, dio_aio_complete_work); |
320 | queue_work(dio->inode->i_sb->s_dio_done_wq, | 348 | queue_work(dio->inode->i_sb->s_dio_done_wq, |
321 | &dio->complete_work); | 349 | &dio->complete_work); |
@@ -1210,10 +1238,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, | |||
1210 | * For AIO O_(D)SYNC writes we need to defer completions to a workqueue | 1238 | * For AIO O_(D)SYNC writes we need to defer completions to a workqueue |
1211 | * so that we can call ->fsync. | 1239 | * so that we can call ->fsync. |
1212 | */ | 1240 | */ |
1213 | if (dio->is_async && iov_iter_rw(iter) == WRITE && | 1241 | if (dio->is_async && iov_iter_rw(iter) == WRITE) { |
1214 | ((iocb->ki_filp->f_flags & O_DSYNC) || | 1242 | retval = 0; |
1215 | IS_SYNC(iocb->ki_filp->f_mapping->host))) { | 1243 | if ((iocb->ki_filp->f_flags & O_DSYNC) || |
1216 | retval = dio_set_defer_completion(dio); | 1244 | IS_SYNC(iocb->ki_filp->f_mapping->host)) |
1245 | retval = dio_set_defer_completion(dio); | ||
1246 | else if (!dio->inode->i_sb->s_dio_done_wq) { | ||
1247 | /* | ||
1248 | * If an AIO write races with a buffered read we | ||
1249 | * need to defer completion. We can't decide that now, | ||
1250 | * but the workqueue needs to be initialized here. | ||
1251 | */ | ||
1252 | retval = sb_init_dio_done_wq(dio->inode->i_sb); | ||
1253 | } | ||
1217 | if (retval) { | 1254 | if (retval) { |
1218 | /* | 1255 | /* |
1219 | * We grab i_mutex only for reads so we don't have | 1256 | * We grab i_mutex only for reads so we don't have |
diff --git a/fs/iomap.c b/fs/iomap.c index 269b24a01f32..8194d30bdca0 100644 --- a/fs/iomap.c +++ b/fs/iomap.c | |||
@@ -713,8 +713,24 @@ struct iomap_dio { | |||
713 | static ssize_t iomap_dio_complete(struct iomap_dio *dio) | 713 | static ssize_t iomap_dio_complete(struct iomap_dio *dio) |
714 | { | 714 | { |
715 | struct kiocb *iocb = dio->iocb; | 715 | struct kiocb *iocb = dio->iocb; |
716 | struct inode *inode = file_inode(iocb->ki_filp); | ||
716 | ssize_t ret; | 717 | ssize_t ret; |
717 | 718 | ||
719 | /* | ||
720 | * Try again to invalidate clean pages which might have been cached by | ||
721 | * non-direct readahead, or faulted in by get_user_pages() if the source | ||
722 | * of the write was an mmap'ed region of the file we're writing. Either | ||
723 | * one is a pretty crazy thing to do, so we don't support it 100%. If | ||
724 | * this invalidation fails, tough, the write still worked... | ||
725 | */ | ||
726 | if (!dio->error && | ||
727 | (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { | ||
728 | ret = invalidate_inode_pages2_range(inode->i_mapping, | ||
729 | iocb->ki_pos >> PAGE_SHIFT, | ||
730 | (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT); | ||
731 | WARN_ON_ONCE(ret); | ||
732 | } | ||
733 | |||
718 | if (dio->end_io) { | 734 | if (dio->end_io) { |
719 | ret = dio->end_io(iocb, | 735 | ret = dio->end_io(iocb, |
720 | dio->error ? dio->error : dio->size, | 736 | dio->error ? dio->error : dio->size, |
@@ -1042,19 +1058,6 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, | |||
1042 | 1058 | ||
1043 | ret = iomap_dio_complete(dio); | 1059 | ret = iomap_dio_complete(dio); |
1044 | 1060 | ||
1045 | /* | ||
1046 | * Try again to invalidate clean pages which might have been cached by | ||
1047 | * non-direct readahead, or faulted in by get_user_pages() if the source | ||
1048 | * of the write was an mmap'ed region of the file we're writing. Either | ||
1049 | * one is a pretty crazy thing to do, so we don't support it 100%. If | ||
1050 | * this invalidation fails, tough, the write still worked... | ||
1051 | */ | ||
1052 | if (iov_iter_rw(iter) == WRITE) { | ||
1053 | int err = invalidate_inode_pages2_range(mapping, | ||
1054 | start >> PAGE_SHIFT, end >> PAGE_SHIFT); | ||
1055 | WARN_ON_ONCE(err); | ||
1056 | } | ||
1057 | |||
1058 | return ret; | 1061 | return ret; |
1059 | 1062 | ||
1060 | out_free_dio: | 1063 | out_free_dio: |
diff --git a/mm/filemap.c b/mm/filemap.c index 870971e20967..db250d0e0565 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -2926,9 +2926,15 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from) | |||
2926 | * we're writing. Either one is a pretty crazy thing to do, | 2926 | * we're writing. Either one is a pretty crazy thing to do, |
2927 | * so we don't support it 100%. If this invalidation | 2927 | * so we don't support it 100%. If this invalidation |
2928 | * fails, tough, the write still worked... | 2928 | * fails, tough, the write still worked... |
2929 | * | ||
2930 | * Most of the time we do not need this since dio_complete() will do | ||
2931 | * the invalidation for us. However there are some file systems that | ||
2932 | * do not end up with dio_complete() being called, so let's not break | ||
2933 | * them by removing it completely. | ||
2929 | */ | 2934 | */ |
2930 | invalidate_inode_pages2_range(mapping, | 2935 | if (mapping->nrpages) |
2931 | pos >> PAGE_SHIFT, end); | 2936 | invalidate_inode_pages2_range(mapping, |
2937 | pos >> PAGE_SHIFT, end); | ||
2932 | 2938 | ||
2933 | if (written > 0) { | 2939 | if (written > 0) { |
2934 | pos += written; | 2940 | pos += written; |