diff options
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r-- | fs/direct-io.c | 49 |
1 file changed, 43 insertions(+), 6 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 5fa2211e49ae..62cf812ed0e5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -229,6 +229,7 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
229 | { | 229 | { |
230 | loff_t offset = dio->iocb->ki_pos; | 230 | loff_t offset = dio->iocb->ki_pos; |
231 | ssize_t transferred = 0; | 231 | ssize_t transferred = 0; |
232 | int err; | ||
232 | 233 | ||
233 | /* | 234 | /* |
234 | * AIO submission can race with bio completion to get here while | 235 | * AIO submission can race with bio completion to get here while |
@@ -258,8 +259,22 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, bool is_async)
258 | if (ret == 0) | 259 | if (ret == 0) |
259 | ret = transferred; | 260 | ret = transferred; |
260 | 261 | ||
262 | /* | ||
263 | * Try again to invalidate clean pages which might have been cached by | ||
264 | * non-direct readahead, or faulted in by get_user_pages() if the source | ||
265 | * of the write was an mmap'ed region of the file we're writing. Either | ||
266 | * one is a pretty crazy thing to do, so we don't support it 100%. If | ||
267 | * this invalidation fails, tough, the write still worked... | ||
268 | */ | ||
269 | if (ret > 0 && dio->op == REQ_OP_WRITE && | ||
270 | dio->inode->i_mapping->nrpages) { | ||
271 | err = invalidate_inode_pages2_range(dio->inode->i_mapping, | ||
272 | offset >> PAGE_SHIFT, | ||
273 | (offset + ret - 1) >> PAGE_SHIFT); | ||
274 | WARN_ON_ONCE(err); | ||
275 | } | ||
276 | |||
261 | if (dio->end_io) { | 277 | if (dio->end_io) { |
262 | int err; | ||
263 | 278 | ||
264 | // XXX: ki_pos?? | 279 | // XXX: ki_pos?? |
265 | err = dio->end_io(dio->iocb, offset, ret, dio->private); | 280 | err = dio->end_io(dio->iocb, offset, ret, dio->private); |
@@ -304,6 +319,7 @@ static void dio_bio_end_aio(struct bio *bio)
304 | struct dio *dio = bio->bi_private; | 319 | struct dio *dio = bio->bi_private; |
305 | unsigned long remaining; | 320 | unsigned long remaining; |
306 | unsigned long flags; | 321 | unsigned long flags; |
322 | bool defer_completion = false; | ||
307 | 323 | ||
308 | /* cleanup the bio */ | 324 | /* cleanup the bio */ |
309 | dio_bio_complete(dio, bio); | 325 | dio_bio_complete(dio, bio); |
@@ -315,7 +331,19 @@ static void dio_bio_end_aio(struct bio *bio)
315 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 331 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
316 | 332 | ||
317 | if (remaining == 0) { | 333 | if (remaining == 0) { |
318 | if (dio->result && dio->defer_completion) { | 334 | /* |
335 | * Defer completion when defer_completion is set or | ||
336 | * when the inode has pages mapped and this is AIO write. | ||
337 | * We need to invalidate those pages because there is a | ||
338 | * chance they contain stale data in the case buffered IO | ||
339 | * went in between AIO submission and completion into the | ||
340 | * same region. | ||
341 | */ | ||
342 | if (dio->result) | ||
343 | defer_completion = dio->defer_completion || | ||
344 | (dio->op == REQ_OP_WRITE && | ||
345 | dio->inode->i_mapping->nrpages); | ||
346 | if (defer_completion) { | ||
319 | INIT_WORK(&dio->complete_work, dio_aio_complete_work); | 347 | INIT_WORK(&dio->complete_work, dio_aio_complete_work); |
320 | queue_work(dio->inode->i_sb->s_dio_done_wq, | 348 | queue_work(dio->inode->i_sb->s_dio_done_wq, |
321 | &dio->complete_work); | 349 | &dio->complete_work); |
@@ -1210,10 +1238,19 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
1210 | * For AIO O_(D)SYNC writes we need to defer completions to a workqueue | 1238 | * For AIO O_(D)SYNC writes we need to defer completions to a workqueue |
1211 | * so that we can call ->fsync. | 1239 | * so that we can call ->fsync. |
1212 | */ | 1240 | */ |
1213 | if (dio->is_async && iov_iter_rw(iter) == WRITE && | 1241 | if (dio->is_async && iov_iter_rw(iter) == WRITE) { |
1214 | ((iocb->ki_filp->f_flags & O_DSYNC) || | 1242 | retval = 0; |
1215 | IS_SYNC(iocb->ki_filp->f_mapping->host))) { | 1243 | if ((iocb->ki_filp->f_flags & O_DSYNC) || |
1216 | retval = dio_set_defer_completion(dio); | 1244 | IS_SYNC(iocb->ki_filp->f_mapping->host)) |
1245 | retval = dio_set_defer_completion(dio); | ||
1246 | else if (!dio->inode->i_sb->s_dio_done_wq) { | ||
1247 | /* | ||
1248 | * In case of AIO write racing with buffered read we | ||
1249 | * need to defer completion. We can't decide this now, | ||
1250 | * however the workqueue needs to be initialized here. | ||
1251 | */ | ||
1252 | retval = sb_init_dio_done_wq(dio->inode->i_sb); | ||
1253 | } | ||
1217 | if (retval) { | 1254 | if (retval) { |
1218 | /* | 1255 | /* |
1219 | * We grab i_mutex only for reads so we don't have | 1256 | * We grab i_mutex only for reads so we don't have |