diff options
| -rw-r--r-- | fs/direct-io.c | 20 | ||||
| -rw-r--r-- | fs/iomap.c | 41 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 11 | ||||
| -rw-r--r-- | fs/xfs/libxfs/xfs_bmap.h | 1 | ||||
| -rw-r--r-- | fs/xfs/xfs_aops.c | 47 | ||||
| -rw-r--r-- | fs/xfs/xfs_fsmap.c | 48 |
6 files changed, 107 insertions, 61 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index 563254869e2f..b53e66d9abd7 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
| @@ -265,12 +265,24 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) | |||
| 265 | if (ret == 0) | 265 | if (ret == 0) |
| 266 | ret = transferred; | 266 | ret = transferred; |
| 267 | 267 | ||
| 268 | if (dio->end_io) { | ||
| 269 | // XXX: ki_pos?? | ||
| 270 | err = dio->end_io(dio->iocb, offset, ret, dio->private); | ||
| 271 | if (err) | ||
| 272 | ret = err; | ||
| 273 | } | ||
| 274 | |||
| 268 | /* | 275 | /* |
| 269 | * Try again to invalidate clean pages which might have been cached by | 276 | * Try again to invalidate clean pages which might have been cached by |
| 270 | * non-direct readahead, or faulted in by get_user_pages() if the source | 277 | * non-direct readahead, or faulted in by get_user_pages() if the source |
| 271 | * of the write was an mmap'ed region of the file we're writing. Either | 278 | * of the write was an mmap'ed region of the file we're writing. Either |
| 272 | * one is a pretty crazy thing to do, so we don't support it 100%. If | 279 | * one is a pretty crazy thing to do, so we don't support it 100%. If |
| 273 | * this invalidation fails, tough, the write still worked... | 280 | * this invalidation fails, tough, the write still worked... |
| 281 | * | ||
| 282 | * And this page cache invalidation has to be after dio->end_io(), as | ||
| 283 | * some filesystems convert unwritten extents to real allocations in | ||
| 284 | * end_io() when necessary, otherwise a racing buffered read would cache | ||
| 285 | * zeros from unwritten extents. | ||
| 274 | */ | 286 | */ |
| 275 | if (flags & DIO_COMPLETE_INVALIDATE && | 287 | if (flags & DIO_COMPLETE_INVALIDATE && |
| 276 | ret > 0 && dio->op == REQ_OP_WRITE && | 288 | ret > 0 && dio->op == REQ_OP_WRITE && |
| @@ -281,14 +293,6 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags) | |||
| 281 | WARN_ON_ONCE(err); | 293 | WARN_ON_ONCE(err); |
| 282 | } | 294 | } |
| 283 | 295 | ||
| 284 | if (dio->end_io) { | ||
| 285 | |||
| 286 | // XXX: ki_pos?? | ||
| 287 | err = dio->end_io(dio->iocb, offset, ret, dio->private); | ||
| 288 | if (err) | ||
| 289 | ret = err; | ||
| 290 | } | ||
| 291 | |||
| 292 | if (!(dio->flags & DIO_SKIP_DIO_COUNT)) | 296 | if (!(dio->flags & DIO_SKIP_DIO_COUNT)) |
| 293 | inode_dio_end(dio->inode); | 297 | inode_dio_end(dio->inode); |
| 294 | 298 | ||
diff --git a/fs/iomap.c b/fs/iomap.c index be61cf742b5e..d4801f8dd4fd 100644 --- a/fs/iomap.c +++ b/fs/iomap.c | |||
| @@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) | |||
| 714 | { | 714 | { |
| 715 | struct kiocb *iocb = dio->iocb; | 715 | struct kiocb *iocb = dio->iocb; |
| 716 | struct inode *inode = file_inode(iocb->ki_filp); | 716 | struct inode *inode = file_inode(iocb->ki_filp); |
| 717 | loff_t offset = iocb->ki_pos; | ||
| 717 | ssize_t ret; | 718 | ssize_t ret; |
| 718 | 719 | ||
| 719 | /* | ||
| 720 | * Try again to invalidate clean pages which might have been cached by | ||
| 721 | * non-direct readahead, or faulted in by get_user_pages() if the source | ||
| 722 | * of the write was an mmap'ed region of the file we're writing. Either | ||
| 723 | * one is a pretty crazy thing to do, so we don't support it 100%. If | ||
| 724 | * this invalidation fails, tough, the write still worked... | ||
| 725 | */ | ||
| 726 | if (!dio->error && | ||
| 727 | (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { | ||
| 728 | ret = invalidate_inode_pages2_range(inode->i_mapping, | ||
| 729 | iocb->ki_pos >> PAGE_SHIFT, | ||
| 730 | (iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT); | ||
| 731 | WARN_ON_ONCE(ret); | ||
| 732 | } | ||
| 733 | |||
| 734 | if (dio->end_io) { | 720 | if (dio->end_io) { |
| 735 | ret = dio->end_io(iocb, | 721 | ret = dio->end_io(iocb, |
| 736 | dio->error ? dio->error : dio->size, | 722 | dio->error ? dio->error : dio->size, |
| @@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio) | |||
| 742 | if (likely(!ret)) { | 728 | if (likely(!ret)) { |
| 743 | ret = dio->size; | 729 | ret = dio->size; |
| 744 | /* check for short read */ | 730 | /* check for short read */ |
| 745 | if (iocb->ki_pos + ret > dio->i_size && | 731 | if (offset + ret > dio->i_size && |
| 746 | !(dio->flags & IOMAP_DIO_WRITE)) | 732 | !(dio->flags & IOMAP_DIO_WRITE)) |
| 747 | ret = dio->i_size - iocb->ki_pos; | 733 | ret = dio->i_size - offset; |
| 748 | iocb->ki_pos += ret; | 734 | iocb->ki_pos += ret; |
| 749 | } | 735 | } |
| 750 | 736 | ||
| 737 | /* | ||
| 738 | * Try again to invalidate clean pages which might have been cached by | ||
| 739 | * non-direct readahead, or faulted in by get_user_pages() if the source | ||
| 740 | * of the write was an mmap'ed region of the file we're writing. Either | ||
| 741 | * one is a pretty crazy thing to do, so we don't support it 100%. If | ||
| 742 | * this invalidation fails, tough, the write still worked... | ||
| 743 | * | ||
| 744 | * And this page cache invalidation has to be after dio->end_io(), as | ||
| 745 | * some filesystems convert unwritten extents to real allocations in | ||
| 746 | * end_io() when necessary, otherwise a racing buffered read would cache | ||
| 747 | * zeros from unwritten extents. | ||
| 748 | */ | ||
| 749 | if (!dio->error && | ||
| 750 | (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) { | ||
| 751 | int err; | ||
| 752 | err = invalidate_inode_pages2_range(inode->i_mapping, | ||
| 753 | offset >> PAGE_SHIFT, | ||
| 754 | (offset + dio->size - 1) >> PAGE_SHIFT); | ||
| 755 | WARN_ON_ONCE(err); | ||
| 756 | } | ||
| 757 | |||
| 751 | inode_dio_end(file_inode(iocb->ki_filp)); | 758 | inode_dio_end(file_inode(iocb->ki_filp)); |
| 752 | kfree(dio); | 759 | kfree(dio); |
| 753 | 760 | ||
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index def32fa1c225..89263797cf32 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c | |||
| @@ -3852,6 +3852,17 @@ xfs_trim_extent( | |||
| 3852 | } | 3852 | } |
| 3853 | } | 3853 | } |
| 3854 | 3854 | ||
| 3855 | /* trim extent to within eof */ | ||
| 3856 | void | ||
| 3857 | xfs_trim_extent_eof( | ||
| 3858 | struct xfs_bmbt_irec *irec, | ||
| 3859 | struct xfs_inode *ip) | ||
| 3860 | |||
| 3861 | { | ||
| 3862 | xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount, | ||
| 3863 | i_size_read(VFS_I(ip)))); | ||
| 3864 | } | ||
| 3865 | |||
| 3855 | /* | 3866 | /* |
| 3856 | * Trim the returned map to the required bounds | 3867 | * Trim the returned map to the required bounds |
| 3857 | */ | 3868 | */ |
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 851982a5dfbc..502e0d8fb4ff 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h | |||
| @@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, | |||
| 208 | 208 | ||
| 209 | void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, | 209 | void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, |
| 210 | xfs_filblks_t len); | 210 | xfs_filblks_t len); |
| 211 | void xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *); | ||
| 211 | int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); | 212 | int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); |
| 212 | void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); | 213 | void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); |
| 213 | void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, | 214 | void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f18e5932aec4..a3eeaba156c5 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
| @@ -446,6 +446,19 @@ xfs_imap_valid( | |||
| 446 | { | 446 | { |
| 447 | offset >>= inode->i_blkbits; | 447 | offset >>= inode->i_blkbits; |
| 448 | 448 | ||
| 449 | /* | ||
| 450 | * We have to make sure the cached mapping is within EOF to protect | ||
| 451 | * against eofblocks trimming on file release leaving us with a stale | ||
| 452 | * mapping. Otherwise, a page for a subsequent file extending buffered | ||
| 453 | * write could get picked up by this writeback cycle and written to the | ||
| 454 | * wrong blocks. | ||
| 455 | * | ||
| 456 | * Note that what we really want here is a generic mapping invalidation | ||
| 457 | * mechanism to protect us from arbitrary extent modifying contexts, not | ||
| 458 | * just eofblocks. | ||
| 459 | */ | ||
| 460 | xfs_trim_extent_eof(imap, XFS_I(inode)); | ||
| 461 | |||
| 449 | return offset >= imap->br_startoff && | 462 | return offset >= imap->br_startoff && |
| 450 | offset < imap->br_startoff + imap->br_blockcount; | 463 | offset < imap->br_startoff + imap->br_blockcount; |
| 451 | } | 464 | } |
| @@ -735,6 +748,14 @@ xfs_vm_invalidatepage( | |||
| 735 | { | 748 | { |
| 736 | trace_xfs_invalidatepage(page->mapping->host, page, offset, | 749 | trace_xfs_invalidatepage(page->mapping->host, page, offset, |
| 737 | length); | 750 | length); |
| 751 | |||
| 752 | /* | ||
| 753 | * If we are invalidating the entire page, clear the dirty state from it | ||
| 754 | * so that we can check for attempts to release dirty cached pages in | ||
| 755 | * xfs_vm_releasepage(). | ||
| 756 | */ | ||
| 757 | if (offset == 0 && length >= PAGE_SIZE) | ||
| 758 | cancel_dirty_page(page); | ||
| 738 | block_invalidatepage(page, offset, length); | 759 | block_invalidatepage(page, offset, length); |
| 739 | } | 760 | } |
| 740 | 761 | ||
| @@ -1190,25 +1211,27 @@ xfs_vm_releasepage( | |||
| 1190 | * mm accommodates an old ext3 case where clean pages might not have had | 1211 | * mm accommodates an old ext3 case where clean pages might not have had |
| 1191 | * the dirty bit cleared. Thus, it can send actual dirty pages to | 1212 | * the dirty bit cleared. Thus, it can send actual dirty pages to |
| 1192 | * ->releasepage() via shrink_active_list(). Conversely, | 1213 | * ->releasepage() via shrink_active_list(). Conversely, |
| 1193 | * block_invalidatepage() can send pages that are still marked dirty | 1214 | * block_invalidatepage() can send pages that are still marked dirty but |
| 1194 | * but otherwise have invalidated buffers. | 1215 | * otherwise have invalidated buffers. |
| 1195 | * | 1216 | * |
| 1196 | * We want to release the latter to avoid unnecessary buildup of the | 1217 | * We want to release the latter to avoid unnecessary buildup of the |
| 1197 | * LRU, skip the former and warn if we've left any lingering | 1218 | * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages |
| 1198 | * delalloc/unwritten buffers on clean pages. Skip pages with delalloc | 1219 | * that are entirely invalidated and need to be released. Hence the |
| 1199 | * or unwritten buffers and warn if the page is not dirty. Otherwise | 1220 | * only time we should get dirty pages here is through |
| 1200 | * try to release the buffers. | 1221 | * shrink_active_list() and so we can simply skip those now. |
| 1222 | * | ||
| 1223 | * Warn if we've left any lingering delalloc/unwritten buffers on clean | ||
| 1224 | * or invalidated pages we are about to release. | ||
| 1201 | */ | 1225 | */ |
| 1226 | if (PageDirty(page)) | ||
| 1227 | return 0; | ||
| 1228 | |||
| 1202 | xfs_count_page_state(page, &delalloc, &unwritten); | 1229 | xfs_count_page_state(page, &delalloc, &unwritten); |
| 1203 | 1230 | ||
| 1204 | if (delalloc) { | 1231 | if (WARN_ON_ONCE(delalloc)) |
| 1205 | WARN_ON_ONCE(!PageDirty(page)); | ||
| 1206 | return 0; | 1232 | return 0; |
| 1207 | } | 1233 | if (WARN_ON_ONCE(unwritten)) |
| 1208 | if (unwritten) { | ||
| 1209 | WARN_ON_ONCE(!PageDirty(page)); | ||
| 1210 | return 0; | 1234 | return 0; |
| 1211 | } | ||
| 1212 | 1235 | ||
| 1213 | return try_to_free_buffers(page); | 1236 | return try_to_free_buffers(page); |
| 1214 | } | 1237 | } |
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 560e0b40ac1b..43cfc07996a4 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c | |||
| @@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper( | |||
| 367 | return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); | 367 | return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); |
| 368 | } | 368 | } |
| 369 | 369 | ||
| 370 | /* Transform a rtbitmap "record" into a fsmap */ | ||
| 371 | STATIC int | ||
| 372 | xfs_getfsmap_rtdev_rtbitmap_helper( | ||
| 373 | struct xfs_trans *tp, | ||
| 374 | struct xfs_rtalloc_rec *rec, | ||
| 375 | void *priv) | ||
| 376 | { | ||
| 377 | struct xfs_mount *mp = tp->t_mountp; | ||
| 378 | struct xfs_getfsmap_info *info = priv; | ||
| 379 | struct xfs_rmap_irec irec; | ||
| 380 | xfs_daddr_t rec_daddr; | ||
| 381 | |||
| 382 | rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); | ||
| 383 | |||
| 384 | irec.rm_startblock = rec->ar_startblock; | ||
| 385 | irec.rm_blockcount = rec->ar_blockcount; | ||
| 386 | irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ | ||
| 387 | irec.rm_offset = 0; | ||
| 388 | irec.rm_flags = 0; | ||
| 389 | |||
| 390 | return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); | ||
| 391 | } | ||
| 392 | |||
| 393 | /* Transform a bnobt irec into a fsmap */ | 370 | /* Transform a bnobt irec into a fsmap */ |
| 394 | STATIC int | 371 | STATIC int |
| 395 | xfs_getfsmap_datadev_bnobt_helper( | 372 | xfs_getfsmap_datadev_bnobt_helper( |
| @@ -475,6 +452,30 @@ xfs_getfsmap_logdev( | |||
| 475 | return xfs_getfsmap_helper(tp, info, &rmap, 0); | 452 | return xfs_getfsmap_helper(tp, info, &rmap, 0); |
| 476 | } | 453 | } |
| 477 | 454 | ||
| 455 | #ifdef CONFIG_XFS_RT | ||
| 456 | /* Transform a rtbitmap "record" into a fsmap */ | ||
| 457 | STATIC int | ||
| 458 | xfs_getfsmap_rtdev_rtbitmap_helper( | ||
| 459 | struct xfs_trans *tp, | ||
| 460 | struct xfs_rtalloc_rec *rec, | ||
| 461 | void *priv) | ||
| 462 | { | ||
| 463 | struct xfs_mount *mp = tp->t_mountp; | ||
| 464 | struct xfs_getfsmap_info *info = priv; | ||
| 465 | struct xfs_rmap_irec irec; | ||
| 466 | xfs_daddr_t rec_daddr; | ||
| 467 | |||
| 468 | rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock); | ||
| 469 | |||
| 470 | irec.rm_startblock = rec->ar_startblock; | ||
| 471 | irec.rm_blockcount = rec->ar_blockcount; | ||
| 472 | irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ | ||
| 473 | irec.rm_offset = 0; | ||
| 474 | irec.rm_flags = 0; | ||
| 475 | |||
| 476 | return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); | ||
| 477 | } | ||
| 478 | |||
| 478 | /* Execute a getfsmap query against the realtime device. */ | 479 | /* Execute a getfsmap query against the realtime device. */ |
| 479 | STATIC int | 480 | STATIC int |
| 480 | __xfs_getfsmap_rtdev( | 481 | __xfs_getfsmap_rtdev( |
| @@ -521,7 +522,6 @@ __xfs_getfsmap_rtdev( | |||
| 521 | return query_fn(tp, info); | 522 | return query_fn(tp, info); |
| 522 | } | 523 | } |
| 523 | 524 | ||
| 524 | #ifdef CONFIG_XFS_RT | ||
| 525 | /* Actually query the realtime bitmap. */ | 525 | /* Actually query the realtime bitmap. */ |
| 526 | STATIC int | 526 | STATIC int |
| 527 | xfs_getfsmap_rtdev_rtbitmap_query( | 527 | xfs_getfsmap_rtdev_rtbitmap_query( |
