-rw-r--r--	fs/direct-io.c           | 20
-rw-r--r--	fs/iomap.c               | 41
-rw-r--r--	fs/xfs/libxfs/xfs_bmap.c | 11
-rw-r--r--	fs/xfs/libxfs/xfs_bmap.h |  1
-rw-r--r--	fs/xfs/xfs_aops.c        | 47
-rw-r--r--	fs/xfs/xfs_fsmap.c       | 48
6 files changed, 107 insertions, 61 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 563254869e2f..b53e66d9abd7 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -265,12 +265,24 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
 	if (ret == 0)
 		ret = transferred;
 
+	if (dio->end_io) {
+		// XXX: ki_pos??
+		err = dio->end_io(dio->iocb, offset, ret, dio->private);
+		if (err)
+			ret = err;
+	}
+
 	/*
 	 * Try again to invalidate clean pages which might have been cached by
 	 * non-direct readahead, or faulted in by get_user_pages() if the source
 	 * of the write was an mmap'ed region of the file we're writing. Either
 	 * one is a pretty crazy thing to do, so we don't support it 100%. If
 	 * this invalidation fails, tough, the write still worked...
+	 *
+	 * And this page cache invalidation has to be after dio->end_io(), as
+	 * some filesystems convert unwritten extents to real allocations in
+	 * end_io() when necessary, otherwise a racing buffer read would cache
+	 * zeros from unwritten extents.
 	 */
 	if (flags & DIO_COMPLETE_INVALIDATE &&
 	    ret > 0 && dio->op == REQ_OP_WRITE &&
@@ -281,14 +293,6 @@ static ssize_t dio_complete(struct dio *dio, ssize_t ret, unsigned int flags)
 		WARN_ON_ONCE(err);
 	}
 
-	if (dio->end_io) {
-
-		// XXX: ki_pos??
-		err = dio->end_io(dio->iocb, offset, ret, dio->private);
-		if (err)
-			ret = err;
-	}
-
 	if (!(dio->flags & DIO_SKIP_DIO_COUNT))
 		inode_dio_end(dio->inode);
 
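The reordering above matters because ->end_io is where a filesystem may convert unwritten extents into real allocations. Below is a minimal user-space toy model of the race the new comment describes; all names (extent_written, cached_page, buffered_read) are invented stand-ins, not kernel code.

#include <stdio.h>

/* Invented stand-ins: extent_written == 0 means reads see zeros. */
static int extent_written;
static int cached_page = -1;	/* -1: nothing cached, else cached value */

static void buffered_read(void)	/* a racing reader fills the cache */
{
	if (cached_page < 0)
		cached_page = extent_written ? 42 : 0;	/* 42 = real data */
}

int main(void)
{
	/* dio_complete(), patched order: */
	extent_written = 1;	/* 1. dio->end_io(): convert unwritten */
	buffered_read();	/* racing read in the window caches 42  */
	cached_page = -1;	/* 2. invalidate_inode_pages2_range()   */

	/*
	 * With the old order (invalidate first, end_io second), a read
	 * racing between the two steps would cache 0 and completion
	 * would leave that stale zero page in place.
	 */
	buffered_read();
	printf("read sees %d\n", cached_page);	/* 42, never stale zeros */
	return 0;
}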
diff --git a/fs/iomap.c b/fs/iomap.c
index be61cf742b5e..d4801f8dd4fd 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -714,23 +714,9 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
 	struct kiocb *iocb = dio->iocb;
 	struct inode *inode = file_inode(iocb->ki_filp);
+	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	/*
-	 * Try again to invalidate clean pages which might have been cached by
-	 * non-direct readahead, or faulted in by get_user_pages() if the source
-	 * of the write was an mmap'ed region of the file we're writing. Either
-	 * one is a pretty crazy thing to do, so we don't support it 100%. If
-	 * this invalidation fails, tough, the write still worked...
-	 */
-	if (!dio->error &&
-	    (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
-		ret = invalidate_inode_pages2_range(inode->i_mapping,
-				iocb->ki_pos >> PAGE_SHIFT,
-				(iocb->ki_pos + dio->size - 1) >> PAGE_SHIFT);
-		WARN_ON_ONCE(ret);
-	}
-
 	if (dio->end_io) {
 		ret = dio->end_io(iocb,
 				dio->error ? dio->error : dio->size,
@@ -742,12 +728,33 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	if (likely(!ret)) {
 		ret = dio->size;
 		/* check for short read */
-		if (iocb->ki_pos + ret > dio->i_size &&
+		if (offset + ret > dio->i_size &&
 		    !(dio->flags & IOMAP_DIO_WRITE))
-			ret = dio->i_size - iocb->ki_pos;
+			ret = dio->i_size - offset;
 		iocb->ki_pos += ret;
 	}
 
+	/*
+	 * Try again to invalidate clean pages which might have been cached by
+	 * non-direct readahead, or faulted in by get_user_pages() if the source
+	 * of the write was an mmap'ed region of the file we're writing. Either
+	 * one is a pretty crazy thing to do, so we don't support it 100%. If
+	 * this invalidation fails, tough, the write still worked...
+	 *
+	 * And this page cache invalidation has to be after dio->end_io(), as
+	 * some filesystems convert unwritten extents to real allocations in
+	 * end_io() when necessary, otherwise a racing buffer read would cache
+	 * zeros from unwritten extents.
+	 */
+	if (!dio->error &&
+	    (dio->flags & IOMAP_DIO_WRITE) && inode->i_mapping->nrpages) {
+		int err;
+		err = invalidate_inode_pages2_range(inode->i_mapping,
+				offset >> PAGE_SHIFT,
+				(offset + dio->size - 1) >> PAGE_SHIFT);
+		WARN_ON_ONCE(err);
+	}
+
 	inode_dio_end(file_inode(iocb->ki_filp));
 	kfree(dio);
 
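A detail worth noting in the hunk above: iocb->ki_pos is saved in offset before completion advances it, and the invalidation range is computed from the saved value. The page indexes passed to invalidate_inode_pages2_range() come from simple shift arithmetic; here is a standalone sketch in plain C, assuming 4 KiB pages (PAGE_SHIFT of 12).

#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages, for illustration only */

int main(void)
{
	long long offset = 5000;	/* byte offset of the dio write */
	long long size = 10000;		/* bytes transferred */

	/* first and last page indexes covered by [offset, offset + size) */
	long long first = offset >> PAGE_SHIFT;
	long long last = (offset + size - 1) >> PAGE_SHIFT;

	/* prints "1 3": pages 1 through 3 must be invalidated */
	printf("%lld %lld\n", first, last);
	return 0;
}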
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index def32fa1c225..89263797cf32 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3852,6 +3852,17 @@ xfs_trim_extent(
 	}
 }
 
+/* trim extent to within eof */
+void
+xfs_trim_extent_eof(
+	struct xfs_bmbt_irec	*irec,
+	struct xfs_inode	*ip)
+
+{
+	xfs_trim_extent(irec, 0, XFS_B_TO_FSB(ip->i_mount,
+					      i_size_read(VFS_I(ip))));
+}
+
 /*
  * Trim the returned map to the required bounds
  */
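For readers unfamiliar with xfs_trim_extent(), the new wrapper clamps a mapping to [0, EOF in filesystem blocks). Below is a standalone sketch of that clamping with illustrative names (struct extent, trim_extent) standing in for the kernel's xfs_bmbt_irec API; the arithmetic differs in form from the real helper but has the same effect.

#include <stdio.h>

struct extent {
	long long startoff;	/* first file offset (in fs blocks) */
	long long blockcount;	/* length (in fs blocks) */
};

static void trim_extent(struct extent *e, long long bno, long long len)
{
	long long end = bno + len;		/* exclusive upper bound */
	long long e_end = e->startoff + e->blockcount;

	if (e->startoff < bno) {		/* clip the front */
		e->blockcount -= bno - e->startoff;
		e->startoff = bno;
	}
	if (e_end > end)			/* clip the tail */
		e->blockcount -= e_end - end;
	if (e->blockcount < 0)			/* no overlap at all */
		e->blockcount = 0;
}

int main(void)
{
	/* extent mapping blocks 90..109, with EOF at block 100 */
	struct extent e = { .startoff = 90, .blockcount = 20 };

	trim_extent(&e, 0, 100);	/* what xfs_trim_extent_eof does */
	printf("%lld %lld\n", e.startoff, e.blockcount);	/* 90 10 */
	return 0;
}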
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 851982a5dfbc..502e0d8fb4ff 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -208,6 +208,7 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,
 
 void	xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
 		xfs_filblks_t len);
+void	xfs_trim_extent_eof(struct xfs_bmbt_irec *, struct xfs_inode *);
 int	xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
 void	xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
 void	xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops,
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f18e5932aec4..a3eeaba156c5 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -446,6 +446,19 @@ xfs_imap_valid(
 {
 	offset >>= inode->i_blkbits;
 
+	/*
+	 * We have to make sure the cached mapping is within EOF to protect
+	 * against eofblocks trimming on file release leaving us with a stale
+	 * mapping. Otherwise, a page for a subsequent file extending buffered
+	 * write could get picked up by this writeback cycle and written to the
+	 * wrong blocks.
+	 *
+	 * Note that what we really want here is a generic mapping invalidation
+	 * mechanism to protect us from arbitrary extent modifying contexts, not
+	 * just eofblocks.
+	 */
+	xfs_trim_extent_eof(imap, XFS_I(inode));
+
 	return offset >= imap->br_startoff &&
 		offset < imap->br_startoff + imap->br_blockcount;
 }
@@ -735,6 +748,14 @@ xfs_vm_invalidatepage(
 {
 	trace_xfs_invalidatepage(page->mapping->host, page, offset,
 				 length);
+
+	/*
+	 * If we are invalidating the entire page, clear the dirty state from it
+	 * so that we can check for attempts to release dirty cached pages in
+	 * xfs_vm_releasepage().
+	 */
+	if (offset == 0 && length >= PAGE_SIZE)
+		cancel_dirty_page(page);
 	block_invalidatepage(page, offset, length);
 }
 
@@ -1190,25 +1211,27 @@ xfs_vm_releasepage(
 	 * mm accommodates an old ext3 case where clean pages might not have had
 	 * the dirty bit cleared. Thus, it can send actual dirty pages to
 	 * ->releasepage() via shrink_active_list(). Conversely,
-	 * block_invalidatepage() can send pages that are still marked dirty
-	 * but otherwise have invalidated buffers.
+	 * block_invalidatepage() can send pages that are still marked dirty but
+	 * otherwise have invalidated buffers.
 	 *
 	 * We want to release the latter to avoid unnecessary buildup of the
-	 * LRU, skip the former and warn if we've left any lingering
-	 * delalloc/unwritten buffers on clean pages. Skip pages with delalloc
-	 * or unwritten buffers and warn if the page is not dirty. Otherwise
-	 * try to release the buffers.
+	 * LRU, so xfs_vm_invalidatepage() clears the page dirty flag on pages
+	 * that are entirely invalidated and need to be released. Hence the
+	 * only time we should get dirty pages here is through
+	 * shrink_active_list() and so we can simply skip those now.
+	 *
+	 * warn if we've left any lingering delalloc/unwritten buffers on clean
+	 * or invalidated pages we are about to release.
 	 */
+	if (PageDirty(page))
+		return 0;
+
 	xfs_count_page_state(page, &delalloc, &unwritten);
 
-	if (delalloc) {
-		WARN_ON_ONCE(!PageDirty(page));
+	if (WARN_ON_ONCE(delalloc))
 		return 0;
-	}
-	if (unwritten) {
-		WARN_ON_ONCE(!PageDirty(page));
+	if (WARN_ON_ONCE(unwritten))
 		return 0;
-	}
 
 	return try_to_free_buffers(page);
 }
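Two things in the xfs_aops.c hunks deserve a note. First, xfs_vm_invalidatepage() now cancels dirty state on fully invalidated pages, so the only dirty pages reaching xfs_vm_releasepage() arrive via shrink_active_list() and can be skipped up front. Second, the collapsed checks rely on WARN_ON_ONCE() evaluating to its condition. The user-space stand-in below mimics, rather than reproduces, the kernel macro, and releasepage() here is a toy model of the patched function's tail.

#include <stdio.h>

/*
 * Stand-in for the kernel macro: warns at most once per call site and,
 * crucially, evaluates to the condition so it can drive an if().
 * Uses GNU C statement expressions, as the kernel does.
 */
#define WARN_ON_ONCE(cond) ({						\
	static int __warned;						\
	int __cond = !!(cond);						\
	if (__cond && !__warned) {					\
		__warned = 1;						\
		fprintf(stderr, "WARNING at %s:%d\n", __FILE__, __LINE__); \
	}								\
	__cond;								\
})

static int releasepage(int dirty, int delalloc, int unwritten)
{
	if (dirty)			/* only shrink_active_list() sends these */
		return 0;
	if (WARN_ON_ONCE(delalloc))	/* stale delalloc state: warn + refuse */
		return 0;
	if (WARN_ON_ONCE(unwritten))	/* stale unwritten state: warn + refuse */
		return 0;
	return 1;			/* try_to_free_buffers() stand-in */
}

int main(void)
{
	printf("%d\n", releasepage(0, 0, 0));	/* 1: clean page, released */
	printf("%d\n", releasepage(1, 0, 0));	/* 0: dirty, skipped quietly */
	printf("%d\n", releasepage(0, 1, 0));	/* 0: warns once, skipped */
	return 0;
}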
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 560e0b40ac1b..43cfc07996a4 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -367,29 +367,6 @@ xfs_getfsmap_datadev_helper(
 	return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr);
 }
 
-/* Transform a rtbitmap "record" into a fsmap */
-STATIC int
-xfs_getfsmap_rtdev_rtbitmap_helper(
-	struct xfs_trans		*tp,
-	struct xfs_rtalloc_rec		*rec,
-	void				*priv)
-{
-	struct xfs_mount		*mp = tp->t_mountp;
-	struct xfs_getfsmap_info	*info = priv;
-	struct xfs_rmap_irec		irec;
-	xfs_daddr_t			rec_daddr;
-
-	rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
-
-	irec.rm_startblock = rec->ar_startblock;
-	irec.rm_blockcount = rec->ar_blockcount;
-	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
-	irec.rm_offset = 0;
-	irec.rm_flags = 0;
-
-	return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
-}
-
 /* Transform a bnobt irec into a fsmap */
 STATIC int
 xfs_getfsmap_datadev_bnobt_helper(
@@ -475,6 +452,30 @@ xfs_getfsmap_logdev(
 	return xfs_getfsmap_helper(tp, info, &rmap, 0);
 }
 
+#ifdef CONFIG_XFS_RT
+/* Transform a rtbitmap "record" into a fsmap */
+STATIC int
+xfs_getfsmap_rtdev_rtbitmap_helper(
+	struct xfs_trans		*tp,
+	struct xfs_rtalloc_rec		*rec,
+	void				*priv)
+{
+	struct xfs_mount		*mp = tp->t_mountp;
+	struct xfs_getfsmap_info	*info = priv;
+	struct xfs_rmap_irec		irec;
+	xfs_daddr_t			rec_daddr;
+
+	rec_daddr = XFS_FSB_TO_BB(mp, rec->ar_startblock);
+
+	irec.rm_startblock = rec->ar_startblock;
+	irec.rm_blockcount = rec->ar_blockcount;
+	irec.rm_owner = XFS_RMAP_OWN_NULL;	/* "free" */
+	irec.rm_offset = 0;
+	irec.rm_flags = 0;
+
+	return xfs_getfsmap_helper(tp, info, &irec, rec_daddr);
+}
+
 /* Execute a getfsmap query against the realtime device. */
 STATIC int
 __xfs_getfsmap_rtdev(
@@ -521,7 +522,6 @@ __xfs_getfsmap_rtdev(
 	return query_fn(tp, info);
 }
 
-#ifdef CONFIG_XFS_RT
 /* Actually query the realtime bitmap. */
 STATIC int
 xfs_getfsmap_rtdev_rtbitmap_query(
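The fs/xfs/xfs_fsmap.c hunks only move xfs_getfsmap_rtdev_rtbitmap_helper() under the existing CONFIG_XFS_RT guard: a static helper whose only callers are realtime code would otherwise be unused in !CONFIG_XFS_RT builds and trigger -Wunused-function. The sketch below shows the general guard pattern with an invented helper name (rt_only_helper) and a stub alternative that this particular patch does not need, since its callers sit under the same guard.

#include <errno.h>
#include <stdio.h>

/* toggle to mimic a kernel config option; illustrative only */
#define CONFIG_XFS_RT 1

#ifdef CONFIG_XFS_RT
static int rt_only_helper(void)
{
	return 0;			/* real work would happen here */
}
#else
static inline int rt_only_helper(void)
{
	return -EOPNOTSUPP;		/* stub for builds without RT support */
}
#endif

int main(void)
{
	printf("%d\n", rt_only_helper());
	return 0;
}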