aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Darrick J. Wong <darrick.wong@oracle.com> 2017-02-02 18:14:01 -0500
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org> 2017-04-08 03:30:31 -0400
commit 8370826f7d3274fe64de32c58aa49a7384f0c9e9 (patch)
tree 68a514c9cb92588d37c9524cc7326dd170a2faf7 /fs
parent 3d2bd2fd5cbaf3d4e0f0642030cd7d21facb07e7 (diff)
xfs: allow unwritten extents in the CoW fork
commit 05a630d76bd3f39baf0eecfa305bed2820796dee upstream.

In the data fork, we only allow extents to perform the following state
transitions:

    delay -> real <-> unwritten

There's no way to move directly from a delalloc reservation to an
/unwritten/ allocated extent. However, for the CoW fork we want to be
able to do the following to each extent:

    delalloc -> unwritten -> written -> remapped to data fork

This will help us to avoid a race in the speculative CoW preallocation
code between a first thread that is allocating a CoW extent and a second
thread that is remapping part of a file after a write.

In order to do this, however, we need two things: first, we have to be
able to transition from da to unwritten, and second the function that
converts between real and unwritten has to be made aware of the cow
fork. Do both of those things.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'fs')
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 80
1 files changed, 50 insertions, 30 deletions
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 02c466081991..9e3b069fc84b 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1952,6 +1952,7 @@ xfs_bmap_add_extent_delay_real(
1952 */ 1952 */
1953 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_); 1953 trace_xfs_bmap_pre_update(bma->ip, bma->idx, state, _THIS_IP_);
1954 xfs_bmbt_set_startblock(ep, new->br_startblock); 1954 xfs_bmbt_set_startblock(ep, new->br_startblock);
1955 xfs_bmbt_set_state(ep, new->br_state);
1955 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_); 1956 trace_xfs_bmap_post_update(bma->ip, bma->idx, state, _THIS_IP_);
1956 1957
1957 (*nextents)++; 1958 (*nextents)++;
@@ -2290,6 +2291,7 @@ STATIC int /* error */
2290xfs_bmap_add_extent_unwritten_real( 2291xfs_bmap_add_extent_unwritten_real(
2291 struct xfs_trans *tp, 2292 struct xfs_trans *tp,
2292 xfs_inode_t *ip, /* incore inode pointer */ 2293 xfs_inode_t *ip, /* incore inode pointer */
2294 int whichfork,
2293 xfs_extnum_t *idx, /* extent number to update/insert */ 2295 xfs_extnum_t *idx, /* extent number to update/insert */
2294 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */ 2296 xfs_btree_cur_t **curp, /* if *curp is null, not a btree */
2295 xfs_bmbt_irec_t *new, /* new data to add to file extents */ 2297 xfs_bmbt_irec_t *new, /* new data to add to file extents */
@@ -2309,12 +2311,14 @@ xfs_bmap_add_extent_unwritten_real(
2309 /* left is 0, right is 1, prev is 2 */ 2311 /* left is 0, right is 1, prev is 2 */
2310 int rval=0; /* return value (logging flags) */ 2312 int rval=0; /* return value (logging flags) */
2311 int state = 0;/* state bits, accessed thru macros */ 2313 int state = 0;/* state bits, accessed thru macros */
2312 struct xfs_mount *mp = tp->t_mountp; 2314 struct xfs_mount *mp = ip->i_mount;
2313 2315
2314 *logflagsp = 0; 2316 *logflagsp = 0;
2315 2317
2316 cur = *curp; 2318 cur = *curp;
2317 ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); 2319 ifp = XFS_IFORK_PTR(ip, whichfork);
2320 if (whichfork == XFS_COW_FORK)
2321 state |= BMAP_COWFORK;
2318 2322
2319 ASSERT(*idx >= 0); 2323 ASSERT(*idx >= 0);
2320 ASSERT(*idx <= xfs_iext_count(ifp)); 2324 ASSERT(*idx <= xfs_iext_count(ifp));
@@ -2373,7 +2377,7 @@ xfs_bmap_add_extent_unwritten_real(
2373 * Don't set contiguous if the combined extent would be too large. 2377 * Don't set contiguous if the combined extent would be too large.
2374 * Also check for all-three-contiguous being too large. 2378 * Also check for all-three-contiguous being too large.
2375 */ 2379 */
2376 if (*idx < xfs_iext_count(&ip->i_df) - 1) { 2380 if (*idx < xfs_iext_count(ifp) - 1) {
2377 state |= BMAP_RIGHT_VALID; 2381 state |= BMAP_RIGHT_VALID;
2378 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT); 2382 xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *idx + 1), &RIGHT);
2379 if (isnullstartblock(RIGHT.br_startblock)) 2383 if (isnullstartblock(RIGHT.br_startblock))
@@ -2413,7 +2417,8 @@ xfs_bmap_add_extent_unwritten_real(
2413 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2417 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2414 2418
2415 xfs_iext_remove(ip, *idx + 1, 2, state); 2419 xfs_iext_remove(ip, *idx + 1, 2, state);
2416 ip->i_d.di_nextents -= 2; 2420 XFS_IFORK_NEXT_SET(ip, whichfork,
2421 XFS_IFORK_NEXTENTS(ip, whichfork) - 2);
2417 if (cur == NULL) 2422 if (cur == NULL)
2418 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2423 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2419 else { 2424 else {
@@ -2456,7 +2461,8 @@ xfs_bmap_add_extent_unwritten_real(
2456 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2461 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2457 2462
2458 xfs_iext_remove(ip, *idx + 1, 1, state); 2463 xfs_iext_remove(ip, *idx + 1, 1, state);
2459 ip->i_d.di_nextents--; 2464 XFS_IFORK_NEXT_SET(ip, whichfork,
2465 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2460 if (cur == NULL) 2466 if (cur == NULL)
2461 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2467 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2462 else { 2468 else {
@@ -2491,7 +2497,8 @@ xfs_bmap_add_extent_unwritten_real(
2491 xfs_bmbt_set_state(ep, newext); 2497 xfs_bmbt_set_state(ep, newext);
2492 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2498 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2493 xfs_iext_remove(ip, *idx + 1, 1, state); 2499 xfs_iext_remove(ip, *idx + 1, 1, state);
2494 ip->i_d.di_nextents--; 2500 XFS_IFORK_NEXT_SET(ip, whichfork,
2501 XFS_IFORK_NEXTENTS(ip, whichfork) - 1);
2495 if (cur == NULL) 2502 if (cur == NULL)
2496 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2503 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2497 else { 2504 else {
@@ -2603,7 +2610,8 @@ xfs_bmap_add_extent_unwritten_real(
2603 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_); 2610 trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
2604 2611
2605 xfs_iext_insert(ip, *idx, 1, new, state); 2612 xfs_iext_insert(ip, *idx, 1, new, state);
2606 ip->i_d.di_nextents++; 2613 XFS_IFORK_NEXT_SET(ip, whichfork,
2614 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2607 if (cur == NULL) 2615 if (cur == NULL)
2608 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2616 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2609 else { 2617 else {
@@ -2681,7 +2689,8 @@ xfs_bmap_add_extent_unwritten_real(
2681 ++*idx; 2689 ++*idx;
2682 xfs_iext_insert(ip, *idx, 1, new, state); 2690 xfs_iext_insert(ip, *idx, 1, new, state);
2683 2691
2684 ip->i_d.di_nextents++; 2692 XFS_IFORK_NEXT_SET(ip, whichfork,
2693 XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
2685 if (cur == NULL) 2694 if (cur == NULL)
2686 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2695 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2687 else { 2696 else {
@@ -2729,7 +2738,8 @@ xfs_bmap_add_extent_unwritten_real(
2729 ++*idx; 2738 ++*idx;
2730 xfs_iext_insert(ip, *idx, 2, &r[0], state); 2739 xfs_iext_insert(ip, *idx, 2, &r[0], state);
2731 2740
2732 ip->i_d.di_nextents += 2; 2741 XFS_IFORK_NEXT_SET(ip, whichfork,
2742 XFS_IFORK_NEXTENTS(ip, whichfork) + 2);
2733 if (cur == NULL) 2743 if (cur == NULL)
2734 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT; 2744 rval = XFS_ILOG_CORE | XFS_ILOG_DEXT;
2735 else { 2745 else {
@@ -2783,17 +2793,17 @@ xfs_bmap_add_extent_unwritten_real(
2783 } 2793 }
2784 2794
2785 /* update reverse mappings */ 2795 /* update reverse mappings */
2786 error = xfs_rmap_convert_extent(mp, dfops, ip, XFS_DATA_FORK, new); 2796 error = xfs_rmap_convert_extent(mp, dfops, ip, whichfork, new);
2787 if (error) 2797 if (error)
2788 goto done; 2798 goto done;
2789 2799
2790 /* convert to a btree if necessary */ 2800 /* convert to a btree if necessary */
2791 if (xfs_bmap_needs_btree(ip, XFS_DATA_FORK)) { 2801 if (xfs_bmap_needs_btree(ip, whichfork)) {
2792 int tmp_logflags; /* partial log flag return val */ 2802 int tmp_logflags; /* partial log flag return val */
2793 2803
2794 ASSERT(cur == NULL); 2804 ASSERT(cur == NULL);
2795 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur, 2805 error = xfs_bmap_extents_to_btree(tp, ip, first, dfops, &cur,
2796 0, &tmp_logflags, XFS_DATA_FORK); 2806 0, &tmp_logflags, whichfork);
2797 *logflagsp |= tmp_logflags; 2807 *logflagsp |= tmp_logflags;
2798 if (error) 2808 if (error)
2799 goto done; 2809 goto done;
@@ -2805,7 +2815,7 @@ xfs_bmap_add_extent_unwritten_real(
2805 *curp = cur; 2815 *curp = cur;
2806 } 2816 }
2807 2817
2808 xfs_bmap_check_leaf_extents(*curp, ip, XFS_DATA_FORK); 2818 xfs_bmap_check_leaf_extents(*curp, ip, whichfork);
2809done: 2819done:
2810 *logflagsp |= rval; 2820 *logflagsp |= rval;
2811 return error; 2821 return error;
@@ -4458,10 +4468,16 @@ xfs_bmapi_allocate(
4458 bma->got.br_state = XFS_EXT_NORM; 4468 bma->got.br_state = XFS_EXT_NORM;
4459 4469
4460 /* 4470 /*
4461 * A wasdelay extent has been initialized, so shouldn't be flagged 4471 * In the data fork, a wasdelay extent has been initialized, so
4462 * as unwritten. 4472 * shouldn't be flagged as unwritten.
4473 *
4474 * For the cow fork, however, we convert delalloc reservations
4475 * (extents allocated for speculative preallocation) to
4476 * allocated unwritten extents, and only convert the unwritten
4477 * extents to real extents when we're about to write the data.
4463 */ 4478 */
4464 if (!bma->wasdel && (bma->flags & XFS_BMAPI_PREALLOC) && 4479 if ((!bma->wasdel || (bma->flags & XFS_BMAPI_COWFORK)) &&
4480 (bma->flags & XFS_BMAPI_PREALLOC) &&
4465 xfs_sb_version_hasextflgbit(&mp->m_sb)) 4481 xfs_sb_version_hasextflgbit(&mp->m_sb))
4466 bma->got.br_state = XFS_EXT_UNWRITTEN; 4482 bma->got.br_state = XFS_EXT_UNWRITTEN;
4467 4483
@@ -4512,8 +4528,6 @@ xfs_bmapi_convert_unwritten(
4512 (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT)) 4528 (XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT))
4513 return 0; 4529 return 0;
4514 4530
4515 ASSERT(whichfork != XFS_COW_FORK);
4516
4517 /* 4531 /*
4518 * Modify (by adding) the state flag, if writing. 4532 * Modify (by adding) the state flag, if writing.
4519 */ 4533 */
@@ -4538,8 +4552,8 @@ xfs_bmapi_convert_unwritten(
4538 return error; 4552 return error;
4539 } 4553 }
4540 4554
4541 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, &bma->idx, 4555 error = xfs_bmap_add_extent_unwritten_real(bma->tp, bma->ip, whichfork,
4542 &bma->cur, mval, bma->firstblock, bma->dfops, 4556 &bma->idx, &bma->cur, mval, bma->firstblock, bma->dfops,
4543 &tmp_logflags); 4557 &tmp_logflags);
4544 /* 4558 /*
4545 * Log the inode core unconditionally in the unwritten extent conversion 4559 * Log the inode core unconditionally in the unwritten extent conversion
@@ -4548,8 +4562,12 @@ xfs_bmapi_convert_unwritten(
4548 * in the transaction for the sake of fsync(), even if nothing has 4562 * in the transaction for the sake of fsync(), even if nothing has
4549 * changed, because fsync() will not force the log for this transaction 4563 * changed, because fsync() will not force the log for this transaction
4550 * unless it sees the inode pinned. 4564 * unless it sees the inode pinned.
4565 *
4566 * Note: If we're only converting cow fork extents, there aren't
4567 * any on-disk updates to make, so we don't need to log anything.
4551 */ 4568 */
4552 bma->logflags |= tmp_logflags | XFS_ILOG_CORE; 4569 if (whichfork != XFS_COW_FORK)
4570 bma->logflags |= tmp_logflags | XFS_ILOG_CORE;
4553 if (error) 4571 if (error)
4554 return error; 4572 return error;
4555 4573
@@ -4623,15 +4641,15 @@ xfs_bmapi_write(
4623 ASSERT(*nmap >= 1); 4641 ASSERT(*nmap >= 1);
4624 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP); 4642 ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);
4625 ASSERT(!(flags & XFS_BMAPI_IGSTATE)); 4643 ASSERT(!(flags & XFS_BMAPI_IGSTATE));
4626 ASSERT(tp != NULL); 4644 ASSERT(tp != NULL ||
4645 (flags & (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK)) ==
4646 (XFS_BMAPI_CONVERT | XFS_BMAPI_COWFORK));
4627 ASSERT(len > 0); 4647 ASSERT(len > 0);
4628 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); 4648 ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL);
4629 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 4649 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
4630 ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK); 4650 ASSERT(!(flags & XFS_BMAPI_REMAP) || whichfork == XFS_DATA_FORK);
4631 ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP)); 4651 ASSERT(!(flags & XFS_BMAPI_PREALLOC) || !(flags & XFS_BMAPI_REMAP));
4632 ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP)); 4652 ASSERT(!(flags & XFS_BMAPI_CONVERT) || !(flags & XFS_BMAPI_REMAP));
4633 ASSERT(!(flags & XFS_BMAPI_PREALLOC) || whichfork != XFS_COW_FORK);
4634 ASSERT(!(flags & XFS_BMAPI_CONVERT) || whichfork != XFS_COW_FORK);
4635 4653
4636 /* zeroing is for currently only for data extents, not metadata */ 4654 /* zeroing is for currently only for data extents, not metadata */
4637 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) != 4655 ASSERT((flags & (XFS_BMAPI_METADATA | XFS_BMAPI_ZERO)) !=
@@ -5653,8 +5671,8 @@ __xfs_bunmapi(
5653 } 5671 }
5654 del.br_state = XFS_EXT_UNWRITTEN; 5672 del.br_state = XFS_EXT_UNWRITTEN;
5655 error = xfs_bmap_add_extent_unwritten_real(tp, ip, 5673 error = xfs_bmap_add_extent_unwritten_real(tp, ip,
5656 &lastx, &cur, &del, firstblock, dfops, 5674 whichfork, &lastx, &cur, &del,
5657 &logflags); 5675 firstblock, dfops, &logflags);
5658 if (error) 5676 if (error)
5659 goto error0; 5677 goto error0;
5660 goto nodelete; 5678 goto nodelete;
@@ -5711,8 +5729,9 @@ __xfs_bunmapi(
5711 prev.br_state = XFS_EXT_UNWRITTEN; 5729 prev.br_state = XFS_EXT_UNWRITTEN;
5712 lastx--; 5730 lastx--;
5713 error = xfs_bmap_add_extent_unwritten_real(tp, 5731 error = xfs_bmap_add_extent_unwritten_real(tp,
5714 ip, &lastx, &cur, &prev, 5732 ip, whichfork, &lastx, &cur,
5715 firstblock, dfops, &logflags); 5733 &prev, firstblock, dfops,
5734 &logflags);
5716 if (error) 5735 if (error)
5717 goto error0; 5736 goto error0;
5718 goto nodelete; 5737 goto nodelete;
@@ -5720,8 +5739,9 @@ __xfs_bunmapi(
5720 ASSERT(del.br_state == XFS_EXT_NORM); 5739 ASSERT(del.br_state == XFS_EXT_NORM);
5721 del.br_state = XFS_EXT_UNWRITTEN; 5740 del.br_state = XFS_EXT_UNWRITTEN;
5722 error = xfs_bmap_add_extent_unwritten_real(tp, 5741 error = xfs_bmap_add_extent_unwritten_real(tp,
5723 ip, &lastx, &cur, &del, 5742 ip, whichfork, &lastx, &cur,
5724 firstblock, dfops, &logflags); 5743 &del, firstblock, dfops,
5744 &logflags);
5725 if (error) 5745 if (error)
5726 goto error0; 5746 goto error0;
5727 goto nodelete; 5747 goto nodelete;