author		Christoph Hellwig <hch@infradead.org>	2011-04-24 15:06:16 -0400
committer	Alex Elder <aelder@sgi.com>		2011-04-28 14:18:04 -0400
commit		97d3ac75e5e0ebf7ca38ae74cebd201c09b97ab2
tree		e08af7a4bbb93c22dfeb1bcb2d3caf83aef717c9
parent		e26f0501cf743a4289603501413f97ffcb4612f2
xfs: exact busy extent tracking
Update the extent tree in case we have to reuse a busy extent, so that it
is always kept up to date.  This is done by replacing the busy list
searches with a new xfs_alloc_busy_reuse helper, which updates the busy
extent tree in case of a reuse.  This allows us to reuse metadata extents
unconditionally, and thus avoid log forces especially for allocation
btree blocks.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
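The heart of the change is simple interval arithmetic: an allocation that overlaps a busy extent may trim the busy record at either end or invalidate it entirely, but may never split it, because the busy list attached to the transaction/CIL context is immutable. The stand-alone C sketch below models that rule for the metadata (non-userdata) path; struct busy_extent as laid out here, busy_try_reuse(), and the sample values are invented for illustration and are not the kernel API. For simplicity the sketch also recomputes the length when trimming the front of the busy range.

/*
 * Hypothetical user-space model of the trimming done by the new
 * xfs_alloc_busy_update_extent(); not kernel code.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct busy_extent {
	uint32_t bno;		/* start block of the busy range */
	uint32_t length;	/* length; 0 marks an invalidated entry */
};

/*
 * Try to reuse the free range [fbno, fbno + flen), which is known to
 * overlap *busyp.  Returns false for the one shape that cannot be
 * handled: an allocation strictly inside the busy extent would split
 * it, so the caller must force the log and retry instead.
 */
static bool
busy_try_reuse(struct busy_extent *busyp, uint32_t fbno, uint32_t flen)
{
	uint32_t fend = fbno + flen;
	uint32_t bbno = busyp->bno;
	uint32_t bend = bbno + busyp->length;

	if (bbno < fbno && bend > fend)
		return false;			/* would split the busy extent */
	if (bbno >= fbno && bend <= fend) {
		busyp->length = 0;		/* fully covered: invalidate */
	} else if (fend < bend) {
		busyp->length = bend - fend;	/* free range covers the front */
		busyp->bno = fend;
	} else {
		busyp->length = fbno - bbno;	/* free range covers the tail */
	}
	return true;
}

int main(void)
{
	struct busy_extent b = { .bno = 100, .length = 50 };	/* [100, 150) */

	assert(busy_try_reuse(&b, 90, 30));	/* front overlap: now [120, 150) */
	assert(b.bno == 120 && b.length == 30);
	assert(!busy_try_reuse(&b, 125, 10));	/* interior: must force the log */
	assert(busy_try_reuse(&b, 110, 60));	/* fully covered: invalidated */
	assert(b.length == 0);
	printf("busy extent trimmed and invalidated as expected\n");
	return 0;
}

A false return corresponds to the patch's out_force_log path: the caller forces the log, which empties the busy tree, and then retries the lookup.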
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h  |  79
-rw-r--r--  fs/xfs/xfs_ag.h               |   1
-rw-r--r--  fs/xfs/xfs_alloc.c            | 373
-rw-r--r--  fs/xfs/xfs_alloc.h            |   4
-rw-r--r--  fs/xfs/xfs_alloc_btree.c      |  15
-rw-r--r--  fs/xfs/xfs_log.c              |   7
-rw-r--r--  fs/xfs/xfs_log.h              |   2
-rw-r--r--  fs/xfs/xfs_log_priv.h         |   2
-rw-r--r--  fs/xfs/xfs_types.h            |   2
9 files changed, 237 insertions(+), 248 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index e5252c4d95c9..37df9ad7fd1a 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap,
 
 );
 
-#define XFS_BUSY_SYNC \
-	{ 0, "async" }, \
-	{ 1, "sync" }
-
-TRACE_EVENT(xfs_alloc_busy,
-	TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno,
-		 xfs_agblock_t agbno, xfs_extlen_t len, int sync),
-	TP_ARGS(trans, agno, agbno, len, sync),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(struct xfs_trans *, tp)
-		__field(int, tid)
-		__field(xfs_agnumber_t, agno)
-		__field(xfs_agblock_t, agbno)
-		__field(xfs_extlen_t, len)
-		__field(int, sync)
-	),
-	TP_fast_assign(
-		__entry->dev = trans->t_mountp->m_super->s_dev;
-		__entry->tp = trans;
-		__entry->tid = trans->t_ticket->t_tid;
-		__entry->agno = agno;
-		__entry->agbno = agbno;
-		__entry->len = len;
-		__entry->sync = sync;
-	),
-	TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->tp,
-		  __entry->tid,
-		  __entry->agno,
-		  __entry->agbno,
-		  __entry->len,
-		  __print_symbolic(__entry->sync, XFS_BUSY_SYNC))
-
-);
-
-TRACE_EVENT(xfs_alloc_unbusy,
+DECLARE_EVENT_CLASS(xfs_busy_class,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
 		 xfs_agblock_t agbno, xfs_extlen_t len),
 	TP_ARGS(mp, agno, agbno, len),
@@ -1210,36 +1173,16 @@ TRACE_EVENT(xfs_alloc_unbusy,
 		  __entry->agbno,
 		  __entry->len)
 );
-
-#define XFS_BUSY_STATES \
-	{ 0, "missing" }, \
-	{ 1, "found" }
-
-TRACE_EVENT(xfs_alloc_busysearch,
-	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
-		 xfs_agblock_t agbno, xfs_extlen_t len, int found),
-	TP_ARGS(mp, agno, agbno, len, found),
-	TP_STRUCT__entry(
-		__field(dev_t, dev)
-		__field(xfs_agnumber_t, agno)
-		__field(xfs_agblock_t, agbno)
-		__field(xfs_extlen_t, len)
-		__field(int, found)
-	),
-	TP_fast_assign(
-		__entry->dev = mp->m_super->s_dev;
-		__entry->agno = agno;
-		__entry->agbno = agbno;
-		__entry->len = len;
-		__entry->found = found;
-	),
-	TP_printk("dev %d:%d agno %u agbno %u len %u %s",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  __entry->agno,
-		  __entry->agbno,
-		  __entry->len,
-		  __print_symbolic(__entry->found, XFS_BUSY_STATES))
-);
+#define DEFINE_BUSY_EVENT(name) \
+DEFINE_EVENT(xfs_busy_class, name, \
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
+		 xfs_agblock_t agbno, xfs_extlen_t len), \
+	TP_ARGS(mp, agno, agbno, len))
+DEFINE_BUSY_EVENT(xfs_alloc_busy);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_force);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse);
+DEFINE_BUSY_EVENT(xfs_alloc_busy_clear);
 
 TRACE_EVENT(xfs_alloc_busy_trim,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
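One practical effect of the DECLARE_EVENT_CLASS() conversion above is that all the busy tracepoints now share a single prototype, assignment block, and format string, so adding another event with the same signature is a one-liner. For instance, a hypothetical extra event (not part of this patch) would read:

DEFINE_BUSY_EVENT(xfs_alloc_busy_example);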
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h
index 58632cc17f2d..da0a561ffba2 100644
--- a/fs/xfs/xfs_ag.h
+++ b/fs/xfs/xfs_ag.h
@@ -187,7 +187,6 @@ struct xfs_busy_extent {
 	xfs_agnumber_t	agno;
 	xfs_agblock_t	bno;
 	xfs_extlen_t	length;
-	xlog_tid_t	tid;		/* transaction that created this */
 };
 
 /*
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index ff54ef428348..53157d4d5e8b 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1396,8 +1396,9 @@ xfs_alloc_ag_vextent_small(
 	if (error)
 		goto error0;
 	if (fbno != NULLAGBLOCK) {
-		if (xfs_alloc_busy_search(args->mp, args->agno, fbno, 1))
-			xfs_trans_set_sync(args->tp);
+		xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1,
+				     args->userdata);
+
 		if (args->userdata) {
 			xfs_buf_t	*bp;
 
@@ -2475,100 +2476,6 @@ error0:
 	return error;
 }
 
-
-/*
- * AG Busy list management
- * The busy list contains block ranges that have been freed but whose
- * transactions have not yet hit disk.  If any block listed in a busy
- * list is reused, the transaction that freed it must be forced to disk
- * before continuing to use the block.
- *
- * xfs_alloc_busy_insert - add to the per-ag busy list
- * xfs_alloc_busy_clear - remove an item from the per-ag busy list
- * xfs_alloc_busy_search - search for a busy extent
- */
-
-/*
- * Insert a new extent into the busy tree.
- *
- * The busy extent tree is indexed by the start block of the busy extent.
- * There can be multiple overlapping ranges in the busy extent tree but only
- * ever one entry at a given start block.  The reason for this is that
- * multi-block extents can be freed, then smaller chunks of that extent
- * allocated and freed again before the first transaction commit is on disk.
- * If the exact same start block is freed a second time, we have to wait for
- * that busy extent to pass out of the tree before the new extent is inserted.
- * There are two main cases we have to handle here.
- *
- * The first case is a transaction that triggers a "free - allocate - free"
- * cycle.  This can occur during btree manipulations as a btree block is freed
- * to the freelist, then allocated from the free list, then freed again.  In
- * this case, the second extent free is what triggers the duplicate and as
- * such the transaction IDs should match.  Because the extent was allocated in
- * this transaction, the transaction must be marked as synchronous.  This is
- * true for all cases where the free/alloc/free occurs in the one transaction,
- * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
- * This serves to catch violations of the second case quite effectively.
- *
- * The second case is where the free/alloc/free occur in different
- * transactions.  In this case, the thread freeing the extent the second time
- * can't mark the extent busy immediately because it is already tracked in a
- * transaction that may be committing.  When the log commit for the existing
- * busy extent completes, the busy extent will be removed from the tree.  If we
- * allow the second busy insert to continue using that busy extent structure,
- * it can be freed before this transaction is safely in the log.  Hence our
- * only option in this case is to force the log to remove the existing busy
- * extent from the list before we insert the new one with the current
- * transaction ID.
- *
- * The problem we are trying to avoid in the free-alloc-free in separate
- * transactions is most easily described with a timeline:
- *
- * Thread 1		Thread 2		Thread 3	xfslogd
- * xact alloc
- * free X
- * mark busy
- * commit xact
- * free xact
- *			xact alloc
- *			alloc X
- *			busy search
- *			mark xact sync
- *			commit xact
- *			free xact
- *			force log
- *			checkpoint starts
- *			....
- *						xact alloc
- *						free X
- *						mark busy
- *						finds match
- *						*** KABOOM! ***
- *						....
- *								log IO completes
- *								unbusy X
- *			checkpoint completes
- *
- * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
- * the checkpoint completes, and the busy extent it matched will have been
- * removed from the tree when it is woken.  Hence it can then continue safely.
- *
- * However, to ensure this matching process is robust, we need to use the
- * transaction ID for identifying the transaction, as delayed logging results
- * in the busy extent and transaction lifecycles being different.  i.e. the
- * busy extent is active for a lot longer than the transaction.  Hence the
- * transaction structure can be freed and reallocated, and can then mark the
- * same extent busy again in the new transaction.  In this case the new
- * transaction will have a different tid but can have the same address, and
- * hence we need to check against the tid.
- *
- * Future: for delayed logging, we could avoid the log force if the extent was
- * first freed in the current checkpoint sequence.  This, however, requires the
- * ability to pin the current checkpoint in memory until this transaction
- * commits to ensure that both the original free and the current one combine
- * logically into the one checkpoint.  If the checkpoint sequences are
- * different, however, we still need to wait on a log force.
- */
 void
 xfs_alloc_busy_insert(
 	struct xfs_trans	*tp,
@@ -2580,9 +2487,7 @@ xfs_alloc_busy_insert(
 	struct xfs_busy_extent	*busyp;
 	struct xfs_perag	*pag;
 	struct rb_node		**rbp;
-	struct rb_node		*parent;
-	int			match;
-
+	struct rb_node		*parent = NULL;
 
 	new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
 	if (!new) {
@@ -2591,7 +2496,7 @@ xfs_alloc_busy_insert(
 		 * block, make this a synchronous transaction to insure that
 		 * the block is not reused before this transaction commits.
 		 */
-		trace_xfs_alloc_busy(tp, agno, bno, len, 1);
+		trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
 		xfs_trans_set_sync(tp);
 		return;
 	}
@@ -2599,66 +2504,28 @@ xfs_alloc_busy_insert(
 	new->agno = agno;
 	new->bno = bno;
 	new->length = len;
-	new->tid = xfs_log_get_trans_ident(tp);
-
 	INIT_LIST_HEAD(&new->list);
 
 	/* trace before insert to be able to see failed inserts */
-	trace_xfs_alloc_busy(tp, agno, bno, len, 0);
+	trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
 
 	pag = xfs_perag_get(tp->t_mountp, new->agno);
-restart:
 	spin_lock(&pag->pagb_lock);
 	rbp = &pag->pagb_tree.rb_node;
-	parent = NULL;
-	busyp = NULL;
-	match = 0;
-	while (*rbp && match >= 0) {
+	while (*rbp) {
 		parent = *rbp;
 		busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
 
 		if (new->bno < busyp->bno) {
-			/* may overlap, but exact start block is lower */
 			rbp = &(*rbp)->rb_left;
-			if (new->bno + new->length > busyp->bno)
-				match = busyp->tid == new->tid ? 1 : -1;
+			ASSERT(new->bno + new->length <= busyp->bno);
 		} else if (new->bno > busyp->bno) {
-			/* may overlap, but exact start block is higher */
 			rbp = &(*rbp)->rb_right;
-			if (bno < busyp->bno + busyp->length)
-				match = busyp->tid == new->tid ? 1 : -1;
+			ASSERT(bno >= busyp->bno + busyp->length);
 		} else {
-			match = busyp->tid == new->tid ? 1 : -1;
-			break;
+			ASSERT(0);
 		}
 	}
-	if (match < 0) {
-		/* overlap marked busy in different transaction */
-		spin_unlock(&pag->pagb_lock);
-		xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
-		goto restart;
-	}
-	if (match > 0) {
-		/*
-		 * overlap marked busy in same transaction. Update if exact
-		 * start block match, otherwise combine the busy extents into
-		 * a single range.
-		 */
-		if (busyp->bno == new->bno) {
-			busyp->length = max(busyp->length, new->length);
-			spin_unlock(&pag->pagb_lock);
-			ASSERT(tp->t_flags & XFS_TRANS_SYNC);
-			xfs_perag_put(pag);
-			kmem_free(new);
-			return;
-		}
-		rb_erase(&busyp->rb_node, &pag->pagb_tree);
-		new->length = max(busyp->bno + busyp->length,
-				  new->bno + new->length) -
-			      min(busyp->bno, new->bno);
-		new->bno = min(busyp->bno, new->bno);
-	} else
-		busyp = NULL;
 
 	rb_link_node(&new->rb_node, parent, rbp);
 	rb_insert_color(&new->rb_node, &pag->pagb_tree);
@@ -2666,7 +2533,6 @@ restart:
 	list_add(&new->list, &tp->t_busy);
 	spin_unlock(&pag->pagb_lock);
 	xfs_perag_put(pag);
-	kmem_free(busyp);
 }
 
 /*
@@ -2715,12 +2581,196 @@ xfs_alloc_busy_search(
 		}
 	}
 	spin_unlock(&pag->pagb_lock);
-	trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
 	xfs_perag_put(pag);
 	return match;
 }
 
 /*
+ * The found free extent [fbno, fend] overlaps part or all of the given busy
+ * extent.  If the overlap covers the beginning, the end, or all of the busy
+ * extent, the overlapping portion can be made unbusy and used for the
+ * allocation.  We can't split a busy extent because we can't modify a
+ * transaction/CIL context busy list, but we can update an entry's block
+ * number or length.
+ *
+ * Returns true if the extent can safely be reused, or false if the search
+ * needs to be restarted.
+ */
+STATIC bool
+xfs_alloc_busy_update_extent(
+	struct xfs_mount	*mp,
+	struct xfs_perag	*pag,
+	struct xfs_busy_extent	*busyp,
+	xfs_agblock_t		fbno,
+	xfs_extlen_t		flen,
+	bool			userdata)
+{
+	xfs_agblock_t		fend = fbno + flen;
+	xfs_agblock_t		bbno = busyp->bno;
+	xfs_agblock_t		bend = bbno + busyp->length;
+
+	/*
+	 * If there is a busy extent overlapping a user allocation, we have
+	 * no choice but to force the log and retry the search.
+	 *
+	 * Fortunately this does not happen during normal operation, but
+	 * only if the filesystem is very low on space and has to dip into
+	 * the AGFL for normal allocations.
+	 */
+	if (userdata)
+		goto out_force_log;
+
+	if (bbno < fbno && bend > fend) {
+		/*
+		 * Case 1:
+		 *    bbno           bend
+		 *    +BBBBBBBBBBBBBBBBB+
+		 *        +---------+
+		 *        fbno   fend
+		 */
+
+		/*
+		 * We would have to split the busy extent to be able to track
+		 * it correctly, which we cannot do because we would have to
+		 * modify the list of busy extents attached to the transaction
+		 * or CIL context, which is immutable.
+		 *
+		 * Force out the log to clear the busy extent and retry the
+		 * search.
+		 */
+		goto out_force_log;
+	} else if (bbno >= fbno && bend <= fend) {
+		/*
+		 * Case 2:
+		 *    bbno           bend
+		 *    +BBBBBBBBBBBBBBBBB+
+		 *    +-----------------+
+		 *    fbno           fend
+		 *
+		 * Case 3:
+		 *    bbno           bend
+		 *    +BBBBBBBBBBBBBBBBB+
+		 *        +--------------------------+
+		 *        fbno                    fend
+		 *
+		 * Case 4:
+		 *             bbno           bend
+		 *             +BBBBBBBBBBBBBBBBB+
+		 *    +--------------------------+
+		 *    fbno                    fend
+		 *
+		 * Case 5:
+		 *             bbno           bend
+		 *             +BBBBBBBBBBBBBBBBB+
+		 *    +-----------------------------------+
+		 *    fbno                             fend
+		 *
+		 */
+
+		/*
+		 * The busy extent is fully covered by the extent we are
+		 * allocating, and can simply be removed from the rbtree.
+		 * However we cannot remove it from the immutable list
+		 * tracking busy extents in the transaction or CIL context,
+		 * so set the length to zero to mark it invalid.
+		 *
+		 * We also need to restart the busy extent search from the
+		 * tree root, because erasing the node can rearrange the
+		 * tree topology.
+		 */
+		rb_erase(&busyp->rb_node, &pag->pagb_tree);
+		busyp->length = 0;
+		return false;
+	} else if (fend < bend) {
+		/*
+		 * Case 6:
+		 *             bbno           bend
+		 *             +BBBBBBBBBBBBBBBBB+
+		 *             +---------+
+		 *             fbno   fend
+		 *
+		 * Case 7:
+		 *             bbno           bend
+		 *             +BBBBBBBBBBBBBBBBB+
+		 *    +------------------+
+		 *    fbno            fend
+		 *
+		 */
+		busyp->bno = fend;
+	} else if (bbno < fbno) {
+		/*
+		 * Case 8:
+		 *    bbno           bend
+		 *    +BBBBBBBBBBBBBBBBB+
+		 *        +-------------+
+		 *        fbno       fend
+		 *
+		 * Case 9:
+		 *    bbno           bend
+		 *    +BBBBBBBBBBBBBBBBB+
+		 *        +----------------------+
+		 *        fbno                fend
+		 */
+		busyp->length = fbno - busyp->bno;
+	} else {
+		ASSERT(0);
+	}
+
+	trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
+	return true;
+
+out_force_log:
+	spin_unlock(&pag->pagb_lock);
+	xfs_log_force(mp, XFS_LOG_SYNC);
+	trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
+	spin_lock(&pag->pagb_lock);
+	return false;
+}
+
+
+/*
+ * For a given extent [fbno, flen], make sure we can reuse it safely.
+ */
+void
+xfs_alloc_busy_reuse(
+	struct xfs_mount	*mp,
+	xfs_agnumber_t		agno,
+	xfs_agblock_t		fbno,
+	xfs_extlen_t		flen,
+	bool			userdata)
+{
+	struct xfs_perag	*pag;
+	struct rb_node		*rbp;
+
+	ASSERT(flen > 0);
+
+	pag = xfs_perag_get(mp, agno);
+	spin_lock(&pag->pagb_lock);
+restart:
+	rbp = pag->pagb_tree.rb_node;
+	while (rbp) {
+		struct xfs_busy_extent *busyp =
+			rb_entry(rbp, struct xfs_busy_extent, rb_node);
+		xfs_agblock_t	bbno = busyp->bno;
+		xfs_agblock_t	bend = bbno + busyp->length;
+
+		if (fbno + flen <= bbno) {
+			rbp = rbp->rb_left;
+			continue;
+		} else if (fbno >= bend) {
+			rbp = rbp->rb_right;
+			continue;
+		}
+
+		if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
+						  userdata))
+			goto restart;
+	}
+	spin_unlock(&pag->pagb_lock);
+	xfs_perag_put(pag);
+}
+
+/*
  * For a given extent [fbno, flen], search the busy extent list to find a
  * subset of the extent that is not busy.  If *rlen is smaller than
  * args->minlen no suitable extent could be found, and the higher level
@@ -2734,13 +2784,16 @@ xfs_alloc_busy_trim(
 	xfs_agblock_t	*rbno,
 	xfs_extlen_t	*rlen)
 {
-	xfs_agblock_t	fbno = bno;
-	xfs_extlen_t	flen = len;
+	xfs_agblock_t	fbno;
+	xfs_extlen_t	flen;
 	struct rb_node	*rbp;
 
-	ASSERT(flen > 0);
+	ASSERT(len > 0);
 
 	spin_lock(&args->pag->pagb_lock);
+restart:
+	fbno = bno;
+	flen = len;
 	rbp = args->pag->pagb_tree.rb_node;
 	while (rbp && flen >= args->minlen) {
 		struct xfs_busy_extent *busyp =
@@ -2757,6 +2810,18 @@
 			continue;
 		}
 
+		/*
+		 * If this is a metadata allocation, try to reuse the busy
+		 * extent instead of trimming the allocation.
+		 */
+		if (!args->userdata) {
+			if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
+							  busyp, fbno, flen,
+							  false))
+				goto restart;
+			continue;
+		}
+
 		if (bbno <= fbno) {
 			/* start overlap */
 
@@ -2906,17 +2971,15 @@ xfs_alloc_busy_clear(
 {
 	struct xfs_perag	*pag;
 
-	trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
-						busyp->length);
-
-	ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno,
-						busyp->length) == 1);
-
 	list_del_init(&busyp->list);
 
 	pag = xfs_perag_get(mp, busyp->agno);
 	spin_lock(&pag->pagb_lock);
-	rb_erase(&busyp->rb_node, &pag->pagb_tree);
+	if (busyp->length) {
+		trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
+						busyp->length);
+		rb_erase(&busyp->rb_node, &pag->pagb_tree);
+	}
 	spin_unlock(&pag->pagb_lock);
 	xfs_perag_put(pag);
 
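Note the walk discipline shared by xfs_alloc_busy_reuse() and xfs_alloc_busy_trim() above: any structural change to the rbtree, and any cycle of dropping pagb_lock to force the log, invalidates the current walk, so both functions jump back to their restart label and begin again from the root. Below is a stand-alone sketch of that restart pattern using a sorted array in place of the rbtree; busy_reuse_model(), busy_update_model(), and the sample extents are invented for illustration and are not kernel code.

/* Hypothetical user-space model of the restart pattern; not kernel code. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct busy { uint32_t bno, length; };

/* Sorted stand-in for the per-AG rbtree (pag->pagb_tree). */
static struct busy busy_tree[] = {
	{ 10, 5 },	/* [10, 15) */
	{ 30, 10 },	/* [30, 40) */
	{ 60, 5 },	/* [60, 65) */
};
enum { NBUSY = sizeof(busy_tree) / sizeof(busy_tree[0]) };

/*
 * Stand-in for xfs_alloc_busy_update_extent(): returns false when the
 * entry was invalidated (the kernel would also rb_erase() it), which
 * forces the caller to begin a fresh walk.
 */
static bool
busy_update_model(struct busy *bp, uint32_t fbno, uint32_t fend)
{
	if (bp->bno >= fbno && bp->bno + bp->length <= fend) {
		bp->length = 0;		/* fully covered: invalidate */
		return false;		/* tree changed shape: restart */
	}
	/* Partial overlaps would be trimmed here, as in the earlier sketch. */
	return true;
}

/* Model of the walk in xfs_alloc_busy_reuse() for a metadata allocation. */
static void
busy_reuse_model(uint32_t fbno, uint32_t flen)
{
	uint32_t fend = fbno + flen;
restart:
	for (int i = 0; i < NBUSY; i++) {
		struct busy *bp = &busy_tree[i];

		/* Skip invalidated entries and non-overlapping ranges. */
		if (!bp->length || fend <= bp->bno ||
		    fbno >= bp->bno + bp->length)
			continue;
		if (!busy_update_model(bp, fbno, fend))
			goto restart;	/* walk again from the start */
	}
}

int main(void)
{
	busy_reuse_model(28, 14);	/* [28, 42) swallows the middle entry */
	for (int i = 0; i < NBUSY; i++)
		printf("busy[%d] = [%u, %u)\n", i, busy_tree[i].bno,
		       busy_tree[i].bno + busy_tree[i].length);
	return 0;
}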
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index d0b3bc72005b..de5e27c95170 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -145,6 +145,10 @@ xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp);
 int
 xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
 	xfs_agblock_t bno, xfs_extlen_t len);
+
+void
+xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
+	xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
 #endif	/* __KERNEL__ */
 
 /*
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c
index bcfe92f47edd..8b469d53599f 100644
--- a/fs/xfs/xfs_alloc_btree.c
+++ b/fs/xfs/xfs_alloc_btree.c
@@ -94,8 +94,8 @@ xfs_allocbt_alloc_block(
 		*stat = 0;
 		return 0;
 	}
-	if (xfs_alloc_busy_search(cur->bc_mp, cur->bc_private.a.agno, bno, 1))
-		xfs_trans_set_sync(cur->bc_tp);
+
+	xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false);
 
 	xfs_trans_agbtree_delta(cur->bc_tp, 1);
 	new->s = cpu_to_be32(bno);
@@ -120,17 +120,6 @@ xfs_allocbt_free_block(
 	if (error)
 		return error;
 
-	/*
-	 * Since blocks move to the free list without the coordination used in
-	 * xfs_bmap_finish, we can't allow blocks to be available for
-	 * reallocation and non-transaction writing (user data) until we know
-	 * that the transaction that moved it to the free list is permanently
-	 * on disk.  We track the blocks by declaring these blocks as "busy";
-	 * the busy list is maintained on a per-ag basis and each transaction
-	 * records which entries should be removed when the iclog commits to
-	 * disk.  If a busy block is allocated, the iclog is pushed up to the
-	 * LSN that freed the block.
-	 */
 	xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1);
 	xfs_trans_agbtree_delta(cur->bc_tp, -1);
 	return 0;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index b612ce4520ae..18fd4beffcc3 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3248,13 +3248,6 @@ xfs_log_ticket_get(
 	return ticket;
 }
 
-xlog_tid_t
-xfs_log_get_trans_ident(
-	struct xfs_trans	*tp)
-{
-	return tp->t_ticket->t_tid;
-}
-
 /*
  * Allocate and initialise a new log ticket.
  */
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 3bd3291ef8d2..78c9039994af 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *);
 struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);
 void	  xfs_log_ticket_put(struct xlog_ticket *ticket);
 
-xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp);
-
 void	xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,
 				struct xfs_log_vec *log_vector,
 				xfs_lsn_t *commit_lsn, int flags);
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 5864850e9e34..2d3b6a498d63 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i)
 					   shutdown */
 #define XLOG_TAIL_WARN		0x10	/* log tail verify warning issued */
 
+typedef __uint32_t xlog_tid_t;
+
 #ifdef __KERNEL__
 /*
  * Below are states for covering allocation transactions.
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 26d1867d8156..65584b55607d 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -73,8 +73,6 @@ typedef __int32_t	xfs_tid_t;	/* transaction identifier */
 typedef __uint32_t	xfs_dablk_t;	/* dir/attr block number (in file) */
 typedef __uint32_t	xfs_dahash_t;	/* dir/attr hash value */
 
-typedef __uint32_t	xlog_tid_t;	/* transaction ID type */
-
 /*
  * These types are 64 bits on disk but are either 32 or 64 bits in memory.
  * Disk based types: