diff options
author | Christoph Hellwig <hch@infradead.org> | 2011-04-24 15:06:16 -0400 |
---|---|---|
committer | Alex Elder <aelder@sgi.com> | 2011-04-28 14:18:04 -0400 |
commit | 97d3ac75e5e0ebf7ca38ae74cebd201c09b97ab2 (patch) | |
tree | e08af7a4bbb93c22dfeb1bcb2d3caf83aef717c9 /fs/xfs | |
parent | e26f0501cf743a4289603501413f97ffcb4612f2 (diff) |
xfs: exact busy extent tracking
Update the extent tree in case we have to reuse a busy extent, so that it
always is kept uptodate. This is done by replacing the busy list searches
with a new xfs_alloc_busy_reuse helper, which updates the busy extent tree
in case of a reuse. This allows us to allow reusing metadata extents
unconditionally, and thus avoid log forces especially for allocation btree
blocks.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/linux-2.6/xfs_trace.h | 79 | ||||
-rw-r--r-- | fs/xfs/xfs_ag.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc.c | 373 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc_btree.c | 15 | ||||
-rw-r--r-- | fs/xfs/xfs_log.c | 7 | ||||
-rw-r--r-- | fs/xfs/xfs_log.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_priv.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_types.h | 2 |
9 files changed, 237 insertions, 248 deletions
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index e5252c4d95c9..37df9ad7fd1a 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap, | |||
1151 | 1151 | ||
1152 | ); | 1152 | ); |
1153 | 1153 | ||
1154 | #define XFS_BUSY_SYNC \ | 1154 | DECLARE_EVENT_CLASS(xfs_busy_class, |
1155 | { 0, "async" }, \ | ||
1156 | { 1, "sync" } | ||
1157 | |||
1158 | TRACE_EVENT(xfs_alloc_busy, | ||
1159 | TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno, | ||
1160 | xfs_agblock_t agbno, xfs_extlen_t len, int sync), | ||
1161 | TP_ARGS(trans, agno, agbno, len, sync), | ||
1162 | TP_STRUCT__entry( | ||
1163 | __field(dev_t, dev) | ||
1164 | __field(struct xfs_trans *, tp) | ||
1165 | __field(int, tid) | ||
1166 | __field(xfs_agnumber_t, agno) | ||
1167 | __field(xfs_agblock_t, agbno) | ||
1168 | __field(xfs_extlen_t, len) | ||
1169 | __field(int, sync) | ||
1170 | ), | ||
1171 | TP_fast_assign( | ||
1172 | __entry->dev = trans->t_mountp->m_super->s_dev; | ||
1173 | __entry->tp = trans; | ||
1174 | __entry->tid = trans->t_ticket->t_tid; | ||
1175 | __entry->agno = agno; | ||
1176 | __entry->agbno = agbno; | ||
1177 | __entry->len = len; | ||
1178 | __entry->sync = sync; | ||
1179 | ), | ||
1180 | TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s", | ||
1181 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1182 | __entry->tp, | ||
1183 | __entry->tid, | ||
1184 | __entry->agno, | ||
1185 | __entry->agbno, | ||
1186 | __entry->len, | ||
1187 | __print_symbolic(__entry->sync, XFS_BUSY_SYNC)) | ||
1188 | |||
1189 | ); | ||
1190 | |||
1191 | TRACE_EVENT(xfs_alloc_unbusy, | ||
1192 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1155 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
1193 | xfs_agblock_t agbno, xfs_extlen_t len), | 1156 | xfs_agblock_t agbno, xfs_extlen_t len), |
1194 | TP_ARGS(mp, agno, agbno, len), | 1157 | TP_ARGS(mp, agno, agbno, len), |
@@ -1210,36 +1173,16 @@ TRACE_EVENT(xfs_alloc_unbusy, | |||
1210 | __entry->agbno, | 1173 | __entry->agbno, |
1211 | __entry->len) | 1174 | __entry->len) |
1212 | ); | 1175 | ); |
1213 | 1176 | #define DEFINE_BUSY_EVENT(name) \ | |
1214 | #define XFS_BUSY_STATES \ | 1177 | DEFINE_EVENT(xfs_busy_class, name, \ |
1215 | { 0, "missing" }, \ | 1178 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ |
1216 | { 1, "found" } | 1179 | xfs_agblock_t agbno, xfs_extlen_t len), \ |
1217 | 1180 | TP_ARGS(mp, agno, agbno, len)) | |
1218 | TRACE_EVENT(xfs_alloc_busysearch, | 1181 | DEFINE_BUSY_EVENT(xfs_alloc_busy); |
1219 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1182 | DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); |
1220 | xfs_agblock_t agbno, xfs_extlen_t len, int found), | 1183 | DEFINE_BUSY_EVENT(xfs_alloc_busy_force); |
1221 | TP_ARGS(mp, agno, agbno, len, found), | 1184 | DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); |
1222 | TP_STRUCT__entry( | 1185 | DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); |
1223 | __field(dev_t, dev) | ||
1224 | __field(xfs_agnumber_t, agno) | ||
1225 | __field(xfs_agblock_t, agbno) | ||
1226 | __field(xfs_extlen_t, len) | ||
1227 | __field(int, found) | ||
1228 | ), | ||
1229 | TP_fast_assign( | ||
1230 | __entry->dev = mp->m_super->s_dev; | ||
1231 | __entry->agno = agno; | ||
1232 | __entry->agbno = agbno; | ||
1233 | __entry->len = len; | ||
1234 | __entry->found = found; | ||
1235 | ), | ||
1236 | TP_printk("dev %d:%d agno %u agbno %u len %u %s", | ||
1237 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
1238 | __entry->agno, | ||
1239 | __entry->agbno, | ||
1240 | __entry->len, | ||
1241 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | ||
1242 | ); | ||
1243 | 1186 | ||
1244 | TRACE_EVENT(xfs_alloc_busy_trim, | 1187 | TRACE_EVENT(xfs_alloc_busy_trim, |
1245 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1188 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 58632cc17f2d..da0a561ffba2 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -187,7 +187,6 @@ struct xfs_busy_extent { | |||
187 | xfs_agnumber_t agno; | 187 | xfs_agnumber_t agno; |
188 | xfs_agblock_t bno; | 188 | xfs_agblock_t bno; |
189 | xfs_extlen_t length; | 189 | xfs_extlen_t length; |
190 | xlog_tid_t tid; /* transaction that created this */ | ||
191 | }; | 190 | }; |
192 | 191 | ||
193 | /* | 192 | /* |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index ff54ef428348..53157d4d5e8b 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -1396,8 +1396,9 @@ xfs_alloc_ag_vextent_small( | |||
1396 | if (error) | 1396 | if (error) |
1397 | goto error0; | 1397 | goto error0; |
1398 | if (fbno != NULLAGBLOCK) { | 1398 | if (fbno != NULLAGBLOCK) { |
1399 | if (xfs_alloc_busy_search(args->mp, args->agno, fbno, 1)) | 1399 | xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1, |
1400 | xfs_trans_set_sync(args->tp); | 1400 | args->userdata); |
1401 | |||
1401 | if (args->userdata) { | 1402 | if (args->userdata) { |
1402 | xfs_buf_t *bp; | 1403 | xfs_buf_t *bp; |
1403 | 1404 | ||
@@ -2475,100 +2476,6 @@ error0: | |||
2475 | return error; | 2476 | return error; |
2476 | } | 2477 | } |
2477 | 2478 | ||
2478 | |||
2479 | /* | ||
2480 | * AG Busy list management | ||
2481 | * The busy list contains block ranges that have been freed but whose | ||
2482 | * transactions have not yet hit disk. If any block listed in a busy | ||
2483 | * list is reused, the transaction that freed it must be forced to disk | ||
2484 | * before continuing to use the block. | ||
2485 | * | ||
2486 | * xfs_alloc_busy_insert - add to the per-ag busy list | ||
2487 | * xfs_alloc_busy_clear - remove an item from the per-ag busy list | ||
2488 | * xfs_alloc_busy_search - search for a busy extent | ||
2489 | */ | ||
2490 | |||
2491 | /* | ||
2492 | * Insert a new extent into the busy tree. | ||
2493 | * | ||
2494 | * The busy extent tree is indexed by the start block of the busy extent. | ||
2495 | * there can be multiple overlapping ranges in the busy extent tree but only | ||
2496 | * ever one entry at a given start block. The reason for this is that | ||
2497 | * multi-block extents can be freed, then smaller chunks of that extent | ||
2498 | * allocated and freed again before the first transaction commit is on disk. | ||
2499 | * If the exact same start block is freed a second time, we have to wait for | ||
2500 | * that busy extent to pass out of the tree before the new extent is inserted. | ||
2501 | * There are two main cases we have to handle here. | ||
2502 | * | ||
2503 | * The first case is a transaction that triggers a "free - allocate - free" | ||
2504 | * cycle. This can occur during btree manipulations as a btree block is freed | ||
2505 | * to the freelist, then allocated from the free list, then freed again. In | ||
2506 | * this case, the second extxpnet free is what triggers the duplicate and as | ||
2507 | * such the transaction IDs should match. Because the extent was allocated in | ||
2508 | * this transaction, the transaction must be marked as synchronous. This is | ||
2509 | * true for all cases where the free/alloc/free occurs in the one transaction, | ||
2510 | * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case. | ||
2511 | * This serves to catch violations of the second case quite effectively. | ||
2512 | * | ||
2513 | * The second case is where the free/alloc/free occur in different | ||
2514 | * transactions. In this case, the thread freeing the extent the second time | ||
2515 | * can't mark the extent busy immediately because it is already tracked in a | ||
2516 | * transaction that may be committing. When the log commit for the existing | ||
2517 | * busy extent completes, the busy extent will be removed from the tree. If we | ||
2518 | * allow the second busy insert to continue using that busy extent structure, | ||
2519 | * it can be freed before this transaction is safely in the log. Hence our | ||
2520 | * only option in this case is to force the log to remove the existing busy | ||
2521 | * extent from the list before we insert the new one with the current | ||
2522 | * transaction ID. | ||
2523 | * | ||
2524 | * The problem we are trying to avoid in the free-alloc-free in separate | ||
2525 | * transactions is most easily described with a timeline: | ||
2526 | * | ||
2527 | * Thread 1 Thread 2 Thread 3 xfslogd | ||
2528 | * xact alloc | ||
2529 | * free X | ||
2530 | * mark busy | ||
2531 | * commit xact | ||
2532 | * free xact | ||
2533 | * xact alloc | ||
2534 | * alloc X | ||
2535 | * busy search | ||
2536 | * mark xact sync | ||
2537 | * commit xact | ||
2538 | * free xact | ||
2539 | * force log | ||
2540 | * checkpoint starts | ||
2541 | * .... | ||
2542 | * xact alloc | ||
2543 | * free X | ||
2544 | * mark busy | ||
2545 | * finds match | ||
2546 | * *** KABOOM! *** | ||
2547 | * .... | ||
2548 | * log IO completes | ||
2549 | * unbusy X | ||
2550 | * checkpoint completes | ||
2551 | * | ||
2552 | * By issuing a log force in thread 3 @ "KABOOM", the thread will block until | ||
2553 | * the checkpoint completes, and the busy extent it matched will have been | ||
2554 | * removed from the tree when it is woken. Hence it can then continue safely. | ||
2555 | * | ||
2556 | * However, to ensure this matching process is robust, we need to use the | ||
2557 | * transaction ID for identifying transaction, as delayed logging results in | ||
2558 | * the busy extent and transaction lifecycles being different. i.e. the busy | ||
2559 | * extent is active for a lot longer than the transaction. Hence the | ||
2560 | * transaction structure can be freed and reallocated, then mark the same | ||
2561 | * extent busy again in the new transaction. In this case the new transaction | ||
2562 | * will have a different tid but can have the same address, and hence we need | ||
2563 | * to check against the tid. | ||
2564 | * | ||
2565 | * Future: for delayed logging, we could avoid the log force if the extent was | ||
2566 | * first freed in the current checkpoint sequence. This, however, requires the | ||
2567 | * ability to pin the current checkpoint in memory until this transaction | ||
2568 | * commits to ensure that both the original free and the current one combine | ||
2569 | * logically into the one checkpoint. If the checkpoint sequences are | ||
2570 | * different, however, we still need to wait on a log force. | ||
2571 | */ | ||
2572 | void | 2479 | void |
2573 | xfs_alloc_busy_insert( | 2480 | xfs_alloc_busy_insert( |
2574 | struct xfs_trans *tp, | 2481 | struct xfs_trans *tp, |
@@ -2580,9 +2487,7 @@ xfs_alloc_busy_insert( | |||
2580 | struct xfs_busy_extent *busyp; | 2487 | struct xfs_busy_extent *busyp; |
2581 | struct xfs_perag *pag; | 2488 | struct xfs_perag *pag; |
2582 | struct rb_node **rbp; | 2489 | struct rb_node **rbp; |
2583 | struct rb_node *parent; | 2490 | struct rb_node *parent = NULL; |
2584 | int match; | ||
2585 | |||
2586 | 2491 | ||
2587 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); | 2492 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); |
2588 | if (!new) { | 2493 | if (!new) { |
@@ -2591,7 +2496,7 @@ xfs_alloc_busy_insert( | |||
2591 | * block, make this a synchronous transaction to insure that | 2496 | * block, make this a synchronous transaction to insure that |
2592 | * the block is not reused before this transaction commits. | 2497 | * the block is not reused before this transaction commits. |
2593 | */ | 2498 | */ |
2594 | trace_xfs_alloc_busy(tp, agno, bno, len, 1); | 2499 | trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len); |
2595 | xfs_trans_set_sync(tp); | 2500 | xfs_trans_set_sync(tp); |
2596 | return; | 2501 | return; |
2597 | } | 2502 | } |
@@ -2599,66 +2504,28 @@ xfs_alloc_busy_insert( | |||
2599 | new->agno = agno; | 2504 | new->agno = agno; |
2600 | new->bno = bno; | 2505 | new->bno = bno; |
2601 | new->length = len; | 2506 | new->length = len; |
2602 | new->tid = xfs_log_get_trans_ident(tp); | ||
2603 | |||
2604 | INIT_LIST_HEAD(&new->list); | 2507 | INIT_LIST_HEAD(&new->list); |
2605 | 2508 | ||
2606 | /* trace before insert to be able to see failed inserts */ | 2509 | /* trace before insert to be able to see failed inserts */ |
2607 | trace_xfs_alloc_busy(tp, agno, bno, len, 0); | 2510 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len); |
2608 | 2511 | ||
2609 | pag = xfs_perag_get(tp->t_mountp, new->agno); | 2512 | pag = xfs_perag_get(tp->t_mountp, new->agno); |
2610 | restart: | ||
2611 | spin_lock(&pag->pagb_lock); | 2513 | spin_lock(&pag->pagb_lock); |
2612 | rbp = &pag->pagb_tree.rb_node; | 2514 | rbp = &pag->pagb_tree.rb_node; |
2613 | parent = NULL; | 2515 | while (*rbp) { |
2614 | busyp = NULL; | ||
2615 | match = 0; | ||
2616 | while (*rbp && match >= 0) { | ||
2617 | parent = *rbp; | 2516 | parent = *rbp; |
2618 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); | 2517 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); |
2619 | 2518 | ||
2620 | if (new->bno < busyp->bno) { | 2519 | if (new->bno < busyp->bno) { |
2621 | /* may overlap, but exact start block is lower */ | ||
2622 | rbp = &(*rbp)->rb_left; | 2520 | rbp = &(*rbp)->rb_left; |
2623 | if (new->bno + new->length > busyp->bno) | 2521 | ASSERT(new->bno + new->length <= busyp->bno); |
2624 | match = busyp->tid == new->tid ? 1 : -1; | ||
2625 | } else if (new->bno > busyp->bno) { | 2522 | } else if (new->bno > busyp->bno) { |
2626 | /* may overlap, but exact start block is higher */ | ||
2627 | rbp = &(*rbp)->rb_right; | 2523 | rbp = &(*rbp)->rb_right; |
2628 | if (bno < busyp->bno + busyp->length) | 2524 | ASSERT(bno >= busyp->bno + busyp->length); |
2629 | match = busyp->tid == new->tid ? 1 : -1; | ||
2630 | } else { | 2525 | } else { |
2631 | match = busyp->tid == new->tid ? 1 : -1; | 2526 | ASSERT(0); |
2632 | break; | ||
2633 | } | 2527 | } |
2634 | } | 2528 | } |
2635 | if (match < 0) { | ||
2636 | /* overlap marked busy in different transaction */ | ||
2637 | spin_unlock(&pag->pagb_lock); | ||
2638 | xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); | ||
2639 | goto restart; | ||
2640 | } | ||
2641 | if (match > 0) { | ||
2642 | /* | ||
2643 | * overlap marked busy in same transaction. Update if exact | ||
2644 | * start block match, otherwise combine the busy extents into | ||
2645 | * a single range. | ||
2646 | */ | ||
2647 | if (busyp->bno == new->bno) { | ||
2648 | busyp->length = max(busyp->length, new->length); | ||
2649 | spin_unlock(&pag->pagb_lock); | ||
2650 | ASSERT(tp->t_flags & XFS_TRANS_SYNC); | ||
2651 | xfs_perag_put(pag); | ||
2652 | kmem_free(new); | ||
2653 | return; | ||
2654 | } | ||
2655 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2656 | new->length = max(busyp->bno + busyp->length, | ||
2657 | new->bno + new->length) - | ||
2658 | min(busyp->bno, new->bno); | ||
2659 | new->bno = min(busyp->bno, new->bno); | ||
2660 | } else | ||
2661 | busyp = NULL; | ||
2662 | 2529 | ||
2663 | rb_link_node(&new->rb_node, parent, rbp); | 2530 | rb_link_node(&new->rb_node, parent, rbp); |
2664 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | 2531 | rb_insert_color(&new->rb_node, &pag->pagb_tree); |
@@ -2666,7 +2533,6 @@ restart: | |||
2666 | list_add(&new->list, &tp->t_busy); | 2533 | list_add(&new->list, &tp->t_busy); |
2667 | spin_unlock(&pag->pagb_lock); | 2534 | spin_unlock(&pag->pagb_lock); |
2668 | xfs_perag_put(pag); | 2535 | xfs_perag_put(pag); |
2669 | kmem_free(busyp); | ||
2670 | } | 2536 | } |
2671 | 2537 | ||
2672 | /* | 2538 | /* |
@@ -2715,12 +2581,196 @@ xfs_alloc_busy_search( | |||
2715 | } | 2581 | } |
2716 | } | 2582 | } |
2717 | spin_unlock(&pag->pagb_lock); | 2583 | spin_unlock(&pag->pagb_lock); |
2718 | trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); | ||
2719 | xfs_perag_put(pag); | 2584 | xfs_perag_put(pag); |
2720 | return match; | 2585 | return match; |
2721 | } | 2586 | } |
2722 | 2587 | ||
2723 | /* | 2588 | /* |
2589 | * The found free extent [fbno, fend] overlaps part or all of the given busy | ||
2590 | * extent. If the overlap covers the beginning, the end, or all of the busy | ||
2591 | * extent, the overlapping portion can be made unbusy and used for the | ||
2592 | * allocation. We can't split a busy extent because we can't modify a | ||
2593 | * transaction/CIL context busy list, but we can update an entries block | ||
2594 | * number or length. | ||
2595 | * | ||
2596 | * Returns true if the extent can safely be reused, or false if the search | ||
2597 | * needs to be restarted. | ||
2598 | */ | ||
2599 | STATIC bool | ||
2600 | xfs_alloc_busy_update_extent( | ||
2601 | struct xfs_mount *mp, | ||
2602 | struct xfs_perag *pag, | ||
2603 | struct xfs_busy_extent *busyp, | ||
2604 | xfs_agblock_t fbno, | ||
2605 | xfs_extlen_t flen, | ||
2606 | bool userdata) | ||
2607 | { | ||
2608 | xfs_agblock_t fend = fbno + flen; | ||
2609 | xfs_agblock_t bbno = busyp->bno; | ||
2610 | xfs_agblock_t bend = bbno + busyp->length; | ||
2611 | |||
2612 | /* | ||
2613 | * If there is a busy extent overlapping a user allocation, we have | ||
2614 | * no choice but to force the log and retry the search. | ||
2615 | * | ||
2616 | * Fortunately this does not happen during normal operation, but | ||
2617 | * only if the filesystem is very low on space and has to dip into | ||
2618 | * the AGFL for normal allocations. | ||
2619 | */ | ||
2620 | if (userdata) | ||
2621 | goto out_force_log; | ||
2622 | |||
2623 | if (bbno < fbno && bend > fend) { | ||
2624 | /* | ||
2625 | * Case 1: | ||
2626 | * bbno bend | ||
2627 | * +BBBBBBBBBBBBBBBBB+ | ||
2628 | * +---------+ | ||
2629 | * fbno fend | ||
2630 | */ | ||
2631 | |||
2632 | /* | ||
2633 | * We would have to split the busy extent to be able to track | ||
2634 | * it correct, which we cannot do because we would have to | ||
2635 | * modify the list of busy extents attached to the transaction | ||
2636 | * or CIL context, which is immutable. | ||
2637 | * | ||
2638 | * Force out the log to clear the busy extent and retry the | ||
2639 | * search. | ||
2640 | */ | ||
2641 | goto out_force_log; | ||
2642 | } else if (bbno >= fbno && bend <= fend) { | ||
2643 | /* | ||
2644 | * Case 2: | ||
2645 | * bbno bend | ||
2646 | * +BBBBBBBBBBBBBBBBB+ | ||
2647 | * +-----------------+ | ||
2648 | * fbno fend | ||
2649 | * | ||
2650 | * Case 3: | ||
2651 | * bbno bend | ||
2652 | * +BBBBBBBBBBBBBBBBB+ | ||
2653 | * +--------------------------+ | ||
2654 | * fbno fend | ||
2655 | * | ||
2656 | * Case 4: | ||
2657 | * bbno bend | ||
2658 | * +BBBBBBBBBBBBBBBBB+ | ||
2659 | * +--------------------------+ | ||
2660 | * fbno fend | ||
2661 | * | ||
2662 | * Case 5: | ||
2663 | * bbno bend | ||
2664 | * +BBBBBBBBBBBBBBBBB+ | ||
2665 | * +-----------------------------------+ | ||
2666 | * fbno fend | ||
2667 | * | ||
2668 | */ | ||
2669 | |||
2670 | /* | ||
2671 | * The busy extent is fully covered by the extent we are | ||
2672 | * allocating, and can simply be removed from the rbtree. | ||
2673 | * However we cannot remove it from the immutable list | ||
2674 | * tracking busy extents in the transaction or CIL context, | ||
2675 | * so set the length to zero to mark it invalid. | ||
2676 | * | ||
2677 | * We also need to restart the busy extent search from the | ||
2678 | * tree root, because erasing the node can rearrange the | ||
2679 | * tree topology. | ||
2680 | */ | ||
2681 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2682 | busyp->length = 0; | ||
2683 | return false; | ||
2684 | } else if (fend < bend) { | ||
2685 | /* | ||
2686 | * Case 6: | ||
2687 | * bbno bend | ||
2688 | * +BBBBBBBBBBBBBBBBB+ | ||
2689 | * +---------+ | ||
2690 | * fbno fend | ||
2691 | * | ||
2692 | * Case 7: | ||
2693 | * bbno bend | ||
2694 | * +BBBBBBBBBBBBBBBBB+ | ||
2695 | * +------------------+ | ||
2696 | * fbno fend | ||
2697 | * | ||
2698 | */ | ||
2699 | busyp->bno = fend; | ||
2700 | } else if (bbno < fbno) { | ||
2701 | /* | ||
2702 | * Case 8: | ||
2703 | * bbno bend | ||
2704 | * +BBBBBBBBBBBBBBBBB+ | ||
2705 | * +-------------+ | ||
2706 | * fbno fend | ||
2707 | * | ||
2708 | * Case 9: | ||
2709 | * bbno bend | ||
2710 | * +BBBBBBBBBBBBBBBBB+ | ||
2711 | * +----------------------+ | ||
2712 | * fbno fend | ||
2713 | */ | ||
2714 | busyp->length = fbno - busyp->bno; | ||
2715 | } else { | ||
2716 | ASSERT(0); | ||
2717 | } | ||
2718 | |||
2719 | trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen); | ||
2720 | return true; | ||
2721 | |||
2722 | out_force_log: | ||
2723 | spin_unlock(&pag->pagb_lock); | ||
2724 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
2725 | trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen); | ||
2726 | spin_lock(&pag->pagb_lock); | ||
2727 | return false; | ||
2728 | } | ||
2729 | |||
2730 | |||
2731 | /* | ||
2732 | * For a given extent [fbno, flen], make sure we can reuse it safely. | ||
2733 | */ | ||
2734 | void | ||
2735 | xfs_alloc_busy_reuse( | ||
2736 | struct xfs_mount *mp, | ||
2737 | xfs_agnumber_t agno, | ||
2738 | xfs_agblock_t fbno, | ||
2739 | xfs_extlen_t flen, | ||
2740 | bool userdata) | ||
2741 | { | ||
2742 | struct xfs_perag *pag; | ||
2743 | struct rb_node *rbp; | ||
2744 | |||
2745 | ASSERT(flen > 0); | ||
2746 | |||
2747 | pag = xfs_perag_get(mp, agno); | ||
2748 | spin_lock(&pag->pagb_lock); | ||
2749 | restart: | ||
2750 | rbp = pag->pagb_tree.rb_node; | ||
2751 | while (rbp) { | ||
2752 | struct xfs_busy_extent *busyp = | ||
2753 | rb_entry(rbp, struct xfs_busy_extent, rb_node); | ||
2754 | xfs_agblock_t bbno = busyp->bno; | ||
2755 | xfs_agblock_t bend = bbno + busyp->length; | ||
2756 | |||
2757 | if (fbno + flen <= bbno) { | ||
2758 | rbp = rbp->rb_left; | ||
2759 | continue; | ||
2760 | } else if (fbno >= bend) { | ||
2761 | rbp = rbp->rb_right; | ||
2762 | continue; | ||
2763 | } | ||
2764 | |||
2765 | if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen, | ||
2766 | userdata)) | ||
2767 | goto restart; | ||
2768 | } | ||
2769 | spin_unlock(&pag->pagb_lock); | ||
2770 | xfs_perag_put(pag); | ||
2771 | } | ||
2772 | |||
2773 | /* | ||
2724 | * For a given extent [fbno, flen], search the busy extent list to find a | 2774 | * For a given extent [fbno, flen], search the busy extent list to find a |
2725 | * subset of the extent that is not busy. If *rlen is smaller than | 2775 | * subset of the extent that is not busy. If *rlen is smaller than |
2726 | * args->minlen no suitable extent could be found, and the higher level | 2776 | * args->minlen no suitable extent could be found, and the higher level |
@@ -2734,13 +2784,16 @@ xfs_alloc_busy_trim( | |||
2734 | xfs_agblock_t *rbno, | 2784 | xfs_agblock_t *rbno, |
2735 | xfs_extlen_t *rlen) | 2785 | xfs_extlen_t *rlen) |
2736 | { | 2786 | { |
2737 | xfs_agblock_t fbno = bno; | 2787 | xfs_agblock_t fbno; |
2738 | xfs_extlen_t flen = len; | 2788 | xfs_extlen_t flen; |
2739 | struct rb_node *rbp; | 2789 | struct rb_node *rbp; |
2740 | 2790 | ||
2741 | ASSERT(flen > 0); | 2791 | ASSERT(len > 0); |
2742 | 2792 | ||
2743 | spin_lock(&args->pag->pagb_lock); | 2793 | spin_lock(&args->pag->pagb_lock); |
2794 | restart: | ||
2795 | fbno = bno; | ||
2796 | flen = len; | ||
2744 | rbp = args->pag->pagb_tree.rb_node; | 2797 | rbp = args->pag->pagb_tree.rb_node; |
2745 | while (rbp && flen >= args->minlen) { | 2798 | while (rbp && flen >= args->minlen) { |
2746 | struct xfs_busy_extent *busyp = | 2799 | struct xfs_busy_extent *busyp = |
@@ -2757,6 +2810,18 @@ xfs_alloc_busy_trim( | |||
2757 | continue; | 2810 | continue; |
2758 | } | 2811 | } |
2759 | 2812 | ||
2813 | /* | ||
2814 | * If this is a metadata allocation, try to reuse the busy | ||
2815 | * extent instead of trimming the allocation. | ||
2816 | */ | ||
2817 | if (!args->userdata) { | ||
2818 | if (!xfs_alloc_busy_update_extent(args->mp, args->pag, | ||
2819 | busyp, fbno, flen, | ||
2820 | false)) | ||
2821 | goto restart; | ||
2822 | continue; | ||
2823 | } | ||
2824 | |||
2760 | if (bbno <= fbno) { | 2825 | if (bbno <= fbno) { |
2761 | /* start overlap */ | 2826 | /* start overlap */ |
2762 | 2827 | ||
@@ -2906,17 +2971,15 @@ xfs_alloc_busy_clear( | |||
2906 | { | 2971 | { |
2907 | struct xfs_perag *pag; | 2972 | struct xfs_perag *pag; |
2908 | 2973 | ||
2909 | trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, | ||
2910 | busyp->length); | ||
2911 | |||
2912 | ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, | ||
2913 | busyp->length) == 1); | ||
2914 | |||
2915 | list_del_init(&busyp->list); | 2974 | list_del_init(&busyp->list); |
2916 | 2975 | ||
2917 | pag = xfs_perag_get(mp, busyp->agno); | 2976 | pag = xfs_perag_get(mp, busyp->agno); |
2918 | spin_lock(&pag->pagb_lock); | 2977 | spin_lock(&pag->pagb_lock); |
2919 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | 2978 | if (busyp->length) { |
2979 | trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno, | ||
2980 | busyp->length); | ||
2981 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2982 | } | ||
2920 | spin_unlock(&pag->pagb_lock); | 2983 | spin_unlock(&pag->pagb_lock); |
2921 | xfs_perag_put(pag); | 2984 | xfs_perag_put(pag); |
2922 | 2985 | ||
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index d0b3bc72005b..de5e27c95170 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -145,6 +145,10 @@ xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); | |||
145 | int | 145 | int |
146 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | 146 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, |
147 | xfs_agblock_t bno, xfs_extlen_t len); | 147 | xfs_agblock_t bno, xfs_extlen_t len); |
148 | |||
149 | void | ||
150 | xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
151 | xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); | ||
148 | #endif /* __KERNEL__ */ | 152 | #endif /* __KERNEL__ */ |
149 | 153 | ||
150 | /* | 154 | /* |
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index bcfe92f47edd..8b469d53599f 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -94,8 +94,8 @@ xfs_allocbt_alloc_block( | |||
94 | *stat = 0; | 94 | *stat = 0; |
95 | return 0; | 95 | return 0; |
96 | } | 96 | } |
97 | if (xfs_alloc_busy_search(cur->bc_mp, cur->bc_private.a.agno, bno, 1)) | 97 | |
98 | xfs_trans_set_sync(cur->bc_tp); | 98 | xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); |
99 | 99 | ||
100 | xfs_trans_agbtree_delta(cur->bc_tp, 1); | 100 | xfs_trans_agbtree_delta(cur->bc_tp, 1); |
101 | new->s = cpu_to_be32(bno); | 101 | new->s = cpu_to_be32(bno); |
@@ -120,17 +120,6 @@ xfs_allocbt_free_block( | |||
120 | if (error) | 120 | if (error) |
121 | return error; | 121 | return error; |
122 | 122 | ||
123 | /* | ||
124 | * Since blocks move to the free list without the coordination used in | ||
125 | * xfs_bmap_finish, we can't allow block to be available for | ||
126 | * reallocation and non-transaction writing (user data) until we know | ||
127 | * that the transaction that moved it to the free list is permanently | ||
128 | * on disk. We track the blocks by declaring these blocks as "busy"; | ||
129 | * the busy list is maintained on a per-ag basis and each transaction | ||
130 | * records which entries should be removed when the iclog commits to | ||
131 | * disk. If a busy block is allocated, the iclog is pushed up to the | ||
132 | * LSN that freed the block. | ||
133 | */ | ||
134 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 123 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); |
135 | xfs_trans_agbtree_delta(cur->bc_tp, -1); | 124 | xfs_trans_agbtree_delta(cur->bc_tp, -1); |
136 | return 0; | 125 | return 0; |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b612ce4520ae..18fd4beffcc3 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -3248,13 +3248,6 @@ xfs_log_ticket_get( | |||
3248 | return ticket; | 3248 | return ticket; |
3249 | } | 3249 | } |
3250 | 3250 | ||
3251 | xlog_tid_t | ||
3252 | xfs_log_get_trans_ident( | ||
3253 | struct xfs_trans *tp) | ||
3254 | { | ||
3255 | return tp->t_ticket->t_tid; | ||
3256 | } | ||
3257 | |||
3258 | /* | 3251 | /* |
3259 | * Allocate and initialise a new log ticket. | 3252 | * Allocate and initialise a new log ticket. |
3260 | */ | 3253 | */ |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 3bd3291ef8d2..78c9039994af 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *); | |||
189 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); | 189 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); |
190 | void xfs_log_ticket_put(struct xlog_ticket *ticket); | 190 | void xfs_log_ticket_put(struct xlog_ticket *ticket); |
191 | 191 | ||
192 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); | ||
193 | |||
194 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | 192 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, |
195 | struct xfs_log_vec *log_vector, | 193 | struct xfs_log_vec *log_vector, |
196 | xfs_lsn_t *commit_lsn, int flags); | 194 | xfs_lsn_t *commit_lsn, int flags); |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 5864850e9e34..2d3b6a498d63 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i) | |||
146 | shutdown */ | 146 | shutdown */ |
147 | #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ | 147 | #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ |
148 | 148 | ||
149 | typedef __uint32_t xlog_tid_t; | ||
150 | |||
149 | #ifdef __KERNEL__ | 151 | #ifdef __KERNEL__ |
150 | /* | 152 | /* |
151 | * Below are states for covering allocation transactions. | 153 | * Below are states for covering allocation transactions. |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 26d1867d8156..65584b55607d 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ | |||
73 | typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ | 73 | typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ |
74 | typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ | 74 | typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ |
75 | 75 | ||
76 | typedef __uint32_t xlog_tid_t; /* transaction ID type */ | ||
77 | |||
78 | /* | 76 | /* |
79 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. | 77 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. |
80 | * Disk based types: | 78 | * Disk based types: |