aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_alloc.c')
-rw-r--r--fs/xfs/xfs_alloc.c572
1 files changed, 1 insertions, 571 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 95ee705f146b..ae6df2585895 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -31,6 +31,7 @@
31#include "xfs_inode.h" 31#include "xfs_inode.h"
32#include "xfs_btree.h" 32#include "xfs_btree.h"
33#include "xfs_alloc.h" 33#include "xfs_alloc.h"
34#include "xfs_extent_busy.h"
34#include "xfs_error.h" 35#include "xfs_error.h"
35#include "xfs_trace.h" 36#include "xfs_trace.h"
36 37
@@ -2500,574 +2501,3 @@ error0:
2500 xfs_perag_put(args.pag); 2501 xfs_perag_put(args.pag);
2501 return error; 2502 return error;
2502} 2503}
2503
2504void
2505xfs_alloc_busy_insert(
2506 struct xfs_trans *tp,
2507 xfs_agnumber_t agno,
2508 xfs_agblock_t bno,
2509 xfs_extlen_t len,
2510 unsigned int flags)
2511{
2512 struct xfs_busy_extent *new;
2513 struct xfs_busy_extent *busyp;
2514 struct xfs_perag *pag;
2515 struct rb_node **rbp;
2516 struct rb_node *parent = NULL;
2517
2518 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
2519 if (!new) {
2520 /*
2521 * No Memory! Since it is now not possible to track the free
2522 * block, make this a synchronous transaction to insure that
2523 * the block is not reused before this transaction commits.
2524 */
2525 trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len);
2526 xfs_trans_set_sync(tp);
2527 return;
2528 }
2529
2530 new->agno = agno;
2531 new->bno = bno;
2532 new->length = len;
2533 INIT_LIST_HEAD(&new->list);
2534 new->flags = flags;
2535
2536 /* trace before insert to be able to see failed inserts */
2537 trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len);
2538
2539 pag = xfs_perag_get(tp->t_mountp, new->agno);
2540 spin_lock(&pag->pagb_lock);
2541 rbp = &pag->pagb_tree.rb_node;
2542 while (*rbp) {
2543 parent = *rbp;
2544 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
2545
2546 if (new->bno < busyp->bno) {
2547 rbp = &(*rbp)->rb_left;
2548 ASSERT(new->bno + new->length <= busyp->bno);
2549 } else if (new->bno > busyp->bno) {
2550 rbp = &(*rbp)->rb_right;
2551 ASSERT(bno >= busyp->bno + busyp->length);
2552 } else {
2553 ASSERT(0);
2554 }
2555 }
2556
2557 rb_link_node(&new->rb_node, parent, rbp);
2558 rb_insert_color(&new->rb_node, &pag->pagb_tree);
2559
2560 list_add(&new->list, &tp->t_busy);
2561 spin_unlock(&pag->pagb_lock);
2562 xfs_perag_put(pag);
2563}
2564
2565/*
2566 * Search for a busy extent within the range of the extent we are about to
2567 * allocate. You need to be holding the busy extent tree lock when calling
2568 * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy
2569 * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
2570 * match. This is done so that a non-zero return indicates an overlap that
2571 * will require a synchronous transaction, but it can still be
2572 * used to distinguish between a partial or exact match.
2573 */
2574int
2575xfs_alloc_busy_search(
2576 struct xfs_mount *mp,
2577 xfs_agnumber_t agno,
2578 xfs_agblock_t bno,
2579 xfs_extlen_t len)
2580{
2581 struct xfs_perag *pag;
2582 struct rb_node *rbp;
2583 struct xfs_busy_extent *busyp;
2584 int match = 0;
2585
2586 pag = xfs_perag_get(mp, agno);
2587 spin_lock(&pag->pagb_lock);
2588
2589 rbp = pag->pagb_tree.rb_node;
2590
2591 /* find closest start bno overlap */
2592 while (rbp) {
2593 busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node);
2594 if (bno < busyp->bno) {
2595 /* may overlap, but exact start block is lower */
2596 if (bno + len > busyp->bno)
2597 match = -1;
2598 rbp = rbp->rb_left;
2599 } else if (bno > busyp->bno) {
2600 /* may overlap, but exact start block is higher */
2601 if (bno < busyp->bno + busyp->length)
2602 match = -1;
2603 rbp = rbp->rb_right;
2604 } else {
2605 /* bno matches busyp, length determines exact match */
2606 match = (busyp->length == len) ? 1 : -1;
2607 break;
2608 }
2609 }
2610 spin_unlock(&pag->pagb_lock);
2611 xfs_perag_put(pag);
2612 return match;
2613}
2614
2615/*
2616 * The found free extent [fbno, fend] overlaps part or all of the given busy
2617 * extent. If the overlap covers the beginning, the end, or all of the busy
2618 * extent, the overlapping portion can be made unbusy and used for the
2619 * allocation. We can't split a busy extent because we can't modify a
2620 * transaction/CIL context busy list, but we can update an entries block
2621 * number or length.
2622 *
2623 * Returns true if the extent can safely be reused, or false if the search
2624 * needs to be restarted.
2625 */
2626STATIC bool
2627xfs_alloc_busy_update_extent(
2628 struct xfs_mount *mp,
2629 struct xfs_perag *pag,
2630 struct xfs_busy_extent *busyp,
2631 xfs_agblock_t fbno,
2632 xfs_extlen_t flen,
2633 bool userdata)
2634{
2635 xfs_agblock_t fend = fbno + flen;
2636 xfs_agblock_t bbno = busyp->bno;
2637 xfs_agblock_t bend = bbno + busyp->length;
2638
2639 /*
2640 * This extent is currently being discarded. Give the thread
2641 * performing the discard a chance to mark the extent unbusy
2642 * and retry.
2643 */
2644 if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) {
2645 spin_unlock(&pag->pagb_lock);
2646 delay(1);
2647 spin_lock(&pag->pagb_lock);
2648 return false;
2649 }
2650
2651 /*
2652 * If there is a busy extent overlapping a user allocation, we have
2653 * no choice but to force the log and retry the search.
2654 *
2655 * Fortunately this does not happen during normal operation, but
2656 * only if the filesystem is very low on space and has to dip into
2657 * the AGFL for normal allocations.
2658 */
2659 if (userdata)
2660 goto out_force_log;
2661
2662 if (bbno < fbno && bend > fend) {
2663 /*
2664 * Case 1:
2665 * bbno bend
2666 * +BBBBBBBBBBBBBBBBB+
2667 * +---------+
2668 * fbno fend
2669 */
2670
2671 /*
2672 * We would have to split the busy extent to be able to track
2673 * it correct, which we cannot do because we would have to
2674 * modify the list of busy extents attached to the transaction
2675 * or CIL context, which is immutable.
2676 *
2677 * Force out the log to clear the busy extent and retry the
2678 * search.
2679 */
2680 goto out_force_log;
2681 } else if (bbno >= fbno && bend <= fend) {
2682 /*
2683 * Case 2:
2684 * bbno bend
2685 * +BBBBBBBBBBBBBBBBB+
2686 * +-----------------+
2687 * fbno fend
2688 *
2689 * Case 3:
2690 * bbno bend
2691 * +BBBBBBBBBBBBBBBBB+
2692 * +--------------------------+
2693 * fbno fend
2694 *
2695 * Case 4:
2696 * bbno bend
2697 * +BBBBBBBBBBBBBBBBB+
2698 * +--------------------------+
2699 * fbno fend
2700 *
2701 * Case 5:
2702 * bbno bend
2703 * +BBBBBBBBBBBBBBBBB+
2704 * +-----------------------------------+
2705 * fbno fend
2706 *
2707 */
2708
2709 /*
2710 * The busy extent is fully covered by the extent we are
2711 * allocating, and can simply be removed from the rbtree.
2712 * However we cannot remove it from the immutable list
2713 * tracking busy extents in the transaction or CIL context,
2714 * so set the length to zero to mark it invalid.
2715 *
2716 * We also need to restart the busy extent search from the
2717 * tree root, because erasing the node can rearrange the
2718 * tree topology.
2719 */
2720 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2721 busyp->length = 0;
2722 return false;
2723 } else if (fend < bend) {
2724 /*
2725 * Case 6:
2726 * bbno bend
2727 * +BBBBBBBBBBBBBBBBB+
2728 * +---------+
2729 * fbno fend
2730 *
2731 * Case 7:
2732 * bbno bend
2733 * +BBBBBBBBBBBBBBBBB+
2734 * +------------------+
2735 * fbno fend
2736 *
2737 */
2738 busyp->bno = fend;
2739 } else if (bbno < fbno) {
2740 /*
2741 * Case 8:
2742 * bbno bend
2743 * +BBBBBBBBBBBBBBBBB+
2744 * +-------------+
2745 * fbno fend
2746 *
2747 * Case 9:
2748 * bbno bend
2749 * +BBBBBBBBBBBBBBBBB+
2750 * +----------------------+
2751 * fbno fend
2752 */
2753 busyp->length = fbno - busyp->bno;
2754 } else {
2755 ASSERT(0);
2756 }
2757
2758 trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen);
2759 return true;
2760
2761out_force_log:
2762 spin_unlock(&pag->pagb_lock);
2763 xfs_log_force(mp, XFS_LOG_SYNC);
2764 trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen);
2765 spin_lock(&pag->pagb_lock);
2766 return false;
2767}
2768
2769
2770/*
2771 * For a given extent [fbno, flen], make sure we can reuse it safely.
2772 */
2773void
2774xfs_alloc_busy_reuse(
2775 struct xfs_mount *mp,
2776 xfs_agnumber_t agno,
2777 xfs_agblock_t fbno,
2778 xfs_extlen_t flen,
2779 bool userdata)
2780{
2781 struct xfs_perag *pag;
2782 struct rb_node *rbp;
2783
2784 ASSERT(flen > 0);
2785
2786 pag = xfs_perag_get(mp, agno);
2787 spin_lock(&pag->pagb_lock);
2788restart:
2789 rbp = pag->pagb_tree.rb_node;
2790 while (rbp) {
2791 struct xfs_busy_extent *busyp =
2792 rb_entry(rbp, struct xfs_busy_extent, rb_node);
2793 xfs_agblock_t bbno = busyp->bno;
2794 xfs_agblock_t bend = bbno + busyp->length;
2795
2796 if (fbno + flen <= bbno) {
2797 rbp = rbp->rb_left;
2798 continue;
2799 } else if (fbno >= bend) {
2800 rbp = rbp->rb_right;
2801 continue;
2802 }
2803
2804 if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen,
2805 userdata))
2806 goto restart;
2807 }
2808 spin_unlock(&pag->pagb_lock);
2809 xfs_perag_put(pag);
2810}
2811
2812/*
2813 * For a given extent [fbno, flen], search the busy extent list to find a
2814 * subset of the extent that is not busy. If *rlen is smaller than
2815 * args->minlen no suitable extent could be found, and the higher level
2816 * code needs to force out the log and retry the allocation.
2817 */
2818STATIC void
2819xfs_alloc_busy_trim(
2820 struct xfs_alloc_arg *args,
2821 xfs_agblock_t bno,
2822 xfs_extlen_t len,
2823 xfs_agblock_t *rbno,
2824 xfs_extlen_t *rlen)
2825{
2826 xfs_agblock_t fbno;
2827 xfs_extlen_t flen;
2828 struct rb_node *rbp;
2829
2830 ASSERT(len > 0);
2831
2832 spin_lock(&args->pag->pagb_lock);
2833restart:
2834 fbno = bno;
2835 flen = len;
2836 rbp = args->pag->pagb_tree.rb_node;
2837 while (rbp && flen >= args->minlen) {
2838 struct xfs_busy_extent *busyp =
2839 rb_entry(rbp, struct xfs_busy_extent, rb_node);
2840 xfs_agblock_t fend = fbno + flen;
2841 xfs_agblock_t bbno = busyp->bno;
2842 xfs_agblock_t bend = bbno + busyp->length;
2843
2844 if (fend <= bbno) {
2845 rbp = rbp->rb_left;
2846 continue;
2847 } else if (fbno >= bend) {
2848 rbp = rbp->rb_right;
2849 continue;
2850 }
2851
2852 /*
2853 * If this is a metadata allocation, try to reuse the busy
2854 * extent instead of trimming the allocation.
2855 */
2856 if (!args->userdata &&
2857 !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) {
2858 if (!xfs_alloc_busy_update_extent(args->mp, args->pag,
2859 busyp, fbno, flen,
2860 false))
2861 goto restart;
2862 continue;
2863 }
2864
2865 if (bbno <= fbno) {
2866 /* start overlap */
2867
2868 /*
2869 * Case 1:
2870 * bbno bend
2871 * +BBBBBBBBBBBBBBBBB+
2872 * +---------+
2873 * fbno fend
2874 *
2875 * Case 2:
2876 * bbno bend
2877 * +BBBBBBBBBBBBBBBBB+
2878 * +-------------+
2879 * fbno fend
2880 *
2881 * Case 3:
2882 * bbno bend
2883 * +BBBBBBBBBBBBBBBBB+
2884 * +-------------+
2885 * fbno fend
2886 *
2887 * Case 4:
2888 * bbno bend
2889 * +BBBBBBBBBBBBBBBBB+
2890 * +-----------------+
2891 * fbno fend
2892 *
2893 * No unbusy region in extent, return failure.
2894 */
2895 if (fend <= bend)
2896 goto fail;
2897
2898 /*
2899 * Case 5:
2900 * bbno bend
2901 * +BBBBBBBBBBBBBBBBB+
2902 * +----------------------+
2903 * fbno fend
2904 *
2905 * Case 6:
2906 * bbno bend
2907 * +BBBBBBBBBBBBBBBBB+
2908 * +--------------------------+
2909 * fbno fend
2910 *
2911 * Needs to be trimmed to:
2912 * +-------+
2913 * fbno fend
2914 */
2915 fbno = bend;
2916 } else if (bend >= fend) {
2917 /* end overlap */
2918
2919 /*
2920 * Case 7:
2921 * bbno bend
2922 * +BBBBBBBBBBBBBBBBB+
2923 * +------------------+
2924 * fbno fend
2925 *
2926 * Case 8:
2927 * bbno bend
2928 * +BBBBBBBBBBBBBBBBB+
2929 * +--------------------------+
2930 * fbno fend
2931 *
2932 * Needs to be trimmed to:
2933 * +-------+
2934 * fbno fend
2935 */
2936 fend = bbno;
2937 } else {
2938 /* middle overlap */
2939
2940 /*
2941 * Case 9:
2942 * bbno bend
2943 * +BBBBBBBBBBBBBBBBB+
2944 * +-----------------------------------+
2945 * fbno fend
2946 *
2947 * Can be trimmed to:
2948 * +-------+ OR +-------+
2949 * fbno fend fbno fend
2950 *
2951 * Backward allocation leads to significant
2952 * fragmentation of directories, which degrades
2953 * directory performance, therefore we always want to
2954 * choose the option that produces forward allocation
2955 * patterns.
2956 * Preferring the lower bno extent will make the next
2957 * request use "fend" as the start of the next
2958 * allocation; if the segment is no longer busy at
2959 * that point, we'll get a contiguous allocation, but
2960 * even if it is still busy, we will get a forward
2961 * allocation.
2962 * We try to avoid choosing the segment at "bend",
2963 * because that can lead to the next allocation
2964 * taking the segment at "fbno", which would be a
2965 * backward allocation. We only use the segment at
2966 * "fbno" if it is much larger than the current
2967 * requested size, because in that case there's a
2968 * good chance subsequent allocations will be
2969 * contiguous.
2970 */
2971 if (bbno - fbno >= args->maxlen) {
2972 /* left candidate fits perfect */
2973 fend = bbno;
2974 } else if (fend - bend >= args->maxlen * 4) {
2975 /* right candidate has enough free space */
2976 fbno = bend;
2977 } else if (bbno - fbno >= args->minlen) {
2978 /* left candidate fits minimum requirement */
2979 fend = bbno;
2980 } else {
2981 goto fail;
2982 }
2983 }
2984
2985 flen = fend - fbno;
2986 }
2987 spin_unlock(&args->pag->pagb_lock);
2988
2989 if (fbno != bno || flen != len) {
2990 trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
2991 fbno, flen);
2992 }
2993 *rbno = fbno;
2994 *rlen = flen;
2995 return;
2996fail:
2997 /*
2998 * Return a zero extent length as failure indications. All callers
2999 * re-check if the trimmed extent satisfies the minlen requirement.
3000 */
3001 spin_unlock(&args->pag->pagb_lock);
3002 trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
3003 *rbno = fbno;
3004 *rlen = 0;
3005}
3006
3007static void
3008xfs_alloc_busy_clear_one(
3009 struct xfs_mount *mp,
3010 struct xfs_perag *pag,
3011 struct xfs_busy_extent *busyp)
3012{
3013 if (busyp->length) {
3014 trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno,
3015 busyp->length);
3016 rb_erase(&busyp->rb_node, &pag->pagb_tree);
3017 }
3018
3019 list_del_init(&busyp->list);
3020 kmem_free(busyp);
3021}
3022
3023/*
3024 * Remove all extents on the passed in list from the busy extents tree.
3025 * If do_discard is set skip extents that need to be discarded, and mark
3026 * these as undergoing a discard operation instead.
3027 */
3028void
3029xfs_alloc_busy_clear(
3030 struct xfs_mount *mp,
3031 struct list_head *list,
3032 bool do_discard)
3033{
3034 struct xfs_busy_extent *busyp, *n;
3035 struct xfs_perag *pag = NULL;
3036 xfs_agnumber_t agno = NULLAGNUMBER;
3037
3038 list_for_each_entry_safe(busyp, n, list, list) {
3039 if (busyp->agno != agno) {
3040 if (pag) {
3041 spin_unlock(&pag->pagb_lock);
3042 xfs_perag_put(pag);
3043 }
3044 pag = xfs_perag_get(mp, busyp->agno);
3045 spin_lock(&pag->pagb_lock);
3046 agno = busyp->agno;
3047 }
3048
3049 if (do_discard && busyp->length &&
3050 !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD))
3051 busyp->flags = XFS_ALLOC_BUSY_DISCARDED;
3052 else
3053 xfs_alloc_busy_clear_one(mp, pag, busyp);
3054 }
3055
3056 if (pag) {
3057 spin_unlock(&pag->pagb_lock);
3058 xfs_perag_put(pag);
3059 }
3060}
3061
3062/*
3063 * Callback for list_sort to sort busy extents by the AG they reside in.
3064 */
3065int
3066xfs_busy_extent_ag_cmp(
3067 void *priv,
3068 struct list_head *a,
3069 struct list_head *b)
3070{
3071 return container_of(a, struct xfs_busy_extent, list)->agno -
3072 container_of(b, struct xfs_busy_extent, list)->agno;
3073}