diff options
Diffstat (limited to 'fs/xfs/xfs_alloc.c')
-rw-r--r-- | fs/xfs/xfs_alloc.c | 1029 |
1 files changed, 699 insertions, 330 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index f3227984a9bf..95862bbff56b 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -41,19 +41,13 @@ | |||
41 | #define XFSA_FIXUP_BNO_OK 1 | 41 | #define XFSA_FIXUP_BNO_OK 1 |
42 | #define XFSA_FIXUP_CNT_OK 2 | 42 | #define XFSA_FIXUP_CNT_OK 2 |
43 | 43 | ||
44 | /* | ||
45 | * Prototypes for per-ag allocation routines | ||
46 | */ | ||
47 | |||
48 | STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); | 44 | STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); |
49 | STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); | 45 | STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); |
50 | STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); | 46 | STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); |
51 | STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, | 47 | STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, |
52 | xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); | 48 | xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); |
53 | 49 | STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *, | |
54 | /* | 50 | xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *); |
55 | * Internal functions. | ||
56 | */ | ||
57 | 51 | ||
58 | /* | 52 | /* |
59 | * Lookup the record equal to [bno, len] in the btree given by cur. | 53 | * Lookup the record equal to [bno, len] in the btree given by cur. |
@@ -147,27 +141,28 @@ xfs_alloc_get_rec( | |||
147 | */ | 141 | */ |
148 | STATIC void | 142 | STATIC void |
149 | xfs_alloc_compute_aligned( | 143 | xfs_alloc_compute_aligned( |
144 | xfs_alloc_arg_t *args, /* allocation argument structure */ | ||
150 | xfs_agblock_t foundbno, /* starting block in found extent */ | 145 | xfs_agblock_t foundbno, /* starting block in found extent */ |
151 | xfs_extlen_t foundlen, /* length in found extent */ | 146 | xfs_extlen_t foundlen, /* length in found extent */ |
152 | xfs_extlen_t alignment, /* alignment for allocation */ | ||
153 | xfs_extlen_t minlen, /* minimum length for allocation */ | ||
154 | xfs_agblock_t *resbno, /* result block number */ | 147 | xfs_agblock_t *resbno, /* result block number */ |
155 | xfs_extlen_t *reslen) /* result length */ | 148 | xfs_extlen_t *reslen) /* result length */ |
156 | { | 149 | { |
157 | xfs_agblock_t bno; | 150 | xfs_agblock_t bno; |
158 | xfs_extlen_t diff; | ||
159 | xfs_extlen_t len; | 151 | xfs_extlen_t len; |
160 | 152 | ||
161 | if (alignment > 1 && foundlen >= minlen) { | 153 | /* Trim busy sections out of found extent */ |
162 | bno = roundup(foundbno, alignment); | 154 | xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len); |
163 | diff = bno - foundbno; | 155 | |
164 | len = diff >= foundlen ? 0 : foundlen - diff; | 156 | if (args->alignment > 1 && len >= args->minlen) { |
157 | xfs_agblock_t aligned_bno = roundup(bno, args->alignment); | ||
158 | xfs_extlen_t diff = aligned_bno - bno; | ||
159 | |||
160 | *resbno = aligned_bno; | ||
161 | *reslen = diff >= len ? 0 : len - diff; | ||
165 | } else { | 162 | } else { |
166 | bno = foundbno; | 163 | *resbno = bno; |
167 | len = foundlen; | 164 | *reslen = len; |
168 | } | 165 | } |
169 | *resbno = bno; | ||
170 | *reslen = len; | ||
171 | } | 166 | } |
172 | 167 | ||
173 | /* | 168 | /* |
@@ -281,7 +276,6 @@ xfs_alloc_fix_minleft( | |||
281 | return 1; | 276 | return 1; |
282 | agf = XFS_BUF_TO_AGF(args->agbp); | 277 | agf = XFS_BUF_TO_AGF(args->agbp); |
283 | diff = be32_to_cpu(agf->agf_freeblks) | 278 | diff = be32_to_cpu(agf->agf_freeblks) |
284 | + be32_to_cpu(agf->agf_flcount) | ||
285 | - args->len - args->minleft; | 279 | - args->len - args->minleft; |
286 | if (diff >= 0) | 280 | if (diff >= 0) |
287 | return 1; | 281 | return 1; |
@@ -464,6 +458,27 @@ xfs_alloc_read_agfl( | |||
464 | return 0; | 458 | return 0; |
465 | } | 459 | } |
466 | 460 | ||
461 | STATIC int | ||
462 | xfs_alloc_update_counters( | ||
463 | struct xfs_trans *tp, | ||
464 | struct xfs_perag *pag, | ||
465 | struct xfs_buf *agbp, | ||
466 | long len) | ||
467 | { | ||
468 | struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); | ||
469 | |||
470 | pag->pagf_freeblks += len; | ||
471 | be32_add_cpu(&agf->agf_freeblks, len); | ||
472 | |||
473 | xfs_trans_agblocks_delta(tp, len); | ||
474 | if (unlikely(be32_to_cpu(agf->agf_freeblks) > | ||
475 | be32_to_cpu(agf->agf_length))) | ||
476 | return EFSCORRUPTED; | ||
477 | |||
478 | xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); | ||
479 | return 0; | ||
480 | } | ||
481 | |||
467 | /* | 482 | /* |
468 | * Allocation group level functions. | 483 | * Allocation group level functions. |
469 | */ | 484 | */ |
@@ -505,49 +520,36 @@ xfs_alloc_ag_vextent( | |||
505 | ASSERT(0); | 520 | ASSERT(0); |
506 | /* NOTREACHED */ | 521 | /* NOTREACHED */ |
507 | } | 522 | } |
508 | if (error) | 523 | |
524 | if (error || args->agbno == NULLAGBLOCK) | ||
509 | return error; | 525 | return error; |
510 | /* | ||
511 | * If the allocation worked, need to change the agf structure | ||
512 | * (and log it), and the superblock. | ||
513 | */ | ||
514 | if (args->agbno != NULLAGBLOCK) { | ||
515 | xfs_agf_t *agf; /* allocation group freelist header */ | ||
516 | long slen = (long)args->len; | ||
517 | 526 | ||
518 | ASSERT(args->len >= args->minlen && args->len <= args->maxlen); | 527 | ASSERT(args->len >= args->minlen); |
519 | ASSERT(!(args->wasfromfl) || !args->isfl); | 528 | ASSERT(args->len <= args->maxlen); |
520 | ASSERT(args->agbno % args->alignment == 0); | 529 | ASSERT(!args->wasfromfl || !args->isfl); |
521 | if (!(args->wasfromfl)) { | 530 | ASSERT(args->agbno % args->alignment == 0); |
522 | 531 | ||
523 | agf = XFS_BUF_TO_AGF(args->agbp); | 532 | if (!args->wasfromfl) { |
524 | be32_add_cpu(&agf->agf_freeblks, -(args->len)); | 533 | error = xfs_alloc_update_counters(args->tp, args->pag, |
525 | xfs_trans_agblocks_delta(args->tp, | 534 | args->agbp, |
526 | -((long)(args->len))); | 535 | -((long)(args->len))); |
527 | args->pag->pagf_freeblks -= args->len; | 536 | if (error) |
528 | ASSERT(be32_to_cpu(agf->agf_freeblks) <= | 537 | return error; |
529 | be32_to_cpu(agf->agf_length)); | 538 | |
530 | xfs_alloc_log_agf(args->tp, args->agbp, | 539 | ASSERT(!xfs_alloc_busy_search(args->mp, args->agno, |
531 | XFS_AGF_FREEBLKS); | 540 | args->agbno, args->len)); |
532 | /* | ||
533 | * Search the busylist for these blocks and mark the | ||
534 | * transaction as synchronous if blocks are found. This | ||
535 | * avoids the need to block due to a synchronous log | ||
536 | * force to ensure correct ordering as the synchronous | ||
537 | * transaction will guarantee that for us. | ||
538 | */ | ||
539 | if (xfs_alloc_busy_search(args->mp, args->agno, | ||
540 | args->agbno, args->len)) | ||
541 | xfs_trans_set_sync(args->tp); | ||
542 | } | ||
543 | if (!args->isfl) | ||
544 | xfs_trans_mod_sb(args->tp, | ||
545 | args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : | ||
546 | XFS_TRANS_SB_FDBLOCKS, -slen); | ||
547 | XFS_STATS_INC(xs_allocx); | ||
548 | XFS_STATS_ADD(xs_allocb, args->len); | ||
549 | } | 541 | } |
550 | return 0; | 542 | |
543 | if (!args->isfl) { | ||
544 | xfs_trans_mod_sb(args->tp, args->wasdel ? | ||
545 | XFS_TRANS_SB_RES_FDBLOCKS : | ||
546 | XFS_TRANS_SB_FDBLOCKS, | ||
547 | -((long)(args->len))); | ||
548 | } | ||
549 | |||
550 | XFS_STATS_INC(xs_allocx); | ||
551 | XFS_STATS_ADD(xs_allocb, args->len); | ||
552 | return error; | ||
551 | } | 553 | } |
552 | 554 | ||
553 | /* | 555 | /* |
@@ -562,14 +564,14 @@ xfs_alloc_ag_vextent_exact( | |||
562 | { | 564 | { |
563 | xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ | 565 | xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ |
564 | xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ | 566 | xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ |
565 | xfs_agblock_t end; /* end of allocated extent */ | ||
566 | int error; | 567 | int error; |
567 | xfs_agblock_t fbno; /* start block of found extent */ | 568 | xfs_agblock_t fbno; /* start block of found extent */ |
568 | xfs_agblock_t fend; /* end block of found extent */ | ||
569 | xfs_extlen_t flen; /* length of found extent */ | 569 | xfs_extlen_t flen; /* length of found extent */ |
570 | xfs_agblock_t tbno; /* start block of trimmed extent */ | ||
571 | xfs_extlen_t tlen; /* length of trimmed extent */ | ||
572 | xfs_agblock_t tend; /* end block of trimmed extent */ | ||
573 | xfs_agblock_t end; /* end of allocated extent */ | ||
570 | int i; /* success/failure of operation */ | 574 | int i; /* success/failure of operation */ |
571 | xfs_agblock_t maxend; /* end of maximal extent */ | ||
572 | xfs_agblock_t minend; /* end of minimal extent */ | ||
573 | xfs_extlen_t rlen; /* length of returned extent */ | 575 | xfs_extlen_t rlen; /* length of returned extent */ |
574 | 576 | ||
575 | ASSERT(args->alignment == 1); | 577 | ASSERT(args->alignment == 1); |
@@ -599,14 +601,22 @@ xfs_alloc_ag_vextent_exact( | |||
599 | goto error0; | 601 | goto error0; |
600 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 602 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
601 | ASSERT(fbno <= args->agbno); | 603 | ASSERT(fbno <= args->agbno); |
602 | minend = args->agbno + args->minlen; | ||
603 | maxend = args->agbno + args->maxlen; | ||
604 | fend = fbno + flen; | ||
605 | 604 | ||
606 | /* | 605 | /* |
607 | * Give up if the freespace isn't long enough for the minimum request. | 606 | * Check for overlapping busy extents. |
608 | */ | 607 | */ |
609 | if (fend < minend) | 608 | xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen); |
609 | |||
610 | /* | ||
611 | * Give up if the start of the extent is busy, or the freespace isn't | ||
612 | * long enough for the minimum request. | ||
613 | */ | ||
614 | if (tbno > args->agbno) | ||
615 | goto not_found; | ||
616 | if (tlen < args->minlen) | ||
617 | goto not_found; | ||
618 | tend = tbno + tlen; | ||
619 | if (tend < args->agbno + args->minlen) | ||
610 | goto not_found; | 620 | goto not_found; |
611 | 621 | ||
612 | /* | 622 | /* |
@@ -615,14 +625,14 @@ xfs_alloc_ag_vextent_exact( | |||
615 | * | 625 | * |
616 | * Fix the length according to mod and prod if given. | 626 | * Fix the length according to mod and prod if given. |
617 | */ | 627 | */ |
618 | end = XFS_AGBLOCK_MIN(fend, maxend); | 628 | end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); |
619 | args->len = end - args->agbno; | 629 | args->len = end - args->agbno; |
620 | xfs_alloc_fix_len(args); | 630 | xfs_alloc_fix_len(args); |
621 | if (!xfs_alloc_fix_minleft(args)) | 631 | if (!xfs_alloc_fix_minleft(args)) |
622 | goto not_found; | 632 | goto not_found; |
623 | 633 | ||
624 | rlen = args->len; | 634 | rlen = args->len; |
625 | ASSERT(args->agbno + rlen <= fend); | 635 | ASSERT(args->agbno + rlen <= tend); |
626 | end = args->agbno + rlen; | 636 | end = args->agbno + rlen; |
627 | 637 | ||
628 | /* | 638 | /* |
@@ -671,11 +681,11 @@ xfs_alloc_find_best_extent( | |||
671 | struct xfs_btree_cur **scur, /* searching cursor */ | 681 | struct xfs_btree_cur **scur, /* searching cursor */ |
672 | xfs_agblock_t gdiff, /* difference for search comparison */ | 682 | xfs_agblock_t gdiff, /* difference for search comparison */ |
673 | xfs_agblock_t *sbno, /* extent found by search */ | 683 | xfs_agblock_t *sbno, /* extent found by search */ |
674 | xfs_extlen_t *slen, | 684 | xfs_extlen_t *slen, /* extent length */ |
675 | xfs_extlen_t *slena, /* aligned length */ | 685 | xfs_agblock_t *sbnoa, /* aligned extent found by search */ |
686 | xfs_extlen_t *slena, /* aligned extent length */ | ||
676 | int dir) /* 0 = search right, 1 = search left */ | 687 | int dir) /* 0 = search right, 1 = search left */ |
677 | { | 688 | { |
678 | xfs_agblock_t bno; | ||
679 | xfs_agblock_t new; | 689 | xfs_agblock_t new; |
680 | xfs_agblock_t sdiff; | 690 | xfs_agblock_t sdiff; |
681 | int error; | 691 | int error; |
@@ -693,17 +703,16 @@ xfs_alloc_find_best_extent( | |||
693 | if (error) | 703 | if (error) |
694 | goto error0; | 704 | goto error0; |
695 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 705 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
696 | xfs_alloc_compute_aligned(*sbno, *slen, args->alignment, | 706 | xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); |
697 | args->minlen, &bno, slena); | ||
698 | 707 | ||
699 | /* | 708 | /* |
700 | * The good extent is closer than this one. | 709 | * The good extent is closer than this one. |
701 | */ | 710 | */ |
702 | if (!dir) { | 711 | if (!dir) { |
703 | if (bno >= args->agbno + gdiff) | 712 | if (*sbnoa >= args->agbno + gdiff) |
704 | goto out_use_good; | 713 | goto out_use_good; |
705 | } else { | 714 | } else { |
706 | if (bno <= args->agbno - gdiff) | 715 | if (*sbnoa <= args->agbno - gdiff) |
707 | goto out_use_good; | 716 | goto out_use_good; |
708 | } | 717 | } |
709 | 718 | ||
@@ -715,8 +724,8 @@ xfs_alloc_find_best_extent( | |||
715 | xfs_alloc_fix_len(args); | 724 | xfs_alloc_fix_len(args); |
716 | 725 | ||
717 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 726 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
718 | args->alignment, *sbno, | 727 | args->alignment, *sbnoa, |
719 | *slen, &new); | 728 | *slena, &new); |
720 | 729 | ||
721 | /* | 730 | /* |
722 | * Choose closer size and invalidate other cursor. | 731 | * Choose closer size and invalidate other cursor. |
@@ -766,7 +775,7 @@ xfs_alloc_ag_vextent_near( | |||
766 | xfs_agblock_t gtbnoa; /* aligned ... */ | 775 | xfs_agblock_t gtbnoa; /* aligned ... */ |
767 | xfs_extlen_t gtdiff; /* difference to right side entry */ | 776 | xfs_extlen_t gtdiff; /* difference to right side entry */ |
768 | xfs_extlen_t gtlen; /* length of right side entry */ | 777 | xfs_extlen_t gtlen; /* length of right side entry */ |
769 | xfs_extlen_t gtlena = 0; /* aligned ... */ | 778 | xfs_extlen_t gtlena; /* aligned ... */ |
770 | xfs_agblock_t gtnew; /* useful start bno of right side */ | 779 | xfs_agblock_t gtnew; /* useful start bno of right side */ |
771 | int error; /* error code */ | 780 | int error; /* error code */ |
772 | int i; /* result code, temporary */ | 781 | int i; /* result code, temporary */ |
@@ -775,9 +784,10 @@ xfs_alloc_ag_vextent_near( | |||
775 | xfs_agblock_t ltbnoa; /* aligned ... */ | 784 | xfs_agblock_t ltbnoa; /* aligned ... */ |
776 | xfs_extlen_t ltdiff; /* difference to left side entry */ | 785 | xfs_extlen_t ltdiff; /* difference to left side entry */ |
777 | xfs_extlen_t ltlen; /* length of left side entry */ | 786 | xfs_extlen_t ltlen; /* length of left side entry */ |
778 | xfs_extlen_t ltlena = 0; /* aligned ... */ | 787 | xfs_extlen_t ltlena; /* aligned ... */ |
779 | xfs_agblock_t ltnew; /* useful start bno of left side */ | 788 | xfs_agblock_t ltnew; /* useful start bno of left side */ |
780 | xfs_extlen_t rlen; /* length of returned extent */ | 789 | xfs_extlen_t rlen; /* length of returned extent */ |
790 | int forced = 0; | ||
781 | #if defined(DEBUG) && defined(__KERNEL__) | 791 | #if defined(DEBUG) && defined(__KERNEL__) |
782 | /* | 792 | /* |
783 | * Randomly don't execute the first algorithm. | 793 | * Randomly don't execute the first algorithm. |
@@ -786,13 +796,20 @@ xfs_alloc_ag_vextent_near( | |||
786 | 796 | ||
787 | dofirst = random32() & 1; | 797 | dofirst = random32() & 1; |
788 | #endif | 798 | #endif |
799 | |||
800 | restart: | ||
801 | bno_cur_lt = NULL; | ||
802 | bno_cur_gt = NULL; | ||
803 | ltlen = 0; | ||
804 | gtlena = 0; | ||
805 | ltlena = 0; | ||
806 | |||
789 | /* | 807 | /* |
790 | * Get a cursor for the by-size btree. | 808 | * Get a cursor for the by-size btree. |
791 | */ | 809 | */ |
792 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 810 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
793 | args->agno, XFS_BTNUM_CNT); | 811 | args->agno, XFS_BTNUM_CNT); |
794 | ltlen = 0; | 812 | |
795 | bno_cur_lt = bno_cur_gt = NULL; | ||
796 | /* | 813 | /* |
797 | * See if there are any free extents as big as maxlen. | 814 | * See if there are any free extents as big as maxlen. |
798 | */ | 815 | */ |
@@ -808,11 +825,13 @@ xfs_alloc_ag_vextent_near( | |||
808 | goto error0; | 825 | goto error0; |
809 | if (i == 0 || ltlen == 0) { | 826 | if (i == 0 || ltlen == 0) { |
810 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 827 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
828 | trace_xfs_alloc_near_noentry(args); | ||
811 | return 0; | 829 | return 0; |
812 | } | 830 | } |
813 | ASSERT(i == 1); | 831 | ASSERT(i == 1); |
814 | } | 832 | } |
815 | args->wasfromfl = 0; | 833 | args->wasfromfl = 0; |
834 | |||
816 | /* | 835 | /* |
817 | * First algorithm. | 836 | * First algorithm. |
818 | * If the requested extent is large wrt the freespaces available | 837 | * If the requested extent is large wrt the freespaces available |
@@ -866,8 +885,8 @@ xfs_alloc_ag_vextent_near( | |||
866 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) | 885 | if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) |
867 | goto error0; | 886 | goto error0; |
868 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 887 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
869 | xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, | 888 | xfs_alloc_compute_aligned(args, ltbno, ltlen, |
870 | args->minlen, <bnoa, <lena); | 889 | <bnoa, <lena); |
871 | if (ltlena < args->minlen) | 890 | if (ltlena < args->minlen) |
872 | continue; | 891 | continue; |
873 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 892 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
@@ -876,7 +895,7 @@ xfs_alloc_ag_vextent_near( | |||
876 | if (args->len < blen) | 895 | if (args->len < blen) |
877 | continue; | 896 | continue; |
878 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 897 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
879 | args->alignment, ltbno, ltlen, <new); | 898 | args->alignment, ltbnoa, ltlena, <new); |
880 | if (ltnew != NULLAGBLOCK && | 899 | if (ltnew != NULLAGBLOCK && |
881 | (args->len > blen || ltdiff < bdiff)) { | 900 | (args->len > blen || ltdiff < bdiff)) { |
882 | bdiff = ltdiff; | 901 | bdiff = ltdiff; |
@@ -987,8 +1006,8 @@ xfs_alloc_ag_vextent_near( | |||
987 | if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) | 1006 | if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) |
988 | goto error0; | 1007 | goto error0; |
989 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1008 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
990 | xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment, | 1009 | xfs_alloc_compute_aligned(args, ltbno, ltlen, |
991 | args->minlen, <bnoa, <lena); | 1010 | <bnoa, <lena); |
992 | if (ltlena >= args->minlen) | 1011 | if (ltlena >= args->minlen) |
993 | break; | 1012 | break; |
994 | if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) | 1013 | if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) |
@@ -1003,8 +1022,8 @@ xfs_alloc_ag_vextent_near( | |||
1003 | if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) | 1022 | if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) |
1004 | goto error0; | 1023 | goto error0; |
1005 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1024 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1006 | xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment, | 1025 | xfs_alloc_compute_aligned(args, gtbno, gtlen, |
1007 | args->minlen, >bnoa, >lena); | 1026 | >bnoa, >lena); |
1008 | if (gtlena >= args->minlen) | 1027 | if (gtlena >= args->minlen) |
1009 | break; | 1028 | break; |
1010 | if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) | 1029 | if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) |
@@ -1028,11 +1047,12 @@ xfs_alloc_ag_vextent_near( | |||
1028 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 1047 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
1029 | xfs_alloc_fix_len(args); | 1048 | xfs_alloc_fix_len(args); |
1030 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 1049 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
1031 | args->alignment, ltbno, ltlen, <new); | 1050 | args->alignment, ltbnoa, ltlena, <new); |
1032 | 1051 | ||
1033 | error = xfs_alloc_find_best_extent(args, | 1052 | error = xfs_alloc_find_best_extent(args, |
1034 | &bno_cur_lt, &bno_cur_gt, | 1053 | &bno_cur_lt, &bno_cur_gt, |
1035 | ltdiff, >bno, >len, >lena, | 1054 | ltdiff, >bno, >len, |
1055 | >bnoa, >lena, | ||
1036 | 0 /* search right */); | 1056 | 0 /* search right */); |
1037 | } else { | 1057 | } else { |
1038 | ASSERT(gtlena >= args->minlen); | 1058 | ASSERT(gtlena >= args->minlen); |
@@ -1043,11 +1063,12 @@ xfs_alloc_ag_vextent_near( | |||
1043 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); | 1063 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); |
1044 | xfs_alloc_fix_len(args); | 1064 | xfs_alloc_fix_len(args); |
1045 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 1065 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
1046 | args->alignment, gtbno, gtlen, >new); | 1066 | args->alignment, gtbnoa, gtlena, >new); |
1047 | 1067 | ||
1048 | error = xfs_alloc_find_best_extent(args, | 1068 | error = xfs_alloc_find_best_extent(args, |
1049 | &bno_cur_gt, &bno_cur_lt, | 1069 | &bno_cur_gt, &bno_cur_lt, |
1050 | gtdiff, <bno, <len, <lena, | 1070 | gtdiff, <bno, <len, |
1071 | <bnoa, <lena, | ||
1051 | 1 /* search left */); | 1072 | 1 /* search left */); |
1052 | } | 1073 | } |
1053 | 1074 | ||
@@ -1059,6 +1080,12 @@ xfs_alloc_ag_vextent_near( | |||
1059 | * If we couldn't get anything, give up. | 1080 | * If we couldn't get anything, give up. |
1060 | */ | 1081 | */ |
1061 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { | 1082 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { |
1083 | if (!forced++) { | ||
1084 | trace_xfs_alloc_near_busy(args); | ||
1085 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
1086 | goto restart; | ||
1087 | } | ||
1088 | |||
1062 | trace_xfs_alloc_size_neither(args); | 1089 | trace_xfs_alloc_size_neither(args); |
1063 | args->agbno = NULLAGBLOCK; | 1090 | args->agbno = NULLAGBLOCK; |
1064 | return 0; | 1091 | return 0; |
@@ -1093,12 +1120,13 @@ xfs_alloc_ag_vextent_near( | |||
1093 | return 0; | 1120 | return 0; |
1094 | } | 1121 | } |
1095 | rlen = args->len; | 1122 | rlen = args->len; |
1096 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, | 1123 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, |
1097 | ltlen, <new); | 1124 | ltbnoa, ltlena, <new); |
1098 | ASSERT(ltnew >= ltbno); | 1125 | ASSERT(ltnew >= ltbno); |
1099 | ASSERT(ltnew + rlen <= ltbno + ltlen); | 1126 | ASSERT(ltnew + rlen <= ltbnoa + ltlena); |
1100 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 1127 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
1101 | args->agbno = ltnew; | 1128 | args->agbno = ltnew; |
1129 | |||
1102 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, | 1130 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, |
1103 | ltnew, rlen, XFSA_FIXUP_BNO_OK))) | 1131 | ltnew, rlen, XFSA_FIXUP_BNO_OK))) |
1104 | goto error0; | 1132 | goto error0; |
@@ -1141,26 +1169,35 @@ xfs_alloc_ag_vextent_size( | |||
1141 | int i; /* temp status variable */ | 1169 | int i; /* temp status variable */ |
1142 | xfs_agblock_t rbno; /* returned block number */ | 1170 | xfs_agblock_t rbno; /* returned block number */ |
1143 | xfs_extlen_t rlen; /* length of returned extent */ | 1171 | xfs_extlen_t rlen; /* length of returned extent */ |
1172 | int forced = 0; | ||
1144 | 1173 | ||
1174 | restart: | ||
1145 | /* | 1175 | /* |
1146 | * Allocate and initialize a cursor for the by-size btree. | 1176 | * Allocate and initialize a cursor for the by-size btree. |
1147 | */ | 1177 | */ |
1148 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 1178 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
1149 | args->agno, XFS_BTNUM_CNT); | 1179 | args->agno, XFS_BTNUM_CNT); |
1150 | bno_cur = NULL; | 1180 | bno_cur = NULL; |
1181 | |||
1151 | /* | 1182 | /* |
1152 | * Look for an entry >= maxlen+alignment-1 blocks. | 1183 | * Look for an entry >= maxlen+alignment-1 blocks. |
1153 | */ | 1184 | */ |
1154 | if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, | 1185 | if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, |
1155 | args->maxlen + args->alignment - 1, &i))) | 1186 | args->maxlen + args->alignment - 1, &i))) |
1156 | goto error0; | 1187 | goto error0; |
1188 | |||
1157 | /* | 1189 | /* |
1158 | * If none, then pick up the last entry in the tree unless the | 1190 | * If none or we have busy extents that we cannot allocate from, then |
1159 | * tree is empty. | 1191 | * we have to settle for a smaller extent. In the case that there are |
1192 | * no large extents, this will return the last entry in the tree unless | ||
1193 | * the tree is empty. In the case that there are only busy large | ||
1194 | * extents, this will return the largest small extent unless there | ||
1195 | * are no smaller extents available. | ||
1160 | */ | 1196 | */ |
1161 | if (!i) { | 1197 | if (!i || forced > 1) { |
1162 | if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, | 1198 | error = xfs_alloc_ag_vextent_small(args, cnt_cur, |
1163 | &flen, &i))) | 1199 | &fbno, &flen, &i); |
1200 | if (error) | ||
1164 | goto error0; | 1201 | goto error0; |
1165 | if (i == 0 || flen == 0) { | 1202 | if (i == 0 || flen == 0) { |
1166 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1203 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
@@ -1168,23 +1205,56 @@ xfs_alloc_ag_vextent_size( | |||
1168 | return 0; | 1205 | return 0; |
1169 | } | 1206 | } |
1170 | ASSERT(i == 1); | 1207 | ASSERT(i == 1); |
1208 | xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); | ||
1209 | } else { | ||
1210 | /* | ||
1211 | * Search for a non-busy extent that is large enough. | ||
1212 | * If we are at low space, don't check, or if we fall of | ||
1213 | * the end of the btree, turn off the busy check and | ||
1214 | * restart. | ||
1215 | */ | ||
1216 | for (;;) { | ||
1217 | error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); | ||
1218 | if (error) | ||
1219 | goto error0; | ||
1220 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1221 | |||
1222 | xfs_alloc_compute_aligned(args, fbno, flen, | ||
1223 | &rbno, &rlen); | ||
1224 | |||
1225 | if (rlen >= args->maxlen) | ||
1226 | break; | ||
1227 | |||
1228 | error = xfs_btree_increment(cnt_cur, 0, &i); | ||
1229 | if (error) | ||
1230 | goto error0; | ||
1231 | if (i == 0) { | ||
1232 | /* | ||
1233 | * Our only valid extents must have been busy. | ||
1234 | * Make it unbusy by forcing the log out and | ||
1235 | * retrying. If we've been here before, forcing | ||
1236 | * the log isn't making the extents available, | ||
1237 | * which means they have probably been freed in | ||
1238 | * this transaction. In that case, we have to | ||
1239 | * give up on them and we'll attempt a minlen | ||
1240 | * allocation the next time around. | ||
1241 | */ | ||
1242 | xfs_btree_del_cursor(cnt_cur, | ||
1243 | XFS_BTREE_NOERROR); | ||
1244 | trace_xfs_alloc_size_busy(args); | ||
1245 | if (!forced++) | ||
1246 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
1247 | goto restart; | ||
1248 | } | ||
1249 | } | ||
1171 | } | 1250 | } |
1172 | /* | 1251 | |
1173 | * There's a freespace as big as maxlen+alignment-1, get it. | ||
1174 | */ | ||
1175 | else { | ||
1176 | if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) | ||
1177 | goto error0; | ||
1178 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
1179 | } | ||
1180 | /* | 1252 | /* |
1181 | * In the first case above, we got the last entry in the | 1253 | * In the first case above, we got the last entry in the |
1182 | * by-size btree. Now we check to see if the space hits maxlen | 1254 | * by-size btree. Now we check to see if the space hits maxlen |
1183 | * once aligned; if not, we search left for something better. | 1255 | * once aligned; if not, we search left for something better. |
1184 | * This can't happen in the second case above. | 1256 | * This can't happen in the second case above. |
1185 | */ | 1257 | */ |
1186 | xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, | ||
1187 | &rbno, &rlen); | ||
1188 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1258 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
1189 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | 1259 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || |
1190 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); | 1260 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); |
@@ -1209,8 +1279,8 @@ xfs_alloc_ag_vextent_size( | |||
1209 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1279 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1210 | if (flen < bestrlen) | 1280 | if (flen < bestrlen) |
1211 | break; | 1281 | break; |
1212 | xfs_alloc_compute_aligned(fbno, flen, args->alignment, | 1282 | xfs_alloc_compute_aligned(args, fbno, flen, |
1213 | args->minlen, &rbno, &rlen); | 1283 | &rbno, &rlen); |
1214 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1284 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
1215 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | 1285 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || |
1216 | (rlen <= flen && rbno + rlen <= fbno + flen), | 1286 | (rlen <= flen && rbno + rlen <= fbno + flen), |
@@ -1238,13 +1308,19 @@ xfs_alloc_ag_vextent_size( | |||
1238 | * Fix up the length. | 1308 | * Fix up the length. |
1239 | */ | 1309 | */ |
1240 | args->len = rlen; | 1310 | args->len = rlen; |
1241 | xfs_alloc_fix_len(args); | 1311 | if (rlen < args->minlen) { |
1242 | if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { | 1312 | if (!forced++) { |
1243 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1313 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
1244 | trace_xfs_alloc_size_nominleft(args); | 1314 | trace_xfs_alloc_size_busy(args); |
1245 | args->agbno = NULLAGBLOCK; | 1315 | xfs_log_force(args->mp, XFS_LOG_SYNC); |
1246 | return 0; | 1316 | goto restart; |
1317 | } | ||
1318 | goto out_nominleft; | ||
1247 | } | 1319 | } |
1320 | xfs_alloc_fix_len(args); | ||
1321 | |||
1322 | if (!xfs_alloc_fix_minleft(args)) | ||
1323 | goto out_nominleft; | ||
1248 | rlen = args->len; | 1324 | rlen = args->len; |
1249 | XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); | 1325 | XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); |
1250 | /* | 1326 | /* |
@@ -1274,6 +1350,12 @@ error0: | |||
1274 | if (bno_cur) | 1350 | if (bno_cur) |
1275 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); | 1351 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); |
1276 | return error; | 1352 | return error; |
1353 | |||
1354 | out_nominleft: | ||
1355 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
1356 | trace_xfs_alloc_size_nominleft(args); | ||
1357 | args->agbno = NULLAGBLOCK; | ||
1358 | return 0; | ||
1277 | } | 1359 | } |
1278 | 1360 | ||
1279 | /* | 1361 | /* |
@@ -1313,6 +1395,9 @@ xfs_alloc_ag_vextent_small( | |||
1313 | if (error) | 1395 | if (error) |
1314 | goto error0; | 1396 | goto error0; |
1315 | if (fbno != NULLAGBLOCK) { | 1397 | if (fbno != NULLAGBLOCK) { |
1398 | xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1, | ||
1399 | args->userdata); | ||
1400 | |||
1316 | if (args->userdata) { | 1401 | if (args->userdata) { |
1317 | xfs_buf_t *bp; | 1402 | xfs_buf_t *bp; |
1318 | 1403 | ||
@@ -1388,6 +1473,7 @@ xfs_free_ag_extent( | |||
1388 | xfs_mount_t *mp; /* mount point struct for filesystem */ | 1473 | xfs_mount_t *mp; /* mount point struct for filesystem */ |
1389 | xfs_agblock_t nbno; /* new starting block of freespace */ | 1474 | xfs_agblock_t nbno; /* new starting block of freespace */ |
1390 | xfs_extlen_t nlen; /* new length of freespace */ | 1475 | xfs_extlen_t nlen; /* new length of freespace */ |
1476 | xfs_perag_t *pag; /* per allocation group data */ | ||
1391 | 1477 | ||
1392 | mp = tp->t_mountp; | 1478 | mp = tp->t_mountp; |
1393 | /* | 1479 | /* |
@@ -1586,45 +1672,23 @@ xfs_free_ag_extent( | |||
1586 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 1672 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
1587 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1673 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
1588 | cnt_cur = NULL; | 1674 | cnt_cur = NULL; |
1675 | |||
1589 | /* | 1676 | /* |
1590 | * Update the freespace totals in the ag and superblock. | 1677 | * Update the freespace totals in the ag and superblock. |
1591 | */ | 1678 | */ |
1592 | { | 1679 | pag = xfs_perag_get(mp, agno); |
1593 | xfs_agf_t *agf; | 1680 | error = xfs_alloc_update_counters(tp, pag, agbp, len); |
1594 | xfs_perag_t *pag; /* per allocation group data */ | 1681 | xfs_perag_put(pag); |
1595 | 1682 | if (error) | |
1596 | pag = xfs_perag_get(mp, agno); | 1683 | goto error0; |
1597 | pag->pagf_freeblks += len; | ||
1598 | xfs_perag_put(pag); | ||
1599 | 1684 | ||
1600 | agf = XFS_BUF_TO_AGF(agbp); | 1685 | if (!isfl) |
1601 | be32_add_cpu(&agf->agf_freeblks, len); | 1686 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); |
1602 | xfs_trans_agblocks_delta(tp, len); | 1687 | XFS_STATS_INC(xs_freex); |
1603 | XFS_WANT_CORRUPTED_GOTO( | 1688 | XFS_STATS_ADD(xs_freeb, len); |
1604 | be32_to_cpu(agf->agf_freeblks) <= | ||
1605 | be32_to_cpu(agf->agf_length), | ||
1606 | error0); | ||
1607 | xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); | ||
1608 | if (!isfl) | ||
1609 | xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); | ||
1610 | XFS_STATS_INC(xs_freex); | ||
1611 | XFS_STATS_ADD(xs_freeb, len); | ||
1612 | } | ||
1613 | 1689 | ||
1614 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); | 1690 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); |
1615 | 1691 | ||
1616 | /* | ||
1617 | * Since blocks move to the free list without the coordination | ||
1618 | * used in xfs_bmap_finish, we can't allow block to be available | ||
1619 | * for reallocation and non-transaction writing (user data) | ||
1620 | * until we know that the transaction that moved it to the free | ||
1621 | * list is permanently on disk. We track the blocks by declaring | ||
1622 | * these blocks as "busy"; the busy list is maintained on a per-ag | ||
1623 | * basis and each transaction records which entries should be removed | ||
1624 | * when the iclog commits to disk. If a busy block is allocated, | ||
1625 | * the iclog is pushed up to the LSN that freed the block. | ||
1626 | */ | ||
1627 | xfs_alloc_busy_insert(tp, agno, bno, len); | ||
1628 | return 0; | 1692 | return 0; |
1629 | 1693 | ||
1630 | error0: | 1694 | error0: |
@@ -1919,21 +1983,6 @@ xfs_alloc_get_freelist( | |||
1919 | xfs_alloc_log_agf(tp, agbp, logflags); | 1983 | xfs_alloc_log_agf(tp, agbp, logflags); |
1920 | *bnop = bno; | 1984 | *bnop = bno; |
1921 | 1985 | ||
1922 | /* | ||
1923 | * As blocks are freed, they are added to the per-ag busy list and | ||
1924 | * remain there until the freeing transaction is committed to disk. | ||
1925 | * Now that we have allocated blocks, this list must be searched to see | ||
1926 | * if a block is being reused. If one is, then the freeing transaction | ||
1927 | * must be pushed to disk before this transaction. | ||
1928 | * | ||
1929 | * We do this by setting the current transaction to a sync transaction | ||
1930 | * which guarantees that the freeing transaction is on disk before this | ||
1931 | * transaction. This is done instead of a synchronous log force here so | ||
1932 | * that we don't sit and wait with the AGF locked in the transaction | ||
1933 | * during the log force. | ||
1934 | */ | ||
1935 | if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1)) | ||
1936 | xfs_trans_set_sync(tp); | ||
1937 | return 0; | 1986 | return 0; |
1938 | } | 1987 | } |
1939 | 1988 | ||
@@ -2391,131 +2440,54 @@ xfs_free_extent( | |||
2391 | memset(&args, 0, sizeof(xfs_alloc_arg_t)); | 2440 | memset(&args, 0, sizeof(xfs_alloc_arg_t)); |
2392 | args.tp = tp; | 2441 | args.tp = tp; |
2393 | args.mp = tp->t_mountp; | 2442 | args.mp = tp->t_mountp; |
2443 | |||
2444 | /* | ||
2445 | * validate that the block number is legal - the enables us to detect | ||
2446 | * and handle a silent filesystem corruption rather than crashing. | ||
2447 | */ | ||
2394 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); | 2448 | args.agno = XFS_FSB_TO_AGNO(args.mp, bno); |
2395 | ASSERT(args.agno < args.mp->m_sb.sb_agcount); | 2449 | if (args.agno >= args.mp->m_sb.sb_agcount) |
2450 | return EFSCORRUPTED; | ||
2451 | |||
2396 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); | 2452 | args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); |
2453 | if (args.agbno >= args.mp->m_sb.sb_agblocks) | ||
2454 | return EFSCORRUPTED; | ||
2455 | |||
2397 | args.pag = xfs_perag_get(args.mp, args.agno); | 2456 | args.pag = xfs_perag_get(args.mp, args.agno); |
2398 | if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) | 2457 | ASSERT(args.pag); |
2458 | |||
2459 | error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); | ||
2460 | if (error) | ||
2399 | goto error0; | 2461 | goto error0; |
2400 | #ifdef DEBUG | 2462 | |
2401 | ASSERT(args.agbp != NULL); | 2463 | /* validate the extent size is legal now we have the agf locked */ |
2402 | ASSERT((args.agbno + len) <= | 2464 | if (args.agbno + len > |
2403 | be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); | 2465 | be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { |
2404 | #endif | 2466 | error = EFSCORRUPTED; |
2467 | goto error0; | ||
2468 | } | ||
2469 | |||
2405 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); | 2470 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); |
2471 | if (!error) | ||
2472 | xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0); | ||
2406 | error0: | 2473 | error0: |
2407 | xfs_perag_put(args.pag); | 2474 | xfs_perag_put(args.pag); |
2408 | return error; | 2475 | return error; |
2409 | } | 2476 | } |
2410 | 2477 | ||
2411 | |||
2412 | /* | ||
2413 | * AG Busy list management | ||
2414 | * The busy list contains block ranges that have been freed but whose | ||
2415 | * transactions have not yet hit disk. If any block listed in a busy | ||
2416 | * list is reused, the transaction that freed it must be forced to disk | ||
2417 | * before continuing to use the block. | ||
2418 | * | ||
2419 | * xfs_alloc_busy_insert - add to the per-ag busy list | ||
2420 | * xfs_alloc_busy_clear - remove an item from the per-ag busy list | ||
2421 | * xfs_alloc_busy_search - search for a busy extent | ||
2422 | */ | ||
2423 | |||
2424 | /* | ||
2425 | * Insert a new extent into the busy tree. | ||
2426 | * | ||
2427 | * The busy extent tree is indexed by the start block of the busy extent. | ||
2428 | * there can be multiple overlapping ranges in the busy extent tree but only | ||
2429 | * ever one entry at a given start block. The reason for this is that | ||
2430 | * multi-block extents can be freed, then smaller chunks of that extent | ||
2431 | * allocated and freed again before the first transaction commit is on disk. | ||
2432 | * If the exact same start block is freed a second time, we have to wait for | ||
2433 | * that busy extent to pass out of the tree before the new extent is inserted. | ||
2434 | * There are two main cases we have to handle here. | ||
2435 | * | ||
2436 | * The first case is a transaction that triggers a "free - allocate - free" | ||
2437 | * cycle. This can occur during btree manipulations as a btree block is freed | ||
2438 | * to the freelist, then allocated from the free list, then freed again. In | ||
2439 | * this case, the second extxpnet free is what triggers the duplicate and as | ||
2440 | * such the transaction IDs should match. Because the extent was allocated in | ||
2441 | * this transaction, the transaction must be marked as synchronous. This is | ||
2442 | * true for all cases where the free/alloc/free occurs in the one transaction, | ||
2443 | * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case. | ||
2444 | * This serves to catch violations of the second case quite effectively. | ||
2445 | * | ||
2446 | * The second case is where the free/alloc/free occur in different | ||
2447 | * transactions. In this case, the thread freeing the extent the second time | ||
2448 | * can't mark the extent busy immediately because it is already tracked in a | ||
2449 | * transaction that may be committing. When the log commit for the existing | ||
2450 | * busy extent completes, the busy extent will be removed from the tree. If we | ||
2451 | * allow the second busy insert to continue using that busy extent structure, | ||
2452 | * it can be freed before this transaction is safely in the log. Hence our | ||
2453 | * only option in this case is to force the log to remove the existing busy | ||
2454 | * extent from the list before we insert the new one with the current | ||
2455 | * transaction ID. | ||
2456 | * | ||
2457 | * The problem we are trying to avoid in the free-alloc-free in separate | ||
2458 | * transactions is most easily described with a timeline: | ||
2459 | * | ||
2460 | * Thread 1 Thread 2 Thread 3 xfslogd | ||
2461 | * xact alloc | ||
2462 | * free X | ||
2463 | * mark busy | ||
2464 | * commit xact | ||
2465 | * free xact | ||
2466 | * xact alloc | ||
2467 | * alloc X | ||
2468 | * busy search | ||
2469 | * mark xact sync | ||
2470 | * commit xact | ||
2471 | * free xact | ||
2472 | * force log | ||
2473 | * checkpoint starts | ||
2474 | * .... | ||
2475 | * xact alloc | ||
2476 | * free X | ||
2477 | * mark busy | ||
2478 | * finds match | ||
2479 | * *** KABOOM! *** | ||
2480 | * .... | ||
2481 | * log IO completes | ||
2482 | * unbusy X | ||
2483 | * checkpoint completes | ||
2484 | * | ||
2485 | * By issuing a log force in thread 3 @ "KABOOM", the thread will block until | ||
2486 | * the checkpoint completes, and the busy extent it matched will have been | ||
2487 | * removed from the tree when it is woken. Hence it can then continue safely. | ||
2488 | * | ||
2489 | * However, to ensure this matching process is robust, we need to use the | ||
2490 | * transaction ID for identifying transaction, as delayed logging results in | ||
2491 | * the busy extent and transaction lifecycles being different. i.e. the busy | ||
2492 | * extent is active for a lot longer than the transaction. Hence the | ||
2493 | * transaction structure can be freed and reallocated, then mark the same | ||
2494 | * extent busy again in the new transaction. In this case the new transaction | ||
2495 | * will have a different tid but can have the same address, and hence we need | ||
2496 | * to check against the tid. | ||
2497 | * | ||
2498 | * Future: for delayed logging, we could avoid the log force if the extent was | ||
2499 | * first freed in the current checkpoint sequence. This, however, requires the | ||
2500 | * ability to pin the current checkpoint in memory until this transaction | ||
2501 | * commits to ensure that both the original free and the current one combine | ||
2502 | * logically into the one checkpoint. If the checkpoint sequences are | ||
2503 | * different, however, we still need to wait on a log force. | ||
2504 | */ | ||
2505 | void | 2478 | void |
2506 | xfs_alloc_busy_insert( | 2479 | xfs_alloc_busy_insert( |
2507 | struct xfs_trans *tp, | 2480 | struct xfs_trans *tp, |
2508 | xfs_agnumber_t agno, | 2481 | xfs_agnumber_t agno, |
2509 | xfs_agblock_t bno, | 2482 | xfs_agblock_t bno, |
2510 | xfs_extlen_t len) | 2483 | xfs_extlen_t len, |
2484 | unsigned int flags) | ||
2511 | { | 2485 | { |
2512 | struct xfs_busy_extent *new; | 2486 | struct xfs_busy_extent *new; |
2513 | struct xfs_busy_extent *busyp; | 2487 | struct xfs_busy_extent *busyp; |
2514 | struct xfs_perag *pag; | 2488 | struct xfs_perag *pag; |
2515 | struct rb_node **rbp; | 2489 | struct rb_node **rbp; |
2516 | struct rb_node *parent; | 2490 | struct rb_node *parent = NULL; |
2517 | int match; | ||
2518 | |||
2519 | 2491 | ||
2520 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); | 2492 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); |
2521 | if (!new) { | 2493 | if (!new) { |
@@ -2524,7 +2496,7 @@ xfs_alloc_busy_insert( | |||
2524 | * block, make this a synchronous transaction to insure that | 2496 | * block, make this a synchronous transaction to insure that |
2525 | * the block is not reused before this transaction commits. | 2497 | * the block is not reused before this transaction commits. |
2526 | */ | 2498 | */ |
2527 | trace_xfs_alloc_busy(tp, agno, bno, len, 1); | 2499 | trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len); |
2528 | xfs_trans_set_sync(tp); | 2500 | xfs_trans_set_sync(tp); |
2529 | return; | 2501 | return; |
2530 | } | 2502 | } |
@@ -2532,66 +2504,29 @@ xfs_alloc_busy_insert( | |||
2532 | new->agno = agno; | 2504 | new->agno = agno; |
2533 | new->bno = bno; | 2505 | new->bno = bno; |
2534 | new->length = len; | 2506 | new->length = len; |
2535 | new->tid = xfs_log_get_trans_ident(tp); | ||
2536 | |||
2537 | INIT_LIST_HEAD(&new->list); | 2507 | INIT_LIST_HEAD(&new->list); |
2508 | new->flags = flags; | ||
2538 | 2509 | ||
2539 | /* trace before insert to be able to see failed inserts */ | 2510 | /* trace before insert to be able to see failed inserts */ |
2540 | trace_xfs_alloc_busy(tp, agno, bno, len, 0); | 2511 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len); |
2541 | 2512 | ||
2542 | pag = xfs_perag_get(tp->t_mountp, new->agno); | 2513 | pag = xfs_perag_get(tp->t_mountp, new->agno); |
2543 | restart: | ||
2544 | spin_lock(&pag->pagb_lock); | 2514 | spin_lock(&pag->pagb_lock); |
2545 | rbp = &pag->pagb_tree.rb_node; | 2515 | rbp = &pag->pagb_tree.rb_node; |
2546 | parent = NULL; | 2516 | while (*rbp) { |
2547 | busyp = NULL; | ||
2548 | match = 0; | ||
2549 | while (*rbp && match >= 0) { | ||
2550 | parent = *rbp; | 2517 | parent = *rbp; |
2551 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); | 2518 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); |
2552 | 2519 | ||
2553 | if (new->bno < busyp->bno) { | 2520 | if (new->bno < busyp->bno) { |
2554 | /* may overlap, but exact start block is lower */ | ||
2555 | rbp = &(*rbp)->rb_left; | 2521 | rbp = &(*rbp)->rb_left; |
2556 | if (new->bno + new->length > busyp->bno) | 2522 | ASSERT(new->bno + new->length <= busyp->bno); |
2557 | match = busyp->tid == new->tid ? 1 : -1; | ||
2558 | } else if (new->bno > busyp->bno) { | 2523 | } else if (new->bno > busyp->bno) { |
2559 | /* may overlap, but exact start block is higher */ | ||
2560 | rbp = &(*rbp)->rb_right; | 2524 | rbp = &(*rbp)->rb_right; |
2561 | if (bno < busyp->bno + busyp->length) | 2525 | ASSERT(bno >= busyp->bno + busyp->length); |
2562 | match = busyp->tid == new->tid ? 1 : -1; | ||
2563 | } else { | 2526 | } else { |
2564 | match = busyp->tid == new->tid ? 1 : -1; | 2527 | ASSERT(0); |
2565 | break; | ||
2566 | } | 2528 | } |
2567 | } | 2529 | } |
2568 | if (match < 0) { | ||
2569 | /* overlap marked busy in different transaction */ | ||
2570 | spin_unlock(&pag->pagb_lock); | ||
2571 | xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); | ||
2572 | goto restart; | ||
2573 | } | ||
2574 | if (match > 0) { | ||
2575 | /* | ||
2576 | * overlap marked busy in same transaction. Update if exact | ||
2577 | * start block match, otherwise combine the busy extents into | ||
2578 | * a single range. | ||
2579 | */ | ||
2580 | if (busyp->bno == new->bno) { | ||
2581 | busyp->length = max(busyp->length, new->length); | ||
2582 | spin_unlock(&pag->pagb_lock); | ||
2583 | ASSERT(tp->t_flags & XFS_TRANS_SYNC); | ||
2584 | xfs_perag_put(pag); | ||
2585 | kmem_free(new); | ||
2586 | return; | ||
2587 | } | ||
2588 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2589 | new->length = max(busyp->bno + busyp->length, | ||
2590 | new->bno + new->length) - | ||
2591 | min(busyp->bno, new->bno); | ||
2592 | new->bno = min(busyp->bno, new->bno); | ||
2593 | } else | ||
2594 | busyp = NULL; | ||
2595 | 2530 | ||
2596 | rb_link_node(&new->rb_node, parent, rbp); | 2531 | rb_link_node(&new->rb_node, parent, rbp); |
2597 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | 2532 | rb_insert_color(&new->rb_node, &pag->pagb_tree); |
@@ -2599,7 +2534,6 @@ restart: | |||
2599 | list_add(&new->list, &tp->t_busy); | 2534 | list_add(&new->list, &tp->t_busy); |
2600 | spin_unlock(&pag->pagb_lock); | 2535 | spin_unlock(&pag->pagb_lock); |
2601 | xfs_perag_put(pag); | 2536 | xfs_perag_put(pag); |
2602 | kmem_free(busyp); | ||
2603 | } | 2537 | } |
2604 | 2538 | ||
2605 | /* | 2539 | /* |
@@ -2648,31 +2582,466 @@ xfs_alloc_busy_search( | |||
2648 | } | 2582 | } |
2649 | } | 2583 | } |
2650 | spin_unlock(&pag->pagb_lock); | 2584 | spin_unlock(&pag->pagb_lock); |
2651 | trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); | ||
2652 | xfs_perag_put(pag); | 2585 | xfs_perag_put(pag); |
2653 | return match; | 2586 | return match; |
2654 | } | 2587 | } |
2655 | 2588 | ||
2589 | /* | ||
2590 | * The found free extent [fbno, fend] overlaps part or all of the given busy | ||
2591 | * extent. If the overlap covers the beginning, the end, or all of the busy | ||
2592 | * extent, the overlapping portion can be made unbusy and used for the | ||
2593 | * allocation. We can't split a busy extent because we can't modify a | ||
2594 | * transaction/CIL context busy list, but we can update an entries block | ||
2595 | * number or length. | ||
2596 | * | ||
2597 | * Returns true if the extent can safely be reused, or false if the search | ||
2598 | * needs to be restarted. | ||
2599 | */ | ||
2600 | STATIC bool | ||
2601 | xfs_alloc_busy_update_extent( | ||
2602 | struct xfs_mount *mp, | ||
2603 | struct xfs_perag *pag, | ||
2604 | struct xfs_busy_extent *busyp, | ||
2605 | xfs_agblock_t fbno, | ||
2606 | xfs_extlen_t flen, | ||
2607 | bool userdata) | ||
2608 | { | ||
2609 | xfs_agblock_t fend = fbno + flen; | ||
2610 | xfs_agblock_t bbno = busyp->bno; | ||
2611 | xfs_agblock_t bend = bbno + busyp->length; | ||
2612 | |||
2613 | /* | ||
2614 | * This extent is currently being discarded. Give the thread | ||
2615 | * performing the discard a chance to mark the extent unbusy | ||
2616 | * and retry. | ||
2617 | */ | ||
2618 | if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) { | ||
2619 | spin_unlock(&pag->pagb_lock); | ||
2620 | delay(1); | ||
2621 | spin_lock(&pag->pagb_lock); | ||
2622 | return false; | ||
2623 | } | ||
2624 | |||
2625 | /* | ||
2626 | * If there is a busy extent overlapping a user allocation, we have | ||
2627 | * no choice but to force the log and retry the search. | ||
2628 | * | ||
2629 | * Fortunately this does not happen during normal operation, but | ||
2630 | * only if the filesystem is very low on space and has to dip into | ||
2631 | * the AGFL for normal allocations. | ||
2632 | */ | ||
2633 | if (userdata) | ||
2634 | goto out_force_log; | ||
2635 | |||
2636 | if (bbno < fbno && bend > fend) { | ||
2637 | /* | ||
2638 | * Case 1: | ||
2639 | * bbno bend | ||
2640 | * +BBBBBBBBBBBBBBBBB+ | ||
2641 | * +---------+ | ||
2642 | * fbno fend | ||
2643 | */ | ||
2644 | |||
2645 | /* | ||
2646 | * We would have to split the busy extent to be able to track | ||
2647 | * it correct, which we cannot do because we would have to | ||
2648 | * modify the list of busy extents attached to the transaction | ||
2649 | * or CIL context, which is immutable. | ||
2650 | * | ||
2651 | * Force out the log to clear the busy extent and retry the | ||
2652 | * search. | ||
2653 | */ | ||
2654 | goto out_force_log; | ||
2655 | } else if (bbno >= fbno && bend <= fend) { | ||
2656 | /* | ||
2657 | * Case 2: | ||
2658 | * bbno bend | ||
2659 | * +BBBBBBBBBBBBBBBBB+ | ||
2660 | * +-----------------+ | ||
2661 | * fbno fend | ||
2662 | * | ||
2663 | * Case 3: | ||
2664 | * bbno bend | ||
2665 | * +BBBBBBBBBBBBBBBBB+ | ||
2666 | * +--------------------------+ | ||
2667 | * fbno fend | ||
2668 | * | ||
2669 | * Case 4: | ||
2670 | * bbno bend | ||
2671 | * +BBBBBBBBBBBBBBBBB+ | ||
2672 | * +--------------------------+ | ||
2673 | * fbno fend | ||
2674 | * | ||
2675 | * Case 5: | ||
2676 | * bbno bend | ||
2677 | * +BBBBBBBBBBBBBBBBB+ | ||
2678 | * +-----------------------------------+ | ||
2679 | * fbno fend | ||
2680 | * | ||
2681 | */ | ||
2682 | |||
2683 | /* | ||
2684 | * The busy extent is fully covered by the extent we are | ||
2685 | * allocating, and can simply be removed from the rbtree. | ||
2686 | * However we cannot remove it from the immutable list | ||
2687 | * tracking busy extents in the transaction or CIL context, | ||
2688 | * so set the length to zero to mark it invalid. | ||
2689 | * | ||
2690 | * We also need to restart the busy extent search from the | ||
2691 | * tree root, because erasing the node can rearrange the | ||
2692 | * tree topology. | ||
2693 | */ | ||
2694 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2695 | busyp->length = 0; | ||
2696 | return false; | ||
2697 | } else if (fend < bend) { | ||
2698 | /* | ||
2699 | * Case 6: | ||
2700 | * bbno bend | ||
2701 | * +BBBBBBBBBBBBBBBBB+ | ||
2702 | * +---------+ | ||
2703 | * fbno fend | ||
2704 | * | ||
2705 | * Case 7: | ||
2706 | * bbno bend | ||
2707 | * +BBBBBBBBBBBBBBBBB+ | ||
2708 | * +------------------+ | ||
2709 | * fbno fend | ||
2710 | * | ||
2711 | */ | ||
2712 | busyp->bno = fend; | ||
2713 | } else if (bbno < fbno) { | ||
2714 | /* | ||
2715 | * Case 8: | ||
2716 | * bbno bend | ||
2717 | * +BBBBBBBBBBBBBBBBB+ | ||
2718 | * +-------------+ | ||
2719 | * fbno fend | ||
2720 | * | ||
2721 | * Case 9: | ||
2722 | * bbno bend | ||
2723 | * +BBBBBBBBBBBBBBBBB+ | ||
2724 | * +----------------------+ | ||
2725 | * fbno fend | ||
2726 | */ | ||
2727 | busyp->length = fbno - busyp->bno; | ||
2728 | } else { | ||
2729 | ASSERT(0); | ||
2730 | } | ||
2731 | |||
2732 | trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen); | ||
2733 | return true; | ||
2734 | |||
2735 | out_force_log: | ||
2736 | spin_unlock(&pag->pagb_lock); | ||
2737 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
2738 | trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen); | ||
2739 | spin_lock(&pag->pagb_lock); | ||
2740 | return false; | ||
2741 | } | ||
2742 | |||
2743 | |||
2744 | /* | ||
2745 | * For a given extent [fbno, flen], make sure we can reuse it safely. | ||
2746 | */ | ||
2656 | void | 2747 | void |
2657 | xfs_alloc_busy_clear( | 2748 | xfs_alloc_busy_reuse( |
2658 | struct xfs_mount *mp, | 2749 | struct xfs_mount *mp, |
2659 | struct xfs_busy_extent *busyp) | 2750 | xfs_agnumber_t agno, |
2751 | xfs_agblock_t fbno, | ||
2752 | xfs_extlen_t flen, | ||
2753 | bool userdata) | ||
2660 | { | 2754 | { |
2661 | struct xfs_perag *pag; | 2755 | struct xfs_perag *pag; |
2756 | struct rb_node *rbp; | ||
2662 | 2757 | ||
2663 | trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, | 2758 | ASSERT(flen > 0); |
2664 | busyp->length); | ||
2665 | 2759 | ||
2666 | ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, | 2760 | pag = xfs_perag_get(mp, agno); |
2667 | busyp->length) == 1); | 2761 | spin_lock(&pag->pagb_lock); |
2762 | restart: | ||
2763 | rbp = pag->pagb_tree.rb_node; | ||
2764 | while (rbp) { | ||
2765 | struct xfs_busy_extent *busyp = | ||
2766 | rb_entry(rbp, struct xfs_busy_extent, rb_node); | ||
2767 | xfs_agblock_t bbno = busyp->bno; | ||
2768 | xfs_agblock_t bend = bbno + busyp->length; | ||
2668 | 2769 | ||
2669 | list_del_init(&busyp->list); | 2770 | if (fbno + flen <= bbno) { |
2771 | rbp = rbp->rb_left; | ||
2772 | continue; | ||
2773 | } else if (fbno >= bend) { | ||
2774 | rbp = rbp->rb_right; | ||
2775 | continue; | ||
2776 | } | ||
2670 | 2777 | ||
2671 | pag = xfs_perag_get(mp, busyp->agno); | 2778 | if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen, |
2672 | spin_lock(&pag->pagb_lock); | 2779 | userdata)) |
2673 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | 2780 | goto restart; |
2781 | } | ||
2674 | spin_unlock(&pag->pagb_lock); | 2782 | spin_unlock(&pag->pagb_lock); |
2675 | xfs_perag_put(pag); | 2783 | xfs_perag_put(pag); |
2784 | } | ||
2785 | |||
2786 | /* | ||
2787 | * For a given extent [fbno, flen], search the busy extent list to find a | ||
2788 | * subset of the extent that is not busy. If *rlen is smaller than | ||
2789 | * args->minlen no suitable extent could be found, and the higher level | ||
2790 | * code needs to force out the log and retry the allocation. | ||
2791 | */ | ||
2792 | STATIC void | ||
2793 | xfs_alloc_busy_trim( | ||
2794 | struct xfs_alloc_arg *args, | ||
2795 | xfs_agblock_t bno, | ||
2796 | xfs_extlen_t len, | ||
2797 | xfs_agblock_t *rbno, | ||
2798 | xfs_extlen_t *rlen) | ||
2799 | { | ||
2800 | xfs_agblock_t fbno; | ||
2801 | xfs_extlen_t flen; | ||
2802 | struct rb_node *rbp; | ||
2803 | |||
2804 | ASSERT(len > 0); | ||
2805 | |||
2806 | spin_lock(&args->pag->pagb_lock); | ||
2807 | restart: | ||
2808 | fbno = bno; | ||
2809 | flen = len; | ||
2810 | rbp = args->pag->pagb_tree.rb_node; | ||
2811 | while (rbp && flen >= args->minlen) { | ||
2812 | struct xfs_busy_extent *busyp = | ||
2813 | rb_entry(rbp, struct xfs_busy_extent, rb_node); | ||
2814 | xfs_agblock_t fend = fbno + flen; | ||
2815 | xfs_agblock_t bbno = busyp->bno; | ||
2816 | xfs_agblock_t bend = bbno + busyp->length; | ||
2817 | |||
2818 | if (fend <= bbno) { | ||
2819 | rbp = rbp->rb_left; | ||
2820 | continue; | ||
2821 | } else if (fbno >= bend) { | ||
2822 | rbp = rbp->rb_right; | ||
2823 | continue; | ||
2824 | } | ||
2825 | |||
2826 | /* | ||
2827 | * If this is a metadata allocation, try to reuse the busy | ||
2828 | * extent instead of trimming the allocation. | ||
2829 | */ | ||
2830 | if (!args->userdata && | ||
2831 | !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) { | ||
2832 | if (!xfs_alloc_busy_update_extent(args->mp, args->pag, | ||
2833 | busyp, fbno, flen, | ||
2834 | false)) | ||
2835 | goto restart; | ||
2836 | continue; | ||
2837 | } | ||
2838 | |||
2839 | if (bbno <= fbno) { | ||
2840 | /* start overlap */ | ||
2841 | |||
2842 | /* | ||
2843 | * Case 1: | ||
2844 | * bbno bend | ||
2845 | * +BBBBBBBBBBBBBBBBB+ | ||
2846 | * +---------+ | ||
2847 | * fbno fend | ||
2848 | * | ||
2849 | * Case 2: | ||
2850 | * bbno bend | ||
2851 | * +BBBBBBBBBBBBBBBBB+ | ||
2852 | * +-------------+ | ||
2853 | * fbno fend | ||
2854 | * | ||
2855 | * Case 3: | ||
2856 | * bbno bend | ||
2857 | * +BBBBBBBBBBBBBBBBB+ | ||
2858 | * +-------------+ | ||
2859 | * fbno fend | ||
2860 | * | ||
2861 | * Case 4: | ||
2862 | * bbno bend | ||
2863 | * +BBBBBBBBBBBBBBBBB+ | ||
2864 | * +-----------------+ | ||
2865 | * fbno fend | ||
2866 | * | ||
2867 | * No unbusy region in extent, return failure. | ||
2868 | */ | ||
2869 | if (fend <= bend) | ||
2870 | goto fail; | ||
2871 | |||
2872 | /* | ||
2873 | * Case 5: | ||
2874 | * bbno bend | ||
2875 | * +BBBBBBBBBBBBBBBBB+ | ||
2876 | * +----------------------+ | ||
2877 | * fbno fend | ||
2878 | * | ||
2879 | * Case 6: | ||
2880 | * bbno bend | ||
2881 | * +BBBBBBBBBBBBBBBBB+ | ||
2882 | * +--------------------------+ | ||
2883 | * fbno fend | ||
2884 | * | ||
2885 | * Needs to be trimmed to: | ||
2886 | * +-------+ | ||
2887 | * fbno fend | ||
2888 | */ | ||
2889 | fbno = bend; | ||
2890 | } else if (bend >= fend) { | ||
2891 | /* end overlap */ | ||
2892 | |||
2893 | /* | ||
2894 | * Case 7: | ||
2895 | * bbno bend | ||
2896 | * +BBBBBBBBBBBBBBBBB+ | ||
2897 | * +------------------+ | ||
2898 | * fbno fend | ||
2899 | * | ||
2900 | * Case 8: | ||
2901 | * bbno bend | ||
2902 | * +BBBBBBBBBBBBBBBBB+ | ||
2903 | * +--------------------------+ | ||
2904 | * fbno fend | ||
2905 | * | ||
2906 | * Needs to be trimmed to: | ||
2907 | * +-------+ | ||
2908 | * fbno fend | ||
2909 | */ | ||
2910 | fend = bbno; | ||
2911 | } else { | ||
2912 | /* middle overlap */ | ||
2913 | |||
2914 | /* | ||
2915 | * Case 9: | ||
2916 | * bbno bend | ||
2917 | * +BBBBBBBBBBBBBBBBB+ | ||
2918 | * +-----------------------------------+ | ||
2919 | * fbno fend | ||
2920 | * | ||
2921 | * Can be trimmed to: | ||
2922 | * +-------+ OR +-------+ | ||
2923 | * fbno fend fbno fend | ||
2924 | * | ||
2925 | * Backward allocation leads to significant | ||
2926 | * fragmentation of directories, which degrades | ||
2927 | * directory performance, therefore we always want to | ||
2928 | * choose the option that produces forward allocation | ||
2929 | * patterns. | ||
2930 | * Preferring the lower bno extent will make the next | ||
2931 | * request use "fend" as the start of the next | ||
2932 | * allocation; if the segment is no longer busy at | ||
2933 | * that point, we'll get a contiguous allocation, but | ||
2934 | * even if it is still busy, we will get a forward | ||
2935 | * allocation. | ||
2936 | * We try to avoid choosing the segment at "bend", | ||
2937 | * because that can lead to the next allocation | ||
2938 | * taking the segment at "fbno", which would be a | ||
2939 | * backward allocation. We only use the segment at | ||
2940 | * "fbno" if it is much larger than the current | ||
2941 | * requested size, because in that case there's a | ||
2942 | * good chance subsequent allocations will be | ||
2943 | * contiguous. | ||
2944 | */ | ||
2945 | if (bbno - fbno >= args->maxlen) { | ||
2946 | /* left candidate fits perfect */ | ||
2947 | fend = bbno; | ||
2948 | } else if (fend - bend >= args->maxlen * 4) { | ||
2949 | /* right candidate has enough free space */ | ||
2950 | fbno = bend; | ||
2951 | } else if (bbno - fbno >= args->minlen) { | ||
2952 | /* left candidate fits minimum requirement */ | ||
2953 | fend = bbno; | ||
2954 | } else { | ||
2955 | goto fail; | ||
2956 | } | ||
2957 | } | ||
2958 | |||
2959 | flen = fend - fbno; | ||
2960 | } | ||
2961 | spin_unlock(&args->pag->pagb_lock); | ||
2962 | |||
2963 | if (fbno != bno || flen != len) { | ||
2964 | trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, | ||
2965 | fbno, flen); | ||
2966 | } | ||
2967 | *rbno = fbno; | ||
2968 | *rlen = flen; | ||
2969 | return; | ||
2970 | fail: | ||
2971 | /* | ||
2972 | * Return a zero extent length as failure indications. All callers | ||
2973 | * re-check if the trimmed extent satisfies the minlen requirement. | ||
2974 | */ | ||
2975 | spin_unlock(&args->pag->pagb_lock); | ||
2976 | trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0); | ||
2977 | *rbno = fbno; | ||
2978 | *rlen = 0; | ||
2979 | } | ||
2980 | |||
2981 | static void | ||
2982 | xfs_alloc_busy_clear_one( | ||
2983 | struct xfs_mount *mp, | ||
2984 | struct xfs_perag *pag, | ||
2985 | struct xfs_busy_extent *busyp) | ||
2986 | { | ||
2987 | if (busyp->length) { | ||
2988 | trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno, | ||
2989 | busyp->length); | ||
2990 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
2991 | } | ||
2676 | 2992 | ||
2993 | list_del_init(&busyp->list); | ||
2677 | kmem_free(busyp); | 2994 | kmem_free(busyp); |
2678 | } | 2995 | } |
2996 | |||
2997 | /* | ||
2998 | * Remove all extents on the passed in list from the busy extents tree. | ||
2999 | * If do_discard is set skip extents that need to be discarded, and mark | ||
3000 | * these as undergoing a discard operation instead. | ||
3001 | */ | ||
3002 | void | ||
3003 | xfs_alloc_busy_clear( | ||
3004 | struct xfs_mount *mp, | ||
3005 | struct list_head *list, | ||
3006 | bool do_discard) | ||
3007 | { | ||
3008 | struct xfs_busy_extent *busyp, *n; | ||
3009 | struct xfs_perag *pag = NULL; | ||
3010 | xfs_agnumber_t agno = NULLAGNUMBER; | ||
3011 | |||
3012 | list_for_each_entry_safe(busyp, n, list, list) { | ||
3013 | if (busyp->agno != agno) { | ||
3014 | if (pag) { | ||
3015 | spin_unlock(&pag->pagb_lock); | ||
3016 | xfs_perag_put(pag); | ||
3017 | } | ||
3018 | pag = xfs_perag_get(mp, busyp->agno); | ||
3019 | spin_lock(&pag->pagb_lock); | ||
3020 | agno = busyp->agno; | ||
3021 | } | ||
3022 | |||
3023 | if (do_discard && busyp->length && | ||
3024 | !(busyp->flags & XFS_ALLOC_BUSY_SKIP_DISCARD)) | ||
3025 | busyp->flags = XFS_ALLOC_BUSY_DISCARDED; | ||
3026 | else | ||
3027 | xfs_alloc_busy_clear_one(mp, pag, busyp); | ||
3028 | } | ||
3029 | |||
3030 | if (pag) { | ||
3031 | spin_unlock(&pag->pagb_lock); | ||
3032 | xfs_perag_put(pag); | ||
3033 | } | ||
3034 | } | ||
3035 | |||
3036 | /* | ||
3037 | * Callback for list_sort to sort busy extents by the AG they reside in. | ||
3038 | */ | ||
3039 | int | ||
3040 | xfs_busy_extent_ag_cmp( | ||
3041 | void *priv, | ||
3042 | struct list_head *a, | ||
3043 | struct list_head *b) | ||
3044 | { | ||
3045 | return container_of(a, struct xfs_busy_extent, list)->agno - | ||
3046 | container_of(b, struct xfs_busy_extent, list)->agno; | ||
3047 | } | ||