author		Christoph Hellwig <hch@infradead.org>	2011-04-24 15:06:15 -0400
committer	Alex Elder <aelder@sgi.com>	2011-04-28 14:18:01 -0400
commit		e26f0501cf743a4289603501413f97ffcb4612f2 (patch)
tree		6327f071c53ddb494d59ce04b3cf8a760aceb987 /fs/xfs
parent		a870acd9b2671de56514a430edfa7867823c31c9 (diff)
xfs: do not immediately reuse busy extent ranges
Every time we reallocate a busy extent, we cause a synchronous log force
to occur to ensure the freeing transaction is on disk before we continue
and use the newly allocated extent.  This is extremely sub-optimal as we
have to mark every transaction with blocks that get reused as
synchronous.

Instead of searching the busy extent list after deciding on the extent
to allocate, check each candidate extent during the allocation decisions
as to whether they are in the busy list.  If they are in the busy list,
we trim the busy range out of the extent we have found and determine if
that trimmed range is still OK for allocation.  In many cases, this
check can be incorporated into the allocation extent alignment code
which already does trimming of the found extent before determining if
it is a valid candidate for allocation.

Based on earlier patches from Dave Chinner.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
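[Editor's note] The core of the change is the overlap trimming: given a candidate
free extent and a busy range, clip the busy blocks out and keep a usable
remainder.  The following is a minimal, self-contained sketch of that
arithmetic only — the types and names are simplified stand-ins, it handles a
single busy range rather than walking the rbtree under pag->pagb_lock, and it
uses a plain "keep the larger side" tie-break instead of the forward-allocation
heuristic the real xfs_alloc_busy_trim() applies to middle overlaps (see the
Case 9 comment in the diff below):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t agblock_t;	/* stand-in for xfs_agblock_t */
typedef uint32_t extlen_t;	/* stand-in for xfs_extlen_t */

/*
 * Clip one busy range [bbno, bbno + blen) out of a candidate free
 * extent [*fbno, *fbno + *flen) and keep a usable remainder.
 * Returns 0 if the candidate is entirely busy.
 */
static int trim_one_busy_range(agblock_t *fbno, extlen_t *flen,
			       agblock_t bbno, extlen_t blen)
{
	agblock_t fend = *fbno + *flen;
	agblock_t bend = bbno + blen;

	if (fend <= bbno || *fbno >= bend)
		return 1;		/* no overlap, nothing to trim */
	if (bbno <= *fbno && bend >= fend)
		return 0;		/* candidate is entirely busy */

	if (bbno <= *fbno)
		*fbno = bend;		/* start overlap: keep the tail */
	else if (bend >= fend)
		fend = bbno;		/* end overlap: keep the head */
	else if (bbno - *fbno >= fend - bend)
		fend = bbno;		/* middle overlap: keep left part */
	else
		*fbno = bend;		/* middle overlap: keep right part */

	*flen = fend - *fbno;
	return 1;
}

int main(void)
{
	agblock_t bno = 100;
	extlen_t len = 50;	/* candidate free extent [100, 150) */

	/* busy range [115, 130) overlaps the middle of the candidate */
	if (trim_one_busy_range(&bno, &len, 115, 15))
		/* prints "usable: bno 130 len 20" */
		printf("usable: bno %u len %u\n",
		       (unsigned)bno, (unsigned)len);
	return 0;
}

The kernel version reports failure by returning a zero length, which every
caller compares against args->minlen before using the trimmed extent.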
Diffstat (limited to 'fs/xfs')
-rw-r--r--  fs/xfs/linux-2.6/xfs_trace.h |  33
-rw-r--r--  fs/xfs/xfs_alloc.c           | 407
2 files changed, 361 insertions, 79 deletions
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 2d0bcb479075..e5252c4d95c9 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -1241,6 +1241,36 @@ TRACE_EVENT(xfs_alloc_busysearch,
 		  __print_symbolic(__entry->found, XFS_BUSY_STATES))
 );
 
+TRACE_EVENT(xfs_alloc_busy_trim,
+	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+		 xfs_agblock_t agbno, xfs_extlen_t len,
+		 xfs_agblock_t tbno, xfs_extlen_t tlen),
+	TP_ARGS(mp, agno, agbno, len, tbno, tlen),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_agnumber_t, agno)
+		__field(xfs_agblock_t, agbno)
+		__field(xfs_extlen_t, len)
+		__field(xfs_agblock_t, tbno)
+		__field(xfs_extlen_t, tlen)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->agno = agno;
+		__entry->agbno = agbno;
+		__entry->len = len;
+		__entry->tbno = tbno;
+		__entry->tlen = tlen;
+	),
+	TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->agno,
+		  __entry->agbno,
+		  __entry->len,
+		  __entry->tbno,
+		  __entry->tlen)
+);
+
 TRACE_EVENT(xfs_trans_commit_lsn,
 	TP_PROTO(struct xfs_trans *trans),
 	TP_ARGS(trans),
@@ -1433,11 +1463,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
 DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
+DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
 DEFINE_ALLOC_EVENT(xfs_alloc_size_neither);
 DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry);
 DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft);
 DEFINE_ALLOC_EVENT(xfs_alloc_size_done);
 DEFINE_ALLOC_EVENT(xfs_alloc_size_error);
+DEFINE_ALLOC_EVENT(xfs_alloc_size_busy);
 DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist);
 DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough);
 DEFINE_ALLOC_EVENT(xfs_alloc_small_done);
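[Editor's note] A line emitted by the new xfs_alloc_busy_trim tracepoint follows
the TP_printk format defined above; the values here are invented solely to
show the shape of the output:

    xfs_alloc_busy_trim: dev 253:0 agno 2 agbno 2048 len 64 tbno 2064 tlen 48

Here [tbno, tbno + tlen) is the non-busy subset that trimming kept out of the
found extent [agbno, agbno + len).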
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 37c4952c2f53..ff54ef428348 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -41,19 +41,13 @@
 #define	XFSA_FIXUP_BNO_OK	1
 #define	XFSA_FIXUP_CNT_OK	2
 
-/*
- * Prototypes for per-ag allocation routines
- */
-
 STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
 STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
 	xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
-
-/*
- * Internal functions.
- */
+STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *,
+	xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *);
 
 /*
  * Lookup the record equal to [bno, len] in the btree given by cur.
@@ -154,19 +148,21 @@ xfs_alloc_compute_aligned(
 	xfs_extlen_t	*reslen)	/* result length */
 {
 	xfs_agblock_t	bno;
-	xfs_extlen_t	diff;
 	xfs_extlen_t	len;
 
-	if (args->alignment > 1 && foundlen >= args->minlen) {
-		bno = roundup(foundbno, args->alignment);
-		diff = bno - foundbno;
-		len = diff >= foundlen ? 0 : foundlen - diff;
+	/* Trim busy sections out of found extent */
+	xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len);
+
+	if (args->alignment > 1 && len >= args->minlen) {
+		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);
+		xfs_extlen_t	diff = aligned_bno - bno;
+
+		*resbno = aligned_bno;
+		*reslen = diff >= len ? 0 : len - diff;
 	} else {
-		bno = foundbno;
-		len = foundlen;
+		*resbno = bno;
+		*reslen = len;
 	}
-	*resbno = bno;
-	*reslen = len;
 }
 
 /*
@@ -541,16 +537,8 @@ xfs_alloc_ag_vextent(
 		if (error)
 			return error;
 
-		/*
-		 * Search the busylist for these blocks and mark the
-		 * transaction as synchronous if blocks are found. This
-		 * avoids the need to block due to a synchronous log
-		 * force to ensure correct ordering as the synchronous
-		 * transaction will guarantee that for us.
-		 */
-		if (xfs_alloc_busy_search(args->mp, args->agno,
-					args->agbno, args->len))
-			xfs_trans_set_sync(args->tp);
+		ASSERT(!xfs_alloc_busy_search(args->mp, args->agno,
+					      args->agbno, args->len));
 	}
 
 	if (!args->isfl) {
@@ -577,14 +565,14 @@ xfs_alloc_ag_vextent_exact(
 {
 	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
 	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
-	xfs_agblock_t	end;	/* end of allocated extent */
 	int		error;
 	xfs_agblock_t	fbno;	/* start block of found extent */
-	xfs_agblock_t	fend;	/* end block of found extent */
 	xfs_extlen_t	flen;	/* length of found extent */
+	xfs_agblock_t	tbno;	/* start block of trimmed extent */
+	xfs_extlen_t	tlen;	/* length of trimmed extent */
+	xfs_agblock_t	tend;	/* end block of trimmed extent */
+	xfs_agblock_t	end;	/* end of allocated extent */
 	int		i;	/* success/failure of operation */
-	xfs_agblock_t	maxend;	/* end of maximal extent */
-	xfs_agblock_t	minend;	/* end of minimal extent */
 	xfs_extlen_t	rlen;	/* length of returned extent */
 
 	ASSERT(args->alignment == 1);
@@ -614,14 +602,22 @@ xfs_alloc_ag_vextent_exact(
 		goto error0;
 	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
 	ASSERT(fbno <= args->agbno);
-	minend = args->agbno + args->minlen;
-	maxend = args->agbno + args->maxlen;
-	fend = fbno + flen;
 
 	/*
-	 * Give up if the freespace isn't long enough for the minimum request.
+	 * Check for overlapping busy extents.
 	 */
-	if (fend < minend)
+	xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen);
+
+	/*
+	 * Give up if the start of the extent is busy, or the freespace isn't
+	 * long enough for the minimum request.
+	 */
+	if (tbno > args->agbno)
+		goto not_found;
+	if (tlen < args->minlen)
+		goto not_found;
+	tend = tbno + tlen;
+	if (tend < args->agbno + args->minlen)
 		goto not_found;
 
 	/*
@@ -630,14 +626,14 @@ xfs_alloc_ag_vextent_exact(
 	 *
 	 * Fix the length according to mod and prod if given.
 	 */
-	end = XFS_AGBLOCK_MIN(fend, maxend);
+	end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen);
 	args->len = end - args->agbno;
 	xfs_alloc_fix_len(args);
 	if (!xfs_alloc_fix_minleft(args))
 		goto not_found;
 
 	rlen = args->len;
-	ASSERT(args->agbno + rlen <= fend);
+	ASSERT(args->agbno + rlen <= tend);
 	end = args->agbno + rlen;
 
 	/*
@@ -686,11 +682,11 @@ xfs_alloc_find_best_extent(
 	struct xfs_btree_cur	**scur,	/* searching cursor */
 	xfs_agblock_t		gdiff,	/* difference for search comparison */
 	xfs_agblock_t		*sbno,	/* extent found by search */
-	xfs_extlen_t		*slen,
-	xfs_extlen_t		*slena,	/* aligned length */
+	xfs_extlen_t		*slen,	/* extent length */
+	xfs_agblock_t		*sbnoa,	/* aligned extent found by search */
+	xfs_extlen_t		*slena,	/* aligned extent length */
 	int			dir)	/* 0 = search right, 1 = search left */
 {
-	xfs_agblock_t		bno;
 	xfs_agblock_t		new;
 	xfs_agblock_t		sdiff;
 	int			error;
@@ -708,16 +704,16 @@ xfs_alloc_find_best_extent(
 		if (error)
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-		xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena);
+		xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
 
 		/*
 		 * The good extent is closer than this one.
 		 */
 		if (!dir) {
-			if (bno >= args->agbno + gdiff)
+			if (*sbnoa >= args->agbno + gdiff)
 				goto out_use_good;
 		} else {
-			if (bno <= args->agbno - gdiff)
+			if (*sbnoa <= args->agbno - gdiff)
 				goto out_use_good;
 		}
 
@@ -729,8 +725,8 @@ xfs_alloc_find_best_extent(
 		xfs_alloc_fix_len(args);
 
 		sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-					       args->alignment, *sbno,
-					       *slen, &new);
+					       args->alignment, *sbnoa,
+					       *slena, &new);
 
 		/*
 		 * Choose closer size and invalidate other cursor.
@@ -780,7 +776,7 @@ xfs_alloc_ag_vextent_near(
 	xfs_agblock_t	gtbnoa;		/* aligned ... */
 	xfs_extlen_t	gtdiff;		/* difference to right side entry */
 	xfs_extlen_t	gtlen;		/* length of right side entry */
-	xfs_extlen_t	gtlena = 0;	/* aligned ... */
+	xfs_extlen_t	gtlena;		/* aligned ... */
 	xfs_agblock_t	gtnew;		/* useful start bno of right side */
 	int		error;		/* error code */
 	int		i;		/* result code, temporary */
@@ -789,9 +785,10 @@ xfs_alloc_ag_vextent_near(
 	xfs_agblock_t	ltbnoa;		/* aligned ... */
 	xfs_extlen_t	ltdiff;		/* difference to left side entry */
 	xfs_extlen_t	ltlen;		/* length of left side entry */
-	xfs_extlen_t	ltlena = 0;	/* aligned ... */
+	xfs_extlen_t	ltlena;		/* aligned ... */
 	xfs_agblock_t	ltnew;		/* useful start bno of left side */
 	xfs_extlen_t	rlen;		/* length of returned extent */
+	int		forced = 0;
 #if defined(DEBUG) && defined(__KERNEL__)
 	/*
 	 * Randomly don't execute the first algorithm.
@@ -800,13 +797,20 @@ xfs_alloc_ag_vextent_near(
 
 	dofirst = random32() & 1;
 #endif
+
+restart:
+	bno_cur_lt = NULL;
+	bno_cur_gt = NULL;
+	ltlen = 0;
+	gtlena = 0;
+	ltlena = 0;
+
 	/*
 	 * Get a cursor for the by-size btree.
 	 */
 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 		args->agno, XFS_BTNUM_CNT);
-	ltlen = 0;
-	bno_cur_lt = bno_cur_gt = NULL;
+
 	/*
 	 * See if there are any free extents as big as maxlen.
 	 */
@@ -822,11 +826,13 @@ xfs_alloc_ag_vextent_near(
 			goto error0;
 		if (i == 0 || ltlen == 0) {
 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+			trace_xfs_alloc_near_noentry(args);
 			return 0;
 		}
 		ASSERT(i == 1);
 	}
 	args->wasfromfl = 0;
+
 	/*
 	 * First algorithm.
 	 * If the requested extent is large wrt the freespaces available
@@ -890,7 +896,7 @@ xfs_alloc_ag_vextent_near(
 			if (args->len < blen)
 				continue;
 			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-				args->alignment, ltbno, ltlen, &ltnew);
+				args->alignment, ltbnoa, ltlena, &ltnew);
 			if (ltnew != NULLAGBLOCK &&
 			    (args->len > blen || ltdiff < bdiff)) {
 				bdiff = ltdiff;
@@ -1042,11 +1048,12 @@ xfs_alloc_ag_vextent_near(
 			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
 			xfs_alloc_fix_len(args);
 			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-				args->alignment, ltbno, ltlen, &ltnew);
+				args->alignment, ltbnoa, ltlena, &ltnew);
 
 			error = xfs_alloc_find_best_extent(args,
 					&bno_cur_lt, &bno_cur_gt,
-					ltdiff, &gtbno, &gtlen, &gtlena,
+					ltdiff, &gtbno, &gtlen,
+					&gtbnoa, &gtlena,
 					0 /* search right */);
 		} else {
 			ASSERT(gtlena >= args->minlen);
@@ -1057,11 +1064,12 @@ xfs_alloc_ag_vextent_near(
 			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
 			xfs_alloc_fix_len(args);
 			gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
-				args->alignment, gtbno, gtlen, &gtnew);
+				args->alignment, gtbnoa, gtlena, &gtnew);
 
 			error = xfs_alloc_find_best_extent(args,
 					&bno_cur_gt, &bno_cur_lt,
-					gtdiff, &ltbno, &ltlen, &ltlena,
+					gtdiff, &ltbno, &ltlen,
+					&ltbnoa, &ltlena,
 					1 /* search left */);
 		}
 
@@ -1073,6 +1081,12 @@ xfs_alloc_ag_vextent_near(
 	 * If we couldn't get anything, give up.
 	 */
 	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
+		if (!forced++) {
+			trace_xfs_alloc_near_busy(args);
+			xfs_log_force(args->mp, XFS_LOG_SYNC);
+			goto restart;
+		}
+
 		trace_xfs_alloc_size_neither(args);
 		args->agbno = NULLAGBLOCK;
 		return 0;
@@ -1107,12 +1121,13 @@ xfs_alloc_ag_vextent_near(
 		return 0;
 	}
 	rlen = args->len;
-	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
-		ltlen, &ltnew);
+	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
+				     ltbnoa, ltlena, &ltnew);
 	ASSERT(ltnew >= ltbno);
-	ASSERT(ltnew + rlen <= ltbno + ltlen);
+	ASSERT(ltnew + rlen <= ltbnoa + ltlena);
 	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 	args->agbno = ltnew;
+
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
 			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
 		goto error0;
@@ -1155,26 +1170,35 @@ xfs_alloc_ag_vextent_size(
 	int		i;		/* temp status variable */
 	xfs_agblock_t	rbno;		/* returned block number */
 	xfs_extlen_t	rlen;		/* length of returned extent */
+	int		forced = 0;
 
+restart:
 	/*
 	 * Allocate and initialize a cursor for the by-size btree.
 	 */
 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 		args->agno, XFS_BTNUM_CNT);
 	bno_cur = NULL;
+
 	/*
 	 * Look for an entry >= maxlen+alignment-1 blocks.
 	 */
 	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
 			args->maxlen + args->alignment - 1, &i)))
 		goto error0;
+
 	/*
-	 * If none, then pick up the last entry in the tree unless the
-	 * tree is empty.
+	 * If none or we have busy extents that we cannot allocate from, then
+	 * we have to settle for a smaller extent. In the case that there are
+	 * no large extents, this will return the last entry in the tree unless
+	 * the tree is empty. In the case that there are only busy large
+	 * extents, this will return the largest small extent unless there
+	 * are no smaller extents available.
 	 */
-	if (!i) {
-		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
-				&flen, &i)))
+	if (!i || forced > 1) {
+		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
+						   &fbno, &flen, &i);
+		if (error)
 			goto error0;
 		if (i == 0 || flen == 0) {
 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
@@ -1182,22 +1206,56 @@ xfs_alloc_ag_vextent_size(
 			return 0;
 		}
 		ASSERT(i == 1);
+		xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+	} else {
+		/*
+		 * Search for a non-busy extent that is large enough.
+		 * If we are at low space, don't check, or if we fall off
+		 * the end of the btree, turn off the busy check and
+		 * restart.
+		 */
+		for (;;) {
+			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
+			if (error)
+				goto error0;
+			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
+
+			xfs_alloc_compute_aligned(args, fbno, flen,
+						  &rbno, &rlen);
+
+			if (rlen >= args->maxlen)
+				break;
+
+			error = xfs_btree_increment(cnt_cur, 0, &i);
+			if (error)
+				goto error0;
+			if (i == 0) {
+				/*
+				 * Our only valid extents must have been busy.
+				 * Make it unbusy by forcing the log out and
+				 * retrying. If we've been here before, forcing
+				 * the log isn't making the extents available,
+				 * which means they have probably been freed in
+				 * this transaction. In that case, we have to
+				 * give up on them and we'll attempt a minlen
+				 * allocation the next time around.
+				 */
+				xfs_btree_del_cursor(cnt_cur,
+						     XFS_BTREE_NOERROR);
+				trace_xfs_alloc_size_busy(args);
+				if (!forced++)
+					xfs_log_force(args->mp, XFS_LOG_SYNC);
+				goto restart;
+			}
+		}
 	}
-	/*
-	 * There's a freespace as big as maxlen+alignment-1, get it.
-	 */
-	else {
-		if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
-			goto error0;
-		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
-	}
+
 	/*
 	 * In the first case above, we got the last entry in the
 	 * by-size btree. Now we check to see if the space hits maxlen
 	 * once aligned; if not, we search left for something better.
 	 * This can't happen in the second case above.
 	 */
-	xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
 	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
 	XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
 			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
@@ -1251,13 +1309,19 @@ xfs_alloc_ag_vextent_size(
 	 * Fix up the length.
 	 */
 	args->len = rlen;
-	xfs_alloc_fix_len(args);
-	if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
-		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-		trace_xfs_alloc_size_nominleft(args);
-		args->agbno = NULLAGBLOCK;
-		return 0;
+	if (rlen < args->minlen) {
+		if (!forced++) {
+			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+			trace_xfs_alloc_size_busy(args);
+			xfs_log_force(args->mp, XFS_LOG_SYNC);
+			goto restart;
+		}
+		goto out_nominleft;
 	}
+	xfs_alloc_fix_len(args);
+
+	if (!xfs_alloc_fix_minleft(args))
+		goto out_nominleft;
 	rlen = args->len;
 	XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
 	/*
@@ -1287,6 +1351,12 @@ error0:
 	if (bno_cur)
 		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
 	return error;
+
+out_nominleft:
+	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+	trace_xfs_alloc_size_nominleft(args);
+	args->agbno = NULLAGBLOCK;
+	return 0;
 }
 
 /*
@@ -2650,6 +2720,185 @@ xfs_alloc_busy_search(
 	return match;
 }
 
+/*
+ * For a given extent [fbno, flen], search the busy extent list to find a
+ * subset of the extent that is not busy.  If *rlen is smaller than
+ * args->minlen no suitable extent could be found, and the higher level
+ * code needs to force out the log and retry the allocation.
+ */
+STATIC void
+xfs_alloc_busy_trim(
+	struct xfs_alloc_arg	*args,
+	xfs_agblock_t		bno,
+	xfs_extlen_t		len,
+	xfs_agblock_t		*rbno,
+	xfs_extlen_t		*rlen)
+{
+	xfs_agblock_t		fbno = bno;
+	xfs_extlen_t		flen = len;
+	struct rb_node		*rbp;
+
+	ASSERT(flen > 0);
+
+	spin_lock(&args->pag->pagb_lock);
+	rbp = args->pag->pagb_tree.rb_node;
+	while (rbp && flen >= args->minlen) {
+		struct xfs_busy_extent *busyp =
+			rb_entry(rbp, struct xfs_busy_extent, rb_node);
+		xfs_agblock_t	fend = fbno + flen;
+		xfs_agblock_t	bbno = busyp->bno;
+		xfs_agblock_t	bend = bbno + busyp->length;
+
+		if (fend <= bbno) {
+			rbp = rbp->rb_left;
+			continue;
+		} else if (fbno >= bend) {
+			rbp = rbp->rb_right;
+			continue;
+		}
+
+		if (bbno <= fbno) {
+			/* start overlap */
+
+			/*
+			 * Case 1:
+			 *    bbno           bend
+			 *    +BBBBBBBBBBBBBBBBB+
+			 *        +---------+
+			 *        fbno   fend
+			 *
+			 * Case 2:
+			 *    bbno           bend
+			 *    +BBBBBBBBBBBBBBBBB+
+			 *    +-------------+
+			 *    fbno       fend
+			 *
+			 * Case 3:
+			 *    bbno           bend
+			 *    +BBBBBBBBBBBBBBBBB+
+			 *        +-------------+
+			 *        fbno       fend
+			 *
+			 * Case 4:
+			 *    bbno           bend
+			 *    +BBBBBBBBBBBBBBBBB+
+			 *    +-----------------+
+			 *    fbno           fend
+			 *
+			 * No unbusy region in extent, return failure.
+			 */
+			if (fend <= bend)
+				goto fail;
+
+			/*
+			 * Case 5:
+			 *    bbno           bend
+			 *    +BBBBBBBBBBBBBBBBB+
+			 *        +----------------------+
+			 *        fbno                fend
+			 *
+			 * Case 6:
+			 *    bbno           bend
+			 *    +BBBBBBBBBBBBBBBBB+
+			 *    +--------------------------+
+			 *    fbno                    fend
+			 *
+			 * Needs to be trimmed to:
+			 *                       +-------+
+			 *                       fbno fend
+			 */
+			fbno = bend;
+		} else if (bend >= fend) {
+			/* end overlap */
+
+			/*
+			 * Case 7:
+			 *             bbno           bend
+			 *             +BBBBBBBBBBBBBBBBB+
+			 *    +------------------+
+			 *    fbno            fend
+			 *
+			 * Case 8:
+			 *             bbno           bend
+			 *             +BBBBBBBBBBBBBBBBB+
+			 *    +--------------------------+
+			 *    fbno                    fend
+			 *
+			 * Needs to be trimmed to:
+			 *    +-------+
+			 *    fbno fend
+			 */
+			fend = bbno;
+		} else {
+			/* middle overlap */
+
+			/*
+			 * Case 9:
+			 *             bbno           bend
+			 *             +BBBBBBBBBBBBBBBBB+
+			 *    +-----------------------------------+
+			 *    fbno                             fend
+			 *
+			 * Can be trimmed to:
+			 *    +-------+        OR         +-------+
+			 *    fbno fend                   fbno fend
+			 *
+			 * Backward allocation leads to significant
+			 * fragmentation of directories, which degrades
+			 * directory performance, therefore we always want to
+			 * choose the option that produces forward allocation
+			 * patterns.
+			 * Preferring the lower bno extent will make the next
+			 * request use "fend" as the start of the next
+			 * allocation; if the segment is no longer busy at
+			 * that point, we'll get a contiguous allocation, but
+			 * even if it is still busy, we will get a forward
+			 * allocation.
+			 * We try to avoid choosing the segment at "bend",
+			 * because that can lead to the next allocation
+			 * taking the segment at "fbno", which would be a
+			 * backward allocation.  We only use the segment at
+			 * "fbno" if it is much larger than the current
+			 * requested size, because in that case there's a
+			 * good chance subsequent allocations will be
+			 * contiguous.
+			 */
+			if (bbno - fbno >= args->maxlen) {
+				/* left candidate fits perfectly */
+				fend = bbno;
+			} else if (fend - bend >= args->maxlen * 4) {
+				/* right candidate has enough free space */
+				fbno = bend;
+			} else if (bbno - fbno >= args->minlen) {
+				/* left candidate fits minimum requirement */
+				fend = bbno;
+			} else {
+				goto fail;
+			}
+		}
+
+		flen = fend - fbno;
+	}
+	spin_unlock(&args->pag->pagb_lock);
+
+	if (fbno != bno || flen != len) {
+		trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len,
+					  fbno, flen);
+	}
+	*rbno = fbno;
+	*rlen = flen;
+	return;
+fail:
+	/*
+	 * Return a zero extent length as a failure indication.  All callers
+	 * re-check if the trimmed extent satisfies the minlen requirement.
+	 */
+	spin_unlock(&args->pag->pagb_lock);
+	trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0);
+	*rbno = fbno;
+	*rlen = 0;
+}
+
 void
 xfs_alloc_busy_clear(
 	struct xfs_mount	*mp,