author		Christoph Hellwig <hch@lst.de>			2017-02-07 17:06:57 -0500
committer	Darrick J. Wong <darrick.wong@oracle.com>	2017-02-09 13:50:25 -0500
commit		ebf55872616c7d4754db5a318591a72a8d5e6896 (patch)
tree		fd88d14b7765f0834273807a14ee931519029438 /fs/xfs/libxfs
parent		5e30c23d13919a718b22d4921dc5c0accc59da27 (diff)
xfs: improve handling of busy extents in the low-level allocator
Currently we force the log and simply try again if we hit a busy extent.
But especially with online discard enabled, it might take a while after
the log force for the busy extents to disappear, and we might already
have completed our second pass by then.
So instead we add a new waitqueue and a generation counter to the pag
structure so that we can issue wakeups once busy extents have been
removed, and we replace the single retry with an unconditional one -
after all, we hold the AGF buffer lock, so no other allocations or frees
can race with us in this AG.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
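The retry path now waits in xfs_extent_busy_flush() instead of blindly
re-forcing the log. That helper lives in fs/xfs/xfs_extent_busy.c and is
therefore outside the diffstat below; what follows is a minimal sketch of
the waitqueue-plus-generation-counter pattern it relies on, assuming the
per-AG fields are named pagb_gen and pagb_wait as elsewhere in this
series - illustrative only, not the verbatim implementation.

/*
 * Minimal sketch: force the log, then sleep until this AG's busy
 * generation advances past the value the caller sampled while trimming.
 * The pagb_gen/pagb_wait field names are assumed from the related series.
 */
void
xfs_extent_busy_flush(
	struct xfs_mount	*mp,
	struct xfs_perag	*pag,
	unsigned		busy_gen)
{
	/* Push the log so the pending busy extents can be retired... */
	xfs_log_force(mp, XFS_LOG_SYNC);

	/* ...then wait for the AG's busy generation to move on. */
	wait_event(pag->pagb_wait,
		   READ_ONCE(pag->pagb_gen) != busy_gen);
}

/*
 * Whoever clears busy extents bumps the generation and wakes waiters:
 *
 *	pag->pagb_gen++;
 *	wake_up_all(&pag->pagb_wait);
 */

Because the allocator still holds the AGF buffer lock across the retry,
nothing else can allocate or free in this AG while it sleeps, so a single
unconditional retry after the wakeup is sufficient.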
Diffstat (limited to 'fs/xfs/libxfs')
-rw-r--r--	fs/xfs/libxfs/xfs_alloc.c	93
1 file changed, 49 insertions(+), 44 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 9f06a211e157..fe98fbc4adf1 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -221,20 +221,22 @@ xfs_alloc_get_rec(
  * Compute aligned version of the found extent.
  * Takes alignment and min length into account.
  */
-STATIC void
+STATIC bool
 xfs_alloc_compute_aligned(
 	xfs_alloc_arg_t	*args,		/* allocation argument structure */
 	xfs_agblock_t	foundbno,	/* starting block in found extent */
 	xfs_extlen_t	foundlen,	/* length in found extent */
 	xfs_agblock_t	*resbno,	/* result block number */
-	xfs_extlen_t	*reslen)	/* result length */
+	xfs_extlen_t	*reslen,	/* result length */
+	unsigned	*busy_gen)
 {
-	xfs_agblock_t	bno;
-	xfs_extlen_t	len;
+	xfs_agblock_t	bno = foundbno;
+	xfs_extlen_t	len = foundlen;
 	xfs_extlen_t	diff;
+	bool		busy;
 
 	/* Trim busy sections out of found extent */
-	xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len);
+	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
 
 	/*
 	 * If we have a largish extent that happens to start before min_agbno,
@@ -259,6 +261,8 @@ xfs_alloc_compute_aligned(
 		*resbno = bno;
 		*reslen = len;
 	}
+
+	return busy;
 }
 
 /*
@@ -737,10 +741,11 @@ xfs_alloc_ag_vextent_exact(
 	int		error;
 	xfs_agblock_t	fbno;	/* start block of found extent */
 	xfs_extlen_t	flen;	/* length of found extent */
-	xfs_agblock_t	tbno;	/* start block of trimmed extent */
-	xfs_extlen_t	tlen;	/* length of trimmed extent */
-	xfs_agblock_t	tend;	/* end block of trimmed extent */
+	xfs_agblock_t	tbno;	/* start block of busy extent */
+	xfs_extlen_t	tlen;	/* length of busy extent */
+	xfs_agblock_t	tend;	/* end block of busy extent */
 	int		i;	/* success/failure of operation */
+	unsigned	busy_gen;
 
 	ASSERT(args->alignment == 1);
 
@@ -773,7 +778,9 @@ xfs_alloc_ag_vextent_exact(
 	/*
 	 * Check for overlapping busy extents.
 	 */
-	xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen);
+	tbno = fbno;
+	tlen = flen;
+	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
 
 	/*
 	 * Give up if the start of the extent is busy, or the freespace isn't
@@ -853,6 +860,7 @@ xfs_alloc_find_best_extent(
 	xfs_agblock_t	sdiff;
 	int		error;
 	int		i;
+	unsigned	busy_gen;
 
 	/* The good extent is perfect, no need to search. */
 	if (!gdiff)
@@ -866,7 +874,8 @@ xfs_alloc_find_best_extent(
 		if (error)
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-		xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena);
+		xfs_alloc_compute_aligned(args, *sbno, *slen,
+				sbnoa, slena, &busy_gen);
 
 		/*
 		 * The good extent is closer than this one.
@@ -955,7 +964,8 @@ xfs_alloc_ag_vextent_near(
 	xfs_extlen_t	ltlena;		/* aligned ... */
 	xfs_agblock_t	ltnew;		/* useful start bno of left side */
 	xfs_extlen_t	rlen;		/* length of returned extent */
-	int		forced = 0;
+	bool		busy;
+	unsigned	busy_gen;
 #ifdef DEBUG
 	/*
 	 * Randomly don't execute the first algorithm.
@@ -982,6 +992,7 @@ restart:
 	ltlen = 0;
 	gtlena = 0;
 	ltlena = 0;
+	busy = false;
 
 	/*
 	 * Get a cursor for the by-size btree.
@@ -1064,8 +1075,8 @@ restart:
 			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
 				goto error0;
 			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-			xfs_alloc_compute_aligned(args, ltbno, ltlen,
-						  &ltbnoa, &ltlena);
+			busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
+					&ltbnoa, &ltlena, &busy_gen);
 			if (ltlena < args->minlen)
 				continue;
 			if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
@@ -1183,8 +1194,8 @@ restart:
 		if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-		xfs_alloc_compute_aligned(args, ltbno, ltlen,
-					  &ltbnoa, &ltlena);
+		busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
+				&ltbnoa, &ltlena, &busy_gen);
 		if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
 			break;
 		if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
@@ -1199,8 +1210,8 @@ restart:
 		if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
 			goto error0;
 		XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
-		xfs_alloc_compute_aligned(args, gtbno, gtlen,
-					  &gtbnoa, &gtlena);
+		busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
+				&gtbnoa, &gtlena, &busy_gen);
 		if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
 			break;
 		if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
@@ -1261,9 +1272,9 @@ restart:
 	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 
-		if (!forced++) {
+		if (busy) {
 			trace_xfs_alloc_near_busy(args);
-			xfs_log_force(args->mp, XFS_LOG_SYNC);
+			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
 			goto restart;
 		}
 		trace_xfs_alloc_size_neither(args);
@@ -1344,7 +1355,8 @@ xfs_alloc_ag_vextent_size(
 	int		i;		/* temp status variable */
 	xfs_agblock_t	rbno;		/* returned block number */
 	xfs_extlen_t	rlen;		/* length of returned extent */
-	int		forced = 0;
+	bool		busy;
+	unsigned	busy_gen;
 
 restart:
 	/*
@@ -1353,6 +1365,7 @@ restart:
 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 		args->agno, XFS_BTNUM_CNT);
 	bno_cur = NULL;
+	busy = false;
 
 	/*
 	 * Look for an entry >= maxlen+alignment-1 blocks.
@@ -1362,14 +1375,13 @@ restart:
 		goto error0;
 
 	/*
-	 * If none or we have busy extents that we cannot allocate from, then
-	 * we have to settle for a smaller extent. In the case that there are
-	 * no large extents, this will return the last entry in the tree unless
-	 * the tree is empty. In the case that there are only busy large
-	 * extents, this will return the largest small extent unless there
+	 * If none then we have to settle for a smaller extent. In the case that
+	 * there are no large extents, this will return the last entry in the
+	 * tree unless the tree is empty. In the case that there are only busy
+	 * large extents, this will return the largest small extent unless there
 	 * are no smaller extents available.
 	 */
-	if (!i || forced > 1) {
+	if (!i) {
 		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
 						   &fbno, &flen, &i);
 		if (error)
@@ -1380,13 +1392,11 @@ restart:
 			return 0;
 		}
 		ASSERT(i == 1);
-		xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
+		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
+				&rlen, &busy_gen);
 	} else {
 		/*
 		 * Search for a non-busy extent that is large enough.
-		 * If we are at low space, don't check, or if we fall of
-		 * the end of the btree, turn off the busy check and
-		 * restart.
 		 */
 		for (;;) {
 			error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
@@ -1394,8 +1404,8 @@ restart:
 				goto error0;
 			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 
-			xfs_alloc_compute_aligned(args, fbno, flen,
-						  &rbno, &rlen);
+			busy = xfs_alloc_compute_aligned(args, fbno, flen,
+					&rbno, &rlen, &busy_gen);
 
 			if (rlen >= args->maxlen)
 				break;
@@ -1407,18 +1417,13 @@ restart:
 				/*
 				 * Our only valid extents must have been busy.
 				 * Make it unbusy by forcing the log out and
-				 * retrying. If we've been here before, forcing
-				 * the log isn't making the extents available,
-				 * which means they have probably been freed in
-				 * this transaction. In that case, we have to
-				 * give up on them and we'll attempt a minlen
-				 * allocation the next time around.
+				 * retrying.
 				 */
 				xfs_btree_del_cursor(cnt_cur,
 						     XFS_BTREE_NOERROR);
 				trace_xfs_alloc_size_busy(args);
-				if (!forced++)
-					xfs_log_force(args->mp, XFS_LOG_SYNC);
+				xfs_extent_busy_flush(args->mp,
+							args->pag, busy_gen);
 				goto restart;
 			}
 		}
@@ -1454,8 +1459,8 @@ restart:
 			XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
 			if (flen < bestrlen)
 				break;
-			xfs_alloc_compute_aligned(args, fbno, flen,
-						  &rbno, &rlen);
+			busy = xfs_alloc_compute_aligned(args, fbno, flen,
+					&rbno, &rlen, &busy_gen);
 			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
 			XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
 				(rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1484,10 +1489,10 @@ restart:
 	 */
 	args->len = rlen;
 	if (rlen < args->minlen) {
-		if (!forced++) {
+		if (busy) {
 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 			trace_xfs_alloc_size_busy(args);
-			xfs_log_force(args->mp, XFS_LOG_SYNC);
+			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
 			goto restart;
 		}
 		goto out_nominleft;