diff options
author | Christoph Hellwig <hch@lst.de> | 2017-02-07 17:06:57 -0500 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2017-02-09 13:50:25 -0500 |
commit | ebf55872616c7d4754db5a318591a72a8d5e6896 (patch) | |
tree | fd88d14b7765f0834273807a14ee931519029438 | |
parent | 5e30c23d13919a718b22d4921dc5c0accc59da27 (diff) |
xfs: improve handling of busy extents in the low-level allocator
Currently we force the log and simply try again if we hit a busy extent,
but especially with online discard enabled it might take a while after
the log force for the busy extents to disappear, and we might have
already completed our second pass.
So instead we add a new waitqueue and a generation counter to the pag
structure so that we can do wakeups once we've removed busy extents,
and we replace the single retry with an unconditional one - after
all we hold the AGF buffer lock, so no other allocations or frees
can be racing with us in this AG.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/libxfs/xfs_alloc.c | 93 | ||||
-rw-r--r-- | fs/xfs/xfs_extent_busy.c | 125 | ||||
-rw-r--r-- | fs/xfs/xfs_extent_busy.h | 11 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 8 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 2 |
5 files changed, 166 insertions, 73 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 9f06a211e157..fe98fbc4adf1 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c | |||
@@ -221,20 +221,22 @@ xfs_alloc_get_rec( | |||
221 | * Compute aligned version of the found extent. | 221 | * Compute aligned version of the found extent. |
222 | * Takes alignment and min length into account. | 222 | * Takes alignment and min length into account. |
223 | */ | 223 | */ |
224 | STATIC void | 224 | STATIC bool |
225 | xfs_alloc_compute_aligned( | 225 | xfs_alloc_compute_aligned( |
226 | xfs_alloc_arg_t *args, /* allocation argument structure */ | 226 | xfs_alloc_arg_t *args, /* allocation argument structure */ |
227 | xfs_agblock_t foundbno, /* starting block in found extent */ | 227 | xfs_agblock_t foundbno, /* starting block in found extent */ |
228 | xfs_extlen_t foundlen, /* length in found extent */ | 228 | xfs_extlen_t foundlen, /* length in found extent */ |
229 | xfs_agblock_t *resbno, /* result block number */ | 229 | xfs_agblock_t *resbno, /* result block number */ |
230 | xfs_extlen_t *reslen) /* result length */ | 230 | xfs_extlen_t *reslen, /* result length */ |
231 | unsigned *busy_gen) | ||
231 | { | 232 | { |
232 | xfs_agblock_t bno; | 233 | xfs_agblock_t bno = foundbno; |
233 | xfs_extlen_t len; | 234 | xfs_extlen_t len = foundlen; |
234 | xfs_extlen_t diff; | 235 | xfs_extlen_t diff; |
236 | bool busy; | ||
235 | 237 | ||
236 | /* Trim busy sections out of found extent */ | 238 | /* Trim busy sections out of found extent */ |
237 | xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); | 239 | busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen); |
238 | 240 | ||
239 | /* | 241 | /* |
240 | * If we have a largish extent that happens to start before min_agbno, | 242 | * If we have a largish extent that happens to start before min_agbno, |
@@ -259,6 +261,8 @@ xfs_alloc_compute_aligned( | |||
259 | *resbno = bno; | 261 | *resbno = bno; |
260 | *reslen = len; | 262 | *reslen = len; |
261 | } | 263 | } |
264 | |||
265 | return busy; | ||
262 | } | 266 | } |
263 | 267 | ||
264 | /* | 268 | /* |
@@ -737,10 +741,11 @@ xfs_alloc_ag_vextent_exact( | |||
737 | int error; | 741 | int error; |
738 | xfs_agblock_t fbno; /* start block of found extent */ | 742 | xfs_agblock_t fbno; /* start block of found extent */ |
739 | xfs_extlen_t flen; /* length of found extent */ | 743 | xfs_extlen_t flen; /* length of found extent */ |
740 | xfs_agblock_t tbno; /* start block of trimmed extent */ | 744 | xfs_agblock_t tbno; /* start block of busy extent */ |
741 | xfs_extlen_t tlen; /* length of trimmed extent */ | 745 | xfs_extlen_t tlen; /* length of busy extent */ |
742 | xfs_agblock_t tend; /* end block of trimmed extent */ | 746 | xfs_agblock_t tend; /* end block of busy extent */ |
743 | int i; /* success/failure of operation */ | 747 | int i; /* success/failure of operation */ |
748 | unsigned busy_gen; | ||
744 | 749 | ||
745 | ASSERT(args->alignment == 1); | 750 | ASSERT(args->alignment == 1); |
746 | 751 | ||
@@ -773,7 +778,9 @@ xfs_alloc_ag_vextent_exact( | |||
773 | /* | 778 | /* |
774 | * Check for overlapping busy extents. | 779 | * Check for overlapping busy extents. |
775 | */ | 780 | */ |
776 | xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); | 781 | tbno = fbno; |
782 | tlen = flen; | ||
783 | xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen); | ||
777 | 784 | ||
778 | /* | 785 | /* |
779 | * Give up if the start of the extent is busy, or the freespace isn't | 786 | * Give up if the start of the extent is busy, or the freespace isn't |
@@ -853,6 +860,7 @@ xfs_alloc_find_best_extent( | |||
853 | xfs_agblock_t sdiff; | 860 | xfs_agblock_t sdiff; |
854 | int error; | 861 | int error; |
855 | int i; | 862 | int i; |
863 | unsigned busy_gen; | ||
856 | 864 | ||
857 | /* The good extent is perfect, no need to search. */ | 865 | /* The good extent is perfect, no need to search. */ |
858 | if (!gdiff) | 866 | if (!gdiff) |
@@ -866,7 +874,8 @@ xfs_alloc_find_best_extent( | |||
866 | if (error) | 874 | if (error) |
867 | goto error0; | 875 | goto error0; |
868 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); | 876 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
869 | xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); | 877 | xfs_alloc_compute_aligned(args, *sbno, *slen, |
878 | sbnoa, slena, &busy_gen); | ||
870 | 879 | ||
871 | /* | 880 | /* |
872 | * The good extent is closer than this one. | 881 | * The good extent is closer than this one. |
@@ -955,7 +964,8 @@ xfs_alloc_ag_vextent_near( | |||
955 | xfs_extlen_t ltlena; /* aligned ... */ | 964 | xfs_extlen_t ltlena; /* aligned ... */ |
956 | xfs_agblock_t ltnew; /* useful start bno of left side */ | 965 | xfs_agblock_t ltnew; /* useful start bno of left side */ |
957 | xfs_extlen_t rlen; /* length of returned extent */ | 966 | xfs_extlen_t rlen; /* length of returned extent */ |
958 | int forced = 0; | 967 | bool busy; |
968 | unsigned busy_gen; | ||
959 | #ifdef DEBUG | 969 | #ifdef DEBUG |
960 | /* | 970 | /* |
961 | * Randomly don't execute the first algorithm. | 971 | * Randomly don't execute the first algorithm. |
@@ -982,6 +992,7 @@ restart: | |||
982 | ltlen = 0; | 992 | ltlen = 0; |
983 | gtlena = 0; | 993 | gtlena = 0; |
984 | ltlena = 0; | 994 | ltlena = 0; |
995 | busy = false; | ||
985 | 996 | ||
986 | /* | 997 | /* |
987 | * Get a cursor for the by-size btree. | 998 | * Get a cursor for the by-size btree. |
@@ -1064,8 +1075,8 @@ restart: | |||
1064 | if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) | 1075 | if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) |
1065 | goto error0; | 1076 | goto error0; |
1066 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); | 1077 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1067 | xfs_alloc_compute_aligned(args, ltbno, ltlen, | 1078 | busy = xfs_alloc_compute_aligned(args, ltbno, ltlen, |
1068 | &ltbnoa, &ltlena); | 1079 | &ltbnoa, &ltlena, &busy_gen); |
1069 | if (ltlena < args->minlen) | 1080 | if (ltlena < args->minlen) |
1070 | continue; | 1081 | continue; |
1071 | if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno) | 1082 | if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno) |
@@ -1183,8 +1194,8 @@ restart: | |||
1183 | if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) | 1194 | if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) |
1184 | goto error0; | 1195 | goto error0; |
1185 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); | 1196 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1186 | xfs_alloc_compute_aligned(args, ltbno, ltlen, | 1197 | busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen, |
1187 | &ltbnoa, &ltlena); | 1198 | &ltbnoa, &ltlena, &busy_gen); |
1188 | if (ltlena >= args->minlen && ltbnoa >= args->min_agbno) | 1199 | if (ltlena >= args->minlen && ltbnoa >= args->min_agbno) |
1189 | break; | 1200 | break; |
1190 | if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) | 1201 | if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) |
@@ -1199,8 +1210,8 @@ restart: | |||
1199 | if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) | 1210 | if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) |
1200 | goto error0; | 1211 | goto error0; |
1201 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); | 1212 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1202 | xfs_alloc_compute_aligned(args, gtbno, gtlen, | 1213 | busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen, |
1203 | &gtbnoa, &gtlena); | 1214 | &gtbnoa, &gtlena, &busy_gen); |
1204 | if (gtlena >= args->minlen && gtbnoa <= args->max_agbno) | 1215 | if (gtlena >= args->minlen && gtbnoa <= args->max_agbno) |
1205 | break; | 1216 | break; |
1206 | if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) | 1217 | if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) |
@@ -1261,9 +1272,9 @@ restart: | |||
1261 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { | 1272 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { |
1262 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1273 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
1263 | 1274 | ||
1264 | if (!forced++) { | 1275 | if (busy) { |
1265 | trace_xfs_alloc_near_busy(args); | 1276 | trace_xfs_alloc_near_busy(args); |
1266 | xfs_log_force(args->mp, XFS_LOG_SYNC); | 1277 | xfs_extent_busy_flush(args->mp, args->pag, busy_gen); |
1267 | goto restart; | 1278 | goto restart; |
1268 | } | 1279 | } |
1269 | trace_xfs_alloc_size_neither(args); | 1280 | trace_xfs_alloc_size_neither(args); |
@@ -1344,7 +1355,8 @@ xfs_alloc_ag_vextent_size( | |||
1344 | int i; /* temp status variable */ | 1355 | int i; /* temp status variable */ |
1345 | xfs_agblock_t rbno; /* returned block number */ | 1356 | xfs_agblock_t rbno; /* returned block number */ |
1346 | xfs_extlen_t rlen; /* length of returned extent */ | 1357 | xfs_extlen_t rlen; /* length of returned extent */ |
1347 | int forced = 0; | 1358 | bool busy; |
1359 | unsigned busy_gen; | ||
1348 | 1360 | ||
1349 | restart: | 1361 | restart: |
1350 | /* | 1362 | /* |
@@ -1353,6 +1365,7 @@ restart: | |||
1353 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 1365 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
1354 | args->agno, XFS_BTNUM_CNT); | 1366 | args->agno, XFS_BTNUM_CNT); |
1355 | bno_cur = NULL; | 1367 | bno_cur = NULL; |
1368 | busy = false; | ||
1356 | 1369 | ||
1357 | /* | 1370 | /* |
1358 | * Look for an entry >= maxlen+alignment-1 blocks. | 1371 | * Look for an entry >= maxlen+alignment-1 blocks. |
@@ -1362,14 +1375,13 @@ restart: | |||
1362 | goto error0; | 1375 | goto error0; |
1363 | 1376 | ||
1364 | /* | 1377 | /* |
1365 | * If none or we have busy extents that we cannot allocate from, then | 1378 | * If none then we have to settle for a smaller extent. In the case that |
1366 | * we have to settle for a smaller extent. In the case that there are | 1379 | * there are no large extents, this will return the last entry in the |
1367 | * no large extents, this will return the last entry in the tree unless | 1380 | * tree unless the tree is empty. In the case that there are only busy |
1368 | * the tree is empty. In the case that there are only busy large | 1381 | * large extents, this will return the largest small extent unless there |
1369 | * extents, this will return the largest small extent unless there | ||
1370 | * are no smaller extents available. | 1382 | * are no smaller extents available. |
1371 | */ | 1383 | */ |
1372 | if (!i || forced > 1) { | 1384 | if (!i) { |
1373 | error = xfs_alloc_ag_vextent_small(args, cnt_cur, | 1385 | error = xfs_alloc_ag_vextent_small(args, cnt_cur, |
1374 | &fbno, &flen, &i); | 1386 | &fbno, &flen, &i); |
1375 | if (error) | 1387 | if (error) |
@@ -1380,13 +1392,11 @@ restart: | |||
1380 | return 0; | 1392 | return 0; |
1381 | } | 1393 | } |
1382 | ASSERT(i == 1); | 1394 | ASSERT(i == 1); |
1383 | xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); | 1395 | busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno, |
1396 | &rlen, &busy_gen); | ||
1384 | } else { | 1397 | } else { |
1385 | /* | 1398 | /* |
1386 | * Search for a non-busy extent that is large enough. | 1399 | * Search for a non-busy extent that is large enough. |
1387 | * If we are at low space, don't check, or if we fall of | ||
1388 | * the end of the btree, turn off the busy check and | ||
1389 | * restart. | ||
1390 | */ | 1400 | */ |
1391 | for (;;) { | 1401 | for (;;) { |
1392 | error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); | 1402 | error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); |
@@ -1394,8 +1404,8 @@ restart: | |||
1394 | goto error0; | 1404 | goto error0; |
1395 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); | 1405 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1396 | 1406 | ||
1397 | xfs_alloc_compute_aligned(args, fbno, flen, | 1407 | busy = xfs_alloc_compute_aligned(args, fbno, flen, |
1398 | &rbno, &rlen); | 1408 | &rbno, &rlen, &busy_gen); |
1399 | 1409 | ||
1400 | if (rlen >= args->maxlen) | 1410 | if (rlen >= args->maxlen) |
1401 | break; | 1411 | break; |
@@ -1407,18 +1417,13 @@ restart: | |||
1407 | /* | 1417 | /* |
1408 | * Our only valid extents must have been busy. | 1418 | * Our only valid extents must have been busy. |
1409 | * Make it unbusy by forcing the log out and | 1419 | * Make it unbusy by forcing the log out and |
1410 | * retrying. If we've been here before, forcing | 1420 | * retrying. |
1411 | * the log isn't making the extents available, | ||
1412 | * which means they have probably been freed in | ||
1413 | * this transaction. In that case, we have to | ||
1414 | * give up on them and we'll attempt a minlen | ||
1415 | * allocation the next time around. | ||
1416 | */ | 1421 | */ |
1417 | xfs_btree_del_cursor(cnt_cur, | 1422 | xfs_btree_del_cursor(cnt_cur, |
1418 | XFS_BTREE_NOERROR); | 1423 | XFS_BTREE_NOERROR); |
1419 | trace_xfs_alloc_size_busy(args); | 1424 | trace_xfs_alloc_size_busy(args); |
1420 | if (!forced++) | 1425 | xfs_extent_busy_flush(args->mp, |
1421 | xfs_log_force(args->mp, XFS_LOG_SYNC); | 1426 | args->pag, busy_gen); |
1422 | goto restart; | 1427 | goto restart; |
1423 | } | 1428 | } |
1424 | } | 1429 | } |
@@ -1454,8 +1459,8 @@ restart: | |||
1454 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); | 1459 | XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); |
1455 | if (flen < bestrlen) | 1460 | if (flen < bestrlen) |
1456 | break; | 1461 | break; |
1457 | xfs_alloc_compute_aligned(args, fbno, flen, | 1462 | busy = xfs_alloc_compute_aligned(args, fbno, flen, |
1458 | &rbno, &rlen); | 1463 | &rbno, &rlen, &busy_gen); |
1459 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1464 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
1460 | XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || | 1465 | XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || |
1461 | (rlen <= flen && rbno + rlen <= fbno + flen), | 1466 | (rlen <= flen && rbno + rlen <= fbno + flen), |
@@ -1484,10 +1489,10 @@ restart: | |||
1484 | */ | 1489 | */ |
1485 | args->len = rlen; | 1490 | args->len = rlen; |
1486 | if (rlen < args->minlen) { | 1491 | if (rlen < args->minlen) { |
1487 | if (!forced++) { | 1492 | if (busy) { |
1488 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1493 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
1489 | trace_xfs_alloc_size_busy(args); | 1494 | trace_xfs_alloc_size_busy(args); |
1490 | xfs_log_force(args->mp, XFS_LOG_SYNC); | 1495 | xfs_extent_busy_flush(args->mp, args->pag, busy_gen); |
1491 | goto restart; | 1496 | goto restart; |
1492 | } | 1497 | } |
1493 | goto out_nominleft; | 1498 | goto out_nominleft; |
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index 29c2f997aedf..4d850e27095e 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c | |||
@@ -334,25 +334,31 @@ restart: | |||
334 | * subset of the extent that is not busy. If *rlen is smaller than | 334 | * subset of the extent that is not busy. If *rlen is smaller than |
335 | * args->minlen no suitable extent could be found, and the higher level | 335 | * args->minlen no suitable extent could be found, and the higher level |
336 | * code needs to force out the log and retry the allocation. | 336 | * code needs to force out the log and retry the allocation. |
337 | * | ||
338 | * Return the current busy generation for the AG if the extent is busy. This | ||
339 | * value can be used to wait for at least one of the currently busy extents | ||
340 | * to be cleared. Note that the busy list is not guaranteed to be empty after | ||
341 | * the gen is woken. The state of a specific extent must always be confirmed | ||
342 | * with another call to xfs_extent_busy_trim() before it can be used. | ||
337 | */ | 343 | */ |
338 | void | 344 | bool |
339 | xfs_extent_busy_trim( | 345 | xfs_extent_busy_trim( |
340 | struct xfs_alloc_arg *args, | 346 | struct xfs_alloc_arg *args, |
341 | xfs_agblock_t bno, | 347 | xfs_agblock_t *bno, |
342 | xfs_extlen_t len, | 348 | xfs_extlen_t *len, |
343 | xfs_agblock_t *rbno, | 349 | unsigned *busy_gen) |
344 | xfs_extlen_t *rlen) | ||
345 | { | 350 | { |
346 | xfs_agblock_t fbno; | 351 | xfs_agblock_t fbno; |
347 | xfs_extlen_t flen; | 352 | xfs_extlen_t flen; |
348 | struct rb_node *rbp; | 353 | struct rb_node *rbp; |
354 | bool ret = false; | ||
349 | 355 | ||
350 | ASSERT(len > 0); | 356 | ASSERT(len > 0); |
351 | 357 | ||
352 | spin_lock(&args->pag->pagb_lock); | 358 | spin_lock(&args->pag->pagb_lock); |
353 | restart: | 359 | restart: |
354 | fbno = bno; | 360 | fbno = *bno; |
355 | flen = len; | 361 | flen = *len; |
356 | rbp = args->pag->pagb_tree.rb_node; | 362 | rbp = args->pag->pagb_tree.rb_node; |
357 | while (rbp && flen >= args->minlen) { | 363 | while (rbp && flen >= args->minlen) { |
358 | struct xfs_extent_busy *busyp = | 364 | struct xfs_extent_busy *busyp = |
@@ -504,24 +510,25 @@ restart: | |||
504 | 510 | ||
505 | flen = fend - fbno; | 511 | flen = fend - fbno; |
506 | } | 512 | } |
507 | spin_unlock(&args->pag->pagb_lock); | 513 | out: |
508 | 514 | ||
509 | if (fbno != bno || flen != len) { | 515 | if (fbno != *bno || flen != *len) { |
510 | trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, | 516 | trace_xfs_extent_busy_trim(args->mp, args->agno, *bno, *len, |
511 | fbno, flen); | 517 | fbno, flen); |
518 | *bno = fbno; | ||
519 | *len = flen; | ||
520 | *busy_gen = args->pag->pagb_gen; | ||
521 | ret = true; | ||
512 | } | 522 | } |
513 | *rbno = fbno; | 523 | spin_unlock(&args->pag->pagb_lock); |
514 | *rlen = flen; | 524 | return ret; |
515 | return; | ||
516 | fail: | 525 | fail: |
517 | /* | 526 | /* |
518 | * Return a zero extent length as failure indications. All callers | 527 | * Return a zero extent length as failure indications. All callers |
519 | * re-check if the trimmed extent satisfies the minlen requirement. | 528 | * re-check if the trimmed extent satisfies the minlen requirement. |
520 | */ | 529 | */ |
521 | spin_unlock(&args->pag->pagb_lock); | 530 | flen = 0; |
522 | trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0); | 531 | goto out; |
523 | *rbno = fbno; | ||
524 | *rlen = 0; | ||
525 | } | 532 | } |
526 | 533 | ||
527 | STATIC void | 534 | STATIC void |
@@ -540,6 +547,21 @@ xfs_extent_busy_clear_one( | |||
540 | kmem_free(busyp); | 547 | kmem_free(busyp); |
541 | } | 548 | } |
542 | 549 | ||
550 | static void | ||
551 | xfs_extent_busy_put_pag( | ||
552 | struct xfs_perag *pag, | ||
553 | bool wakeup) | ||
554 | __releases(pag->pagb_lock) | ||
555 | { | ||
556 | if (wakeup) { | ||
557 | pag->pagb_gen++; | ||
558 | wake_up_all(&pag->pagb_wait); | ||
559 | } | ||
560 | |||
561 | spin_unlock(&pag->pagb_lock); | ||
562 | xfs_perag_put(pag); | ||
563 | } | ||
564 | |||
543 | /* | 565 | /* |
544 | * Remove all extents on the passed in list from the busy extents tree. | 566 | * Remove all extents on the passed in list from the busy extents tree. |
545 | * If do_discard is set skip extents that need to be discarded, and mark | 567 | * If do_discard is set skip extents that need to be discarded, and mark |
@@ -554,27 +576,76 @@ xfs_extent_busy_clear( | |||
554 | struct xfs_extent_busy *busyp, *n; | 576 | struct xfs_extent_busy *busyp, *n; |
555 | struct xfs_perag *pag = NULL; | 577 | struct xfs_perag *pag = NULL; |
556 | xfs_agnumber_t agno = NULLAGNUMBER; | 578 | xfs_agnumber_t agno = NULLAGNUMBER; |
579 | bool wakeup = false; | ||
557 | 580 | ||
558 | list_for_each_entry_safe(busyp, n, list, list) { | 581 | list_for_each_entry_safe(busyp, n, list, list) { |
559 | if (busyp->agno != agno) { | 582 | if (busyp->agno != agno) { |
560 | if (pag) { | 583 | if (pag) |
561 | spin_unlock(&pag->pagb_lock); | 584 | xfs_extent_busy_put_pag(pag, wakeup); |
562 | xfs_perag_put(pag); | ||
563 | } | ||
564 | pag = xfs_perag_get(mp, busyp->agno); | ||
565 | spin_lock(&pag->pagb_lock); | ||
566 | agno = busyp->agno; | 585 | agno = busyp->agno; |
586 | pag = xfs_perag_get(mp, agno); | ||
587 | spin_lock(&pag->pagb_lock); | ||
588 | wakeup = false; | ||
567 | } | 589 | } |
568 | 590 | ||
569 | if (do_discard && busyp->length && | 591 | if (do_discard && busyp->length && |
570 | !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) | 592 | !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) { |
571 | busyp->flags = XFS_EXTENT_BUSY_DISCARDED; | 593 | busyp->flags = XFS_EXTENT_BUSY_DISCARDED; |
572 | else | 594 | } else { |
573 | xfs_extent_busy_clear_one(mp, pag, busyp); | 595 | xfs_extent_busy_clear_one(mp, pag, busyp); |
596 | wakeup = true; | ||
597 | } | ||
574 | } | 598 | } |
575 | 599 | ||
576 | if (pag) { | 600 | if (pag) |
577 | spin_unlock(&pag->pagb_lock); | 601 | xfs_extent_busy_put_pag(pag, wakeup); |
602 | } | ||
603 | |||
604 | /* | ||
605 | * Flush out all busy extents for this AG. | ||
606 | */ | ||
607 | void | ||
608 | xfs_extent_busy_flush( | ||
609 | struct xfs_mount *mp, | ||
610 | struct xfs_perag *pag, | ||
611 | unsigned busy_gen) | ||
612 | { | ||
613 | DEFINE_WAIT (wait); | ||
614 | int log_flushed = 0, error; | ||
615 | |||
616 | trace_xfs_log_force(mp, 0, _THIS_IP_); | ||
617 | error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed); | ||
618 | if (error) | ||
619 | return; | ||
620 | |||
621 | do { | ||
622 | prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE); | ||
623 | if (busy_gen != READ_ONCE(pag->pagb_gen)) | ||
624 | break; | ||
625 | schedule(); | ||
626 | } while (1); | ||
627 | |||
628 | finish_wait(&pag->pagb_wait, &wait); | ||
629 | } | ||
630 | |||
631 | void | ||
632 | xfs_extent_busy_wait_all( | ||
633 | struct xfs_mount *mp) | ||
634 | { | ||
635 | DEFINE_WAIT (wait); | ||
636 | xfs_agnumber_t agno; | ||
637 | |||
638 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | ||
639 | struct xfs_perag *pag = xfs_perag_get(mp, agno); | ||
640 | |||
641 | do { | ||
642 | prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE); | ||
643 | if (RB_EMPTY_ROOT(&pag->pagb_tree)) | ||
644 | break; | ||
645 | schedule(); | ||
646 | } while (1); | ||
647 | finish_wait(&pag->pagb_wait, &wait); | ||
648 | |||
578 | xfs_perag_put(pag); | 649 | xfs_perag_put(pag); |
579 | } | 650 | } |
580 | } | 651 | } |
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index bfff284d2dcc..60195ea1b84a 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h | |||
@@ -58,9 +58,16 @@ void | |||
58 | xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, | 58 | xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, |
59 | xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); | 59 | xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); |
60 | 60 | ||
61 | bool | ||
62 | xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno, | ||
63 | xfs_extlen_t *len, unsigned *busy_gen); | ||
64 | |||
65 | void | ||
66 | xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag, | ||
67 | unsigned busy_gen); | ||
68 | |||
61 | void | 69 | void |
62 | xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t bno, | 70 | xfs_extent_busy_wait_all(struct xfs_mount *mp); |
63 | xfs_extlen_t len, xfs_agblock_t *rbno, xfs_extlen_t *rlen); | ||
64 | 71 | ||
65 | int | 72 | int |
66 | xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b); | 73 | xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b); |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 1f1e4ae44150..566eb79666ad 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include "xfs_rmap_btree.h" | 45 | #include "xfs_rmap_btree.h" |
46 | #include "xfs_refcount_btree.h" | 46 | #include "xfs_refcount_btree.h" |
47 | #include "xfs_reflink.h" | 47 | #include "xfs_reflink.h" |
48 | #include "xfs_extent_busy.h" | ||
48 | 49 | ||
49 | 50 | ||
50 | static DEFINE_MUTEX(xfs_uuid_table_mutex); | 51 | static DEFINE_MUTEX(xfs_uuid_table_mutex); |
@@ -213,6 +214,7 @@ xfs_initialize_perag( | |||
213 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); | 214 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
214 | if (xfs_buf_hash_init(pag)) | 215 | if (xfs_buf_hash_init(pag)) |
215 | goto out_free_pag; | 216 | goto out_free_pag; |
217 | init_waitqueue_head(&pag->pagb_wait); | ||
216 | 218 | ||
217 | if (radix_tree_preload(GFP_NOFS)) | 219 | if (radix_tree_preload(GFP_NOFS)) |
218 | goto out_hash_destroy; | 220 | goto out_hash_destroy; |
@@ -1079,6 +1081,12 @@ xfs_unmountfs( | |||
1079 | xfs_log_force(mp, XFS_LOG_SYNC); | 1081 | xfs_log_force(mp, XFS_LOG_SYNC); |
1080 | 1082 | ||
1081 | /* | 1083 | /* |
1084 | * Wait for all busy extents to be freed, including completion of | ||
1085 | * any discard operation. | ||
1086 | */ | ||
1087 | xfs_extent_busy_wait_all(mp); | ||
1088 | |||
1089 | /* | ||
1082 | * We now need to tell the world we are unmounting. This will allow | 1090 | * We now need to tell the world we are unmounting. This will allow |
1083 | * us to detect that the filesystem is going away and we should error | 1091 | * us to detect that the filesystem is going away and we should error |
1084 | * out anything that we have been retrying in the background. This will | 1092 | * out anything that we have been retrying in the background. This will |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 7f351f706b7a..20e2981a4aec 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -384,6 +384,8 @@ typedef struct xfs_perag { | |||
384 | xfs_agino_t pagl_rightrec; | 384 | xfs_agino_t pagl_rightrec; |
385 | spinlock_t pagb_lock; /* lock for pagb_tree */ | 385 | spinlock_t pagb_lock; /* lock for pagb_tree */ |
386 | struct rb_root pagb_tree; /* ordered tree of busy extents */ | 386 | struct rb_root pagb_tree; /* ordered tree of busy extents */ |
387 | unsigned int pagb_gen; /* generation count for pagb_tree */ | ||
388 | wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */ | ||
387 | 389 | ||
388 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ | 390 | atomic_t pagf_fstrms; /* # of filestreams active in this AG */ |
389 | 391 | ||