about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2017-02-07 17:06:57 -0500
committerDarrick J. Wong <darrick.wong@oracle.com>2017-02-09 13:50:25 -0500
commitebf55872616c7d4754db5a318591a72a8d5e6896 (patch)
treefd88d14b7765f0834273807a14ee931519029438
parent5e30c23d13919a718b22d4921dc5c0accc59da27 (diff)
xfs: improve handling of busy extents in the low-level allocator
Currently we force the log and simply try again if we hit a busy extent, but especially with online discard enabled it might take a while after the log force for the busy extents to disappear, and we might have already completed our second pass. So instead we add a new waitqueue and a generation counter to the pag structure so that we can do wakeups once we've removed busy extents, and we replace the single retry with an unconditional one - after all we hold the AGF buffer lock, so no other allocations or frees can be racing with us in this AG. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com> Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c93
-rw-r--r--fs/xfs/xfs_extent_busy.c125
-rw-r--r--fs/xfs/xfs_extent_busy.h11
-rw-r--r--fs/xfs/xfs_mount.c8
-rw-r--r--fs/xfs/xfs_mount.h2
5 files changed, 166 insertions, 73 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 9f06a211e157..fe98fbc4adf1 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -221,20 +221,22 @@ xfs_alloc_get_rec(
221 * Compute aligned version of the found extent. 221 * Compute aligned version of the found extent.
222 * Takes alignment and min length into account. 222 * Takes alignment and min length into account.
223 */ 223 */
224STATIC void 224STATIC bool
225xfs_alloc_compute_aligned( 225xfs_alloc_compute_aligned(
226 xfs_alloc_arg_t *args, /* allocation argument structure */ 226 xfs_alloc_arg_t *args, /* allocation argument structure */
227 xfs_agblock_t foundbno, /* starting block in found extent */ 227 xfs_agblock_t foundbno, /* starting block in found extent */
228 xfs_extlen_t foundlen, /* length in found extent */ 228 xfs_extlen_t foundlen, /* length in found extent */
229 xfs_agblock_t *resbno, /* result block number */ 229 xfs_agblock_t *resbno, /* result block number */
230 xfs_extlen_t *reslen) /* result length */ 230 xfs_extlen_t *reslen, /* result length */
231 unsigned *busy_gen)
231{ 232{
232 xfs_agblock_t bno; 233 xfs_agblock_t bno = foundbno;
233 xfs_extlen_t len; 234 xfs_extlen_t len = foundlen;
234 xfs_extlen_t diff; 235 xfs_extlen_t diff;
236 bool busy;
235 237
236 /* Trim busy sections out of found extent */ 238 /* Trim busy sections out of found extent */
237 xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); 239 busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
238 240
239 /* 241 /*
240 * If we have a largish extent that happens to start before min_agbno, 242 * If we have a largish extent that happens to start before min_agbno,
@@ -259,6 +261,8 @@ xfs_alloc_compute_aligned(
259 *resbno = bno; 261 *resbno = bno;
260 *reslen = len; 262 *reslen = len;
261 } 263 }
264
265 return busy;
262} 266}
263 267
264/* 268/*
@@ -737,10 +741,11 @@ xfs_alloc_ag_vextent_exact(
737 int error; 741 int error;
738 xfs_agblock_t fbno; /* start block of found extent */ 742 xfs_agblock_t fbno; /* start block of found extent */
739 xfs_extlen_t flen; /* length of found extent */ 743 xfs_extlen_t flen; /* length of found extent */
740 xfs_agblock_t tbno; /* start block of trimmed extent */ 744 xfs_agblock_t tbno; /* start block of busy extent */
741 xfs_extlen_t tlen; /* length of trimmed extent */ 745 xfs_extlen_t tlen; /* length of busy extent */
742 xfs_agblock_t tend; /* end block of trimmed extent */ 746 xfs_agblock_t tend; /* end block of busy extent */
743 int i; /* success/failure of operation */ 747 int i; /* success/failure of operation */
748 unsigned busy_gen;
744 749
745 ASSERT(args->alignment == 1); 750 ASSERT(args->alignment == 1);
746 751
@@ -773,7 +778,9 @@ xfs_alloc_ag_vextent_exact(
773 /* 778 /*
774 * Check for overlapping busy extents. 779 * Check for overlapping busy extents.
775 */ 780 */
776 xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); 781 tbno = fbno;
782 tlen = flen;
783 xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
777 784
778 /* 785 /*
779 * Give up if the start of the extent is busy, or the freespace isn't 786 * Give up if the start of the extent is busy, or the freespace isn't
@@ -853,6 +860,7 @@ xfs_alloc_find_best_extent(
853 xfs_agblock_t sdiff; 860 xfs_agblock_t sdiff;
854 int error; 861 int error;
855 int i; 862 int i;
863 unsigned busy_gen;
856 864
857 /* The good extent is perfect, no need to search. */ 865 /* The good extent is perfect, no need to search. */
858 if (!gdiff) 866 if (!gdiff)
@@ -866,7 +874,8 @@ xfs_alloc_find_best_extent(
866 if (error) 874 if (error)
867 goto error0; 875 goto error0;
868 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 876 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
869 xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); 877 xfs_alloc_compute_aligned(args, *sbno, *slen,
878 sbnoa, slena, &busy_gen);
870 879
871 /* 880 /*
872 * The good extent is closer than this one. 881 * The good extent is closer than this one.
@@ -955,7 +964,8 @@ xfs_alloc_ag_vextent_near(
955 xfs_extlen_t ltlena; /* aligned ... */ 964 xfs_extlen_t ltlena; /* aligned ... */
956 xfs_agblock_t ltnew; /* useful start bno of left side */ 965 xfs_agblock_t ltnew; /* useful start bno of left side */
957 xfs_extlen_t rlen; /* length of returned extent */ 966 xfs_extlen_t rlen; /* length of returned extent */
958 int forced = 0; 967 bool busy;
968 unsigned busy_gen;
959#ifdef DEBUG 969#ifdef DEBUG
960 /* 970 /*
961 * Randomly don't execute the first algorithm. 971 * Randomly don't execute the first algorithm.
@@ -982,6 +992,7 @@ restart:
982 ltlen = 0; 992 ltlen = 0;
983 gtlena = 0; 993 gtlena = 0;
984 ltlena = 0; 994 ltlena = 0;
995 busy = false;
985 996
986 /* 997 /*
987 * Get a cursor for the by-size btree. 998 * Get a cursor for the by-size btree.
@@ -1064,8 +1075,8 @@ restart:
1064 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i))) 1075 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
1065 goto error0; 1076 goto error0;
1066 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1077 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1067 xfs_alloc_compute_aligned(args, ltbno, ltlen, 1078 busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
1068 &ltbnoa, &ltlena); 1079 &ltbnoa, &ltlena, &busy_gen);
1069 if (ltlena < args->minlen) 1080 if (ltlena < args->minlen)
1070 continue; 1081 continue;
1071 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno) 1082 if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
@@ -1183,8 +1194,8 @@ restart:
1183 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i))) 1194 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
1184 goto error0; 1195 goto error0;
1185 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1196 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1186 xfs_alloc_compute_aligned(args, ltbno, ltlen, 1197 busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
1187 &ltbnoa, &ltlena); 1198 &ltbnoa, &ltlena, &busy_gen);
1188 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno) 1199 if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
1189 break; 1200 break;
1190 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) 1201 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
@@ -1199,8 +1210,8 @@ restart:
1199 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i))) 1210 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
1200 goto error0; 1211 goto error0;
1201 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1212 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1202 xfs_alloc_compute_aligned(args, gtbno, gtlen, 1213 busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
1203 &gtbnoa, &gtlena); 1214 &gtbnoa, &gtlena, &busy_gen);
1204 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno) 1215 if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
1205 break; 1216 break;
1206 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) 1217 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
@@ -1261,9 +1272,9 @@ restart:
1261 if (bno_cur_lt == NULL && bno_cur_gt == NULL) { 1272 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
1262 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1273 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1263 1274
1264 if (!forced++) { 1275 if (busy) {
1265 trace_xfs_alloc_near_busy(args); 1276 trace_xfs_alloc_near_busy(args);
1266 xfs_log_force(args->mp, XFS_LOG_SYNC); 1277 xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
1267 goto restart; 1278 goto restart;
1268 } 1279 }
1269 trace_xfs_alloc_size_neither(args); 1280 trace_xfs_alloc_size_neither(args);
@@ -1344,7 +1355,8 @@ xfs_alloc_ag_vextent_size(
1344 int i; /* temp status variable */ 1355 int i; /* temp status variable */
1345 xfs_agblock_t rbno; /* returned block number */ 1356 xfs_agblock_t rbno; /* returned block number */
1346 xfs_extlen_t rlen; /* length of returned extent */ 1357 xfs_extlen_t rlen; /* length of returned extent */
1347 int forced = 0; 1358 bool busy;
1359 unsigned busy_gen;
1348 1360
1349restart: 1361restart:
1350 /* 1362 /*
@@ -1353,6 +1365,7 @@ restart:
1353 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, 1365 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1354 args->agno, XFS_BTNUM_CNT); 1366 args->agno, XFS_BTNUM_CNT);
1355 bno_cur = NULL; 1367 bno_cur = NULL;
1368 busy = false;
1356 1369
1357 /* 1370 /*
1358 * Look for an entry >= maxlen+alignment-1 blocks. 1371 * Look for an entry >= maxlen+alignment-1 blocks.
@@ -1362,14 +1375,13 @@ restart:
1362 goto error0; 1375 goto error0;
1363 1376
1364 /* 1377 /*
1365 * If none or we have busy extents that we cannot allocate from, then 1378 * If none then we have to settle for a smaller extent. In the case that
1366 * we have to settle for a smaller extent. In the case that there are 1379 * there are no large extents, this will return the last entry in the
1367 * no large extents, this will return the last entry in the tree unless 1380 * tree unless the tree is empty. In the case that there are only busy
1368 * the tree is empty. In the case that there are only busy large 1381 * large extents, this will return the largest small extent unless there
1369 * extents, this will return the largest small extent unless there
1370 * are no smaller extents available. 1382 * are no smaller extents available.
1371 */ 1383 */
1372 if (!i || forced > 1) { 1384 if (!i) {
1373 error = xfs_alloc_ag_vextent_small(args, cnt_cur, 1385 error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1374 &fbno, &flen, &i); 1386 &fbno, &flen, &i);
1375 if (error) 1387 if (error)
@@ -1380,13 +1392,11 @@ restart:
1380 return 0; 1392 return 0;
1381 } 1393 }
1382 ASSERT(i == 1); 1394 ASSERT(i == 1);
1383 xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); 1395 busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1396 &rlen, &busy_gen);
1384 } else { 1397 } else {
1385 /* 1398 /*
1386 * Search for a non-busy extent that is large enough. 1399 * Search for a non-busy extent that is large enough.
1387 * If we are at low space, don't check, or if we fall of
1388 * the end of the btree, turn off the busy check and
1389 * restart.
1390 */ 1400 */
1391 for (;;) { 1401 for (;;) {
1392 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); 1402 error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
@@ -1394,8 +1404,8 @@ restart:
1394 goto error0; 1404 goto error0;
1395 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1405 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1396 1406
1397 xfs_alloc_compute_aligned(args, fbno, flen, 1407 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1398 &rbno, &rlen); 1408 &rbno, &rlen, &busy_gen);
1399 1409
1400 if (rlen >= args->maxlen) 1410 if (rlen >= args->maxlen)
1401 break; 1411 break;
@@ -1407,18 +1417,13 @@ restart:
1407 /* 1417 /*
1408 * Our only valid extents must have been busy. 1418 * Our only valid extents must have been busy.
1409 * Make it unbusy by forcing the log out and 1419 * Make it unbusy by forcing the log out and
1410 * retrying. If we've been here before, forcing 1420 * retrying.
1411 * the log isn't making the extents available,
1412 * which means they have probably been freed in
1413 * this transaction. In that case, we have to
1414 * give up on them and we'll attempt a minlen
1415 * allocation the next time around.
1416 */ 1421 */
1417 xfs_btree_del_cursor(cnt_cur, 1422 xfs_btree_del_cursor(cnt_cur,
1418 XFS_BTREE_NOERROR); 1423 XFS_BTREE_NOERROR);
1419 trace_xfs_alloc_size_busy(args); 1424 trace_xfs_alloc_size_busy(args);
1420 if (!forced++) 1425 xfs_extent_busy_flush(args->mp,
1421 xfs_log_force(args->mp, XFS_LOG_SYNC); 1426 args->pag, busy_gen);
1422 goto restart; 1427 goto restart;
1423 } 1428 }
1424 } 1429 }
@@ -1454,8 +1459,8 @@ restart:
1454 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0); 1459 XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
1455 if (flen < bestrlen) 1460 if (flen < bestrlen)
1456 break; 1461 break;
1457 xfs_alloc_compute_aligned(args, fbno, flen, 1462 busy = xfs_alloc_compute_aligned(args, fbno, flen,
1458 &rbno, &rlen); 1463 &rbno, &rlen, &busy_gen);
1459 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); 1464 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1460 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 || 1465 XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
1461 (rlen <= flen && rbno + rlen <= fbno + flen), 1466 (rlen <= flen && rbno + rlen <= fbno + flen),
@@ -1484,10 +1489,10 @@ restart:
1484 */ 1489 */
1485 args->len = rlen; 1490 args->len = rlen;
1486 if (rlen < args->minlen) { 1491 if (rlen < args->minlen) {
1487 if (!forced++) { 1492 if (busy) {
1488 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); 1493 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1489 trace_xfs_alloc_size_busy(args); 1494 trace_xfs_alloc_size_busy(args);
1490 xfs_log_force(args->mp, XFS_LOG_SYNC); 1495 xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
1491 goto restart; 1496 goto restart;
1492 } 1497 }
1493 goto out_nominleft; 1498 goto out_nominleft;
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 29c2f997aedf..4d850e27095e 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -334,25 +334,31 @@ restart:
334 * subset of the extent that is not busy. If *rlen is smaller than 334 * subset of the extent that is not busy. If *rlen is smaller than
335 * args->minlen no suitable extent could be found, and the higher level 335 * args->minlen no suitable extent could be found, and the higher level
336 * code needs to force out the log and retry the allocation. 336 * code needs to force out the log and retry the allocation.
337 *
338 * Return the current busy generation for the AG if the extent is busy. This
339 * value can be used to wait for at least one of the currently busy extents
340 * to be cleared. Note that the busy list is not guaranteed to be empty after
341 * the gen is woken. The state of a specific extent must always be confirmed
342 * with another call to xfs_extent_busy_trim() before it can be used.
337 */ 343 */
338void 344bool
339xfs_extent_busy_trim( 345xfs_extent_busy_trim(
340 struct xfs_alloc_arg *args, 346 struct xfs_alloc_arg *args,
341 xfs_agblock_t bno, 347 xfs_agblock_t *bno,
342 xfs_extlen_t len, 348 xfs_extlen_t *len,
343 xfs_agblock_t *rbno, 349 unsigned *busy_gen)
344 xfs_extlen_t *rlen)
345{ 350{
346 xfs_agblock_t fbno; 351 xfs_agblock_t fbno;
347 xfs_extlen_t flen; 352 xfs_extlen_t flen;
348 struct rb_node *rbp; 353 struct rb_node *rbp;
354 bool ret = false;
349 355
350 ASSERT(len > 0); 356 ASSERT(len > 0);
351 357
352 spin_lock(&args->pag->pagb_lock); 358 spin_lock(&args->pag->pagb_lock);
353restart: 359restart:
354 fbno = bno; 360 fbno = *bno;
355 flen = len; 361 flen = *len;
356 rbp = args->pag->pagb_tree.rb_node; 362 rbp = args->pag->pagb_tree.rb_node;
357 while (rbp && flen >= args->minlen) { 363 while (rbp && flen >= args->minlen) {
358 struct xfs_extent_busy *busyp = 364 struct xfs_extent_busy *busyp =
@@ -504,24 +510,25 @@ restart:
504 510
505 flen = fend - fbno; 511 flen = fend - fbno;
506 } 512 }
507 spin_unlock(&args->pag->pagb_lock); 513out:
508 514
509 if (fbno != bno || flen != len) { 515 if (fbno != *bno || flen != *len) {
510 trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, 516 trace_xfs_extent_busy_trim(args->mp, args->agno, *bno, *len,
511 fbno, flen); 517 fbno, flen);
518 *bno = fbno;
519 *len = flen;
520 *busy_gen = args->pag->pagb_gen;
521 ret = true;
512 } 522 }
513 *rbno = fbno; 523 spin_unlock(&args->pag->pagb_lock);
514 *rlen = flen; 524 return ret;
515 return;
516fail: 525fail:
517 /* 526 /*
518 * Return a zero extent length as failure indications. All callers 527 * Return a zero extent length as failure indications. All callers
519 * re-check if the trimmed extent satisfies the minlen requirement. 528 * re-check if the trimmed extent satisfies the minlen requirement.
520 */ 529 */
521 spin_unlock(&args->pag->pagb_lock); 530 flen = 0;
522 trace_xfs_extent_busy_trim(args->mp, args->agno, bno, len, fbno, 0); 531 goto out;
523 *rbno = fbno;
524 *rlen = 0;
525} 532}
526 533
527STATIC void 534STATIC void
@@ -540,6 +547,21 @@ xfs_extent_busy_clear_one(
540 kmem_free(busyp); 547 kmem_free(busyp);
541} 548}
542 549
550static void
551xfs_extent_busy_put_pag(
552 struct xfs_perag *pag,
553 bool wakeup)
554 __releases(pag->pagb_lock)
555{
556 if (wakeup) {
557 pag->pagb_gen++;
558 wake_up_all(&pag->pagb_wait);
559 }
560
561 spin_unlock(&pag->pagb_lock);
562 xfs_perag_put(pag);
563}
564
543/* 565/*
544 * Remove all extents on the passed in list from the busy extents tree. 566 * Remove all extents on the passed in list from the busy extents tree.
545 * If do_discard is set skip extents that need to be discarded, and mark 567 * If do_discard is set skip extents that need to be discarded, and mark
@@ -554,27 +576,76 @@ xfs_extent_busy_clear(
554 struct xfs_extent_busy *busyp, *n; 576 struct xfs_extent_busy *busyp, *n;
555 struct xfs_perag *pag = NULL; 577 struct xfs_perag *pag = NULL;
556 xfs_agnumber_t agno = NULLAGNUMBER; 578 xfs_agnumber_t agno = NULLAGNUMBER;
579 bool wakeup = false;
557 580
558 list_for_each_entry_safe(busyp, n, list, list) { 581 list_for_each_entry_safe(busyp, n, list, list) {
559 if (busyp->agno != agno) { 582 if (busyp->agno != agno) {
560 if (pag) { 583 if (pag)
561 spin_unlock(&pag->pagb_lock); 584 xfs_extent_busy_put_pag(pag, wakeup);
562 xfs_perag_put(pag);
563 }
564 pag = xfs_perag_get(mp, busyp->agno);
565 spin_lock(&pag->pagb_lock);
566 agno = busyp->agno; 585 agno = busyp->agno;
586 pag = xfs_perag_get(mp, agno);
587 spin_lock(&pag->pagb_lock);
588 wakeup = false;
567 } 589 }
568 590
569 if (do_discard && busyp->length && 591 if (do_discard && busyp->length &&
570 !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) 592 !(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) {
571 busyp->flags = XFS_EXTENT_BUSY_DISCARDED; 593 busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
572 else 594 } else {
573 xfs_extent_busy_clear_one(mp, pag, busyp); 595 xfs_extent_busy_clear_one(mp, pag, busyp);
596 wakeup = true;
597 }
574 } 598 }
575 599
576 if (pag) { 600 if (pag)
577 spin_unlock(&pag->pagb_lock); 601 xfs_extent_busy_put_pag(pag, wakeup);
602}
603
604/*
605 * Flush out all busy extents for this AG.
606 */
607void
608xfs_extent_busy_flush(
609 struct xfs_mount *mp,
610 struct xfs_perag *pag,
611 unsigned busy_gen)
612{
613 DEFINE_WAIT (wait);
614 int log_flushed = 0, error;
615
616 trace_xfs_log_force(mp, 0, _THIS_IP_);
617 error = _xfs_log_force(mp, XFS_LOG_SYNC, &log_flushed);
618 if (error)
619 return;
620
621 do {
622 prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
623 if (busy_gen != READ_ONCE(pag->pagb_gen))
624 break;
625 schedule();
626 } while (1);
627
628 finish_wait(&pag->pagb_wait, &wait);
629}
630
631void
632xfs_extent_busy_wait_all(
633 struct xfs_mount *mp)
634{
635 DEFINE_WAIT (wait);
636 xfs_agnumber_t agno;
637
638 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
639 struct xfs_perag *pag = xfs_perag_get(mp, agno);
640
641 do {
642 prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
643 if (RB_EMPTY_ROOT(&pag->pagb_tree))
644 break;
645 schedule();
646 } while (1);
647 finish_wait(&pag->pagb_wait, &wait);
648
578 xfs_perag_put(pag); 649 xfs_perag_put(pag);
579 } 650 }
580} 651}
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index bfff284d2dcc..60195ea1b84a 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -58,9 +58,16 @@ void
58xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, 58xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
59 xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); 59 xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
60 60
61bool
62xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,
63 xfs_extlen_t *len, unsigned *busy_gen);
64
65void
66xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag,
67 unsigned busy_gen);
68
61void 69void
62xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t bno, 70xfs_extent_busy_wait_all(struct xfs_mount *mp);
63 xfs_extlen_t len, xfs_agblock_t *rbno, xfs_extlen_t *rlen);
64 71
65int 72int
66xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b); 73xfs_extent_busy_ag_cmp(void *priv, struct list_head *a, struct list_head *b);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 1f1e4ae44150..566eb79666ad 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -45,6 +45,7 @@
45#include "xfs_rmap_btree.h" 45#include "xfs_rmap_btree.h"
46#include "xfs_refcount_btree.h" 46#include "xfs_refcount_btree.h"
47#include "xfs_reflink.h" 47#include "xfs_reflink.h"
48#include "xfs_extent_busy.h"
48 49
49 50
50static DEFINE_MUTEX(xfs_uuid_table_mutex); 51static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -213,6 +214,7 @@ xfs_initialize_perag(
213 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); 214 INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
214 if (xfs_buf_hash_init(pag)) 215 if (xfs_buf_hash_init(pag))
215 goto out_free_pag; 216 goto out_free_pag;
217 init_waitqueue_head(&pag->pagb_wait);
216 218
217 if (radix_tree_preload(GFP_NOFS)) 219 if (radix_tree_preload(GFP_NOFS))
218 goto out_hash_destroy; 220 goto out_hash_destroy;
@@ -1079,6 +1081,12 @@ xfs_unmountfs(
1079 xfs_log_force(mp, XFS_LOG_SYNC); 1081 xfs_log_force(mp, XFS_LOG_SYNC);
1080 1082
1081 /* 1083 /*
1084 * Wait for all busy extents to be freed, including completion of
1085 * any discard operation.
1086 */
1087 xfs_extent_busy_wait_all(mp);
1088
1089 /*
1082 * We now need to tell the world we are unmounting. This will allow 1090 * We now need to tell the world we are unmounting. This will allow
1083 * us to detect that the filesystem is going away and we should error 1091 * us to detect that the filesystem is going away and we should error
1084 * out anything that we have been retrying in the background. This will 1092 * out anything that we have been retrying in the background. This will
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 7f351f706b7a..20e2981a4aec 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -384,6 +384,8 @@ typedef struct xfs_perag {
384 xfs_agino_t pagl_rightrec; 384 xfs_agino_t pagl_rightrec;
385 spinlock_t pagb_lock; /* lock for pagb_tree */ 385 spinlock_t pagb_lock; /* lock for pagb_tree */
386 struct rb_root pagb_tree; /* ordered tree of busy extents */ 386 struct rb_root pagb_tree; /* ordered tree of busy extents */
387 unsigned int pagb_gen; /* generation count for pagb_tree */
388 wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */
387 389
388 atomic_t pagf_fstrms; /* # of filestreams active in this AG */ 390 atomic_t pagf_fstrms; /* # of filestreams active in this AG */
389 391