aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Chinner <dgc@sgi.com>2006-09-07 00:26:50 -0400
committerDavid Chatterton <chatz@sgi.com>2006-09-07 00:26:50 -0400
commit4be536debe3f7b0c62283e77fd6bd8bdb9f83c6f (patch)
tree0b1ce616c52cb659fc4341afb13bcccaa2499b6c
parent10387e5eb45c6e48d67102b88229f5bc6037461c (diff)
[XFS] Prevent free space oversubscription and xfssyncd looping.
The fix for recent ENOSPC deadlocks introduced certain limitations on allocations. The fix could cause xfssyncd to loop endlessly if we did not leave some space free for the allocator to work correctly. Basically, we needed to ensure that we had at least 4 blocks free for an AG free list and a block for the inode bmap btree at all times. However, this did not take into account the fact that each AG has a free list that needs 4 blocks. Hence any filesystem with more than one AG could cause oversubscription of free space and make xfssyncd spin forever trying to allocate space needed for AG freelists that was not available in the AG. The following patch reserves space for the free lists in all AGs plus the inode bmap btree which prevents oversubscription. It also prevents those blocks from being reported as free space (as they can never be used) and makes the SMP in-core superblock accounting code and the reserved block ioctl respect this requirement. SGI-PV: 955674 SGI-Modid: xfs-linux-melb:xfs-kern:26894a Signed-off-by: David Chinner <dgc@sgi.com> Signed-off-by: David Chatterton <chatz@sgi.com>
-rw-r--r--fs/xfs/xfs_alloc.h20
-rw-r--r--fs/xfs/xfs_fsops.c16
-rw-r--r--fs/xfs/xfs_mount.c32
-rw-r--r--fs/xfs/xfs_vfsops.c3
4 files changed, 40 insertions, 31 deletions
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 650591f999ae..5a4256120ccc 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -44,6 +44,26 @@ typedef enum xfs_alloctype
44#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ 44#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
45 45
46/* 46/*
47 * In order to avoid ENOSPC-related deadlock caused by
48 * out-of-order locking of AGF buffer (PV 947395), we place
49 * constraints on the relationship among actual allocations for
50 * data blocks, freelist blocks, and potential file data bmap
51 * btree blocks. However, these restrictions may result in no
52 * actual space allocated for a delayed extent, for example, a data
53 * block in a certain AG is allocated but there is no additional
54 * block for the additional bmap btree block due to a split of the
55 * bmap btree of the file. The result of this may lead to an
56 * infinite loop in xfssyncd when the file gets flushed to disk and
57 * all delayed extents need to be actually allocated. To get around
58 * this, we explicitly set aside a few blocks which will not be
59 * reserved in delayed allocation. Considering the minimum number of
60 * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap
61 * btree requires 1 fsb, so we set the number of set-aside blocks
62 * to 4 + 4*agcount.
63 */
64#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4))
65
66/*
47 * Argument structure for xfs_alloc routines. 67 * Argument structure for xfs_alloc routines.
48 * This is turned into a structure to avoid having 20 arguments passed 68 * This is turned into a structure to avoid having 20 arguments passed
49 * down several levels of the stack. 69 * down several levels of the stack.
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 077629bab532..c064e72ada9e 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,7 +462,7 @@ xfs_fs_counts(
462 462
463 xfs_icsb_sync_counters_lazy(mp); 463 xfs_icsb_sync_counters_lazy(mp);
464 s = XFS_SB_LOCK(mp); 464 s = XFS_SB_LOCK(mp);
465 cnt->freedata = mp->m_sb.sb_fdblocks; 465 cnt->freedata = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
466 cnt->freertx = mp->m_sb.sb_frextents; 466 cnt->freertx = mp->m_sb.sb_frextents;
467 cnt->freeino = mp->m_sb.sb_ifree; 467 cnt->freeino = mp->m_sb.sb_ifree;
468 cnt->allocino = mp->m_sb.sb_icount; 468 cnt->allocino = mp->m_sb.sb_icount;
@@ -519,15 +519,19 @@ xfs_reserve_blocks(
519 } 519 }
520 mp->m_resblks = request; 520 mp->m_resblks = request;
521 } else { 521 } else {
522 __int64_t free;
523
524 free = mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
522 delta = request - mp->m_resblks; 525 delta = request - mp->m_resblks;
523 lcounter = mp->m_sb.sb_fdblocks - delta; 526 lcounter = free - delta;
524 if (lcounter < 0) { 527 if (lcounter < 0) {
525 /* We can't satisfy the request, just get what we can */ 528 /* We can't satisfy the request, just get what we can */
526 mp->m_resblks += mp->m_sb.sb_fdblocks; 529 mp->m_resblks += free;
527 mp->m_resblks_avail += mp->m_sb.sb_fdblocks; 530 mp->m_resblks_avail += free;
528 mp->m_sb.sb_fdblocks = 0; 531 mp->m_sb.sb_fdblocks = XFS_ALLOC_SET_ASIDE(mp);
529 } else { 532 } else {
530 mp->m_sb.sb_fdblocks = lcounter; 533 mp->m_sb.sb_fdblocks =
534 lcounter + XFS_ALLOC_SET_ASIDE(mp);
531 mp->m_resblks = request; 535 mp->m_resblks = request;
532 mp->m_resblks_avail += delta; 536 mp->m_resblks_avail += delta;
533 } 537 }
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 4be5c0b2d296..9dfae18d995f 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1243,24 +1243,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1243 xfs_trans_log_buf(tp, bp, first, last); 1243 xfs_trans_log_buf(tp, bp, first, last);
1244} 1244}
1245 1245
1246/*
1247 * In order to avoid ENOSPC-related deadlock caused by
1248 * out-of-order locking of AGF buffer (PV 947395), we place
1249 * constraints on the relationship among actual allocations for
1250 * data blocks, freelist blocks, and potential file data bmap
1251 * btree blocks. However, these restrictions may result in no
1252 * actual space allocated for a delayed extent, for example, a data
1253 * block in a certain AG is allocated but there is no additional
1254 * block for the additional bmap btree block due to a split of the
1255 * bmap btree of the file. The result of this may lead to an
1256 * infinite loop in xfssyncd when the file gets flushed to disk and
1257 * all delayed extents need to be actually allocated. To get around
1258 * this, we explicitly set aside a few blocks which will not be
1259 * reserved in delayed allocation. Considering the minimum number of
1260 * needed freelist blocks is 4 fsbs, a potential split of file's bmap
1261 * btree requires 1 fsb, so we set the number of set-aside blocks to 8.
1262*/
1263#define SET_ASIDE_BLOCKS 8
1264 1246
1265/* 1247/*
1266 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply 1248 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
@@ -1306,7 +1288,8 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1306 return 0; 1288 return 0;
1307 case XFS_SBS_FDBLOCKS: 1289 case XFS_SBS_FDBLOCKS:
1308 1290
1309 lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS; 1291 lcounter = (long long)
1292 mp->m_sb.sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
1310 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); 1293 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1311 1294
1312 if (delta > 0) { /* Putting blocks back */ 1295 if (delta > 0) { /* Putting blocks back */
@@ -1340,7 +1323,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1340 } 1323 }
1341 } 1324 }
1342 1325
1343 mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS; 1326 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
1344 return 0; 1327 return 0;
1345 case XFS_SBS_FREXTENTS: 1328 case XFS_SBS_FREXTENTS:
1346 lcounter = (long long)mp->m_sb.sb_frextents; 1329 lcounter = (long long)mp->m_sb.sb_frextents;
@@ -2021,7 +2004,8 @@ xfs_icsb_sync_counters_lazy(
2021 * when we get near ENOSPC. 2004 * when we get near ENOSPC.
2022 */ 2005 */
2023#define XFS_ICSB_INO_CNTR_REENABLE 64 2006#define XFS_ICSB_INO_CNTR_REENABLE 64
2024#define XFS_ICSB_FDBLK_CNTR_REENABLE 512 2007#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
2008 (512 + XFS_ALLOC_SET_ASIDE(mp))
2025STATIC void 2009STATIC void
2026xfs_icsb_balance_counter( 2010xfs_icsb_balance_counter(
2027 xfs_mount_t *mp, 2011 xfs_mount_t *mp,
@@ -2055,7 +2039,7 @@ xfs_icsb_balance_counter(
2055 case XFS_SBS_FDBLOCKS: 2039 case XFS_SBS_FDBLOCKS:
2056 count = mp->m_sb.sb_fdblocks; 2040 count = mp->m_sb.sb_fdblocks;
2057 resid = do_div(count, weight); 2041 resid = do_div(count, weight);
2058 if (count < XFS_ICSB_FDBLK_CNTR_REENABLE) 2042 if (count < XFS_ICSB_FDBLK_CNTR_REENABLE(mp))
2059 goto out; 2043 goto out;
2060 break; 2044 break;
2061 default: 2045 default:
@@ -2110,11 +2094,11 @@ again:
2110 case XFS_SBS_FDBLOCKS: 2094 case XFS_SBS_FDBLOCKS:
2111 BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0); 2095 BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
2112 2096
2113 lcounter = icsbp->icsb_fdblocks; 2097 lcounter = icsbp->icsb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
2114 lcounter += delta; 2098 lcounter += delta;
2115 if (unlikely(lcounter < 0)) 2099 if (unlikely(lcounter < 0))
2116 goto slow_path; 2100 goto slow_path;
2117 icsbp->icsb_fdblocks = lcounter; 2101 icsbp->icsb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
2118 break; 2102 break;
2119 default: 2103 default:
2120 BUG(); 2104 BUG();
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index b427d220a169..a34796e57afb 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -811,7 +811,8 @@ xfs_statvfs(
811 statp->f_bsize = sbp->sb_blocksize; 811 statp->f_bsize = sbp->sb_blocksize;
812 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0; 812 lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
813 statp->f_blocks = sbp->sb_dblocks - lsize; 813 statp->f_blocks = sbp->sb_dblocks - lsize;
814 statp->f_bfree = statp->f_bavail = sbp->sb_fdblocks; 814 statp->f_bfree = statp->f_bavail =
815 sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
815 fakeinos = statp->f_bfree << sbp->sb_inopblog; 816 fakeinos = statp->f_bfree << sbp->sb_inopblog;
816#if XFS_BIG_INUMS 817#if XFS_BIG_INUMS
817 fakeinos += mp->m_inoadd; 818 fakeinos += mp->m_inoadd;