aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
author Yingping Lu <yingping@sgi.com> 2006-06-09 00:55:18 -0400
committer Nathan Scott <nathans@sgi.com> 2006-06-09 00:55:18 -0400
commit d210a28cd851082cec9b282443f8cc0e6fc09830 (patch)
tree 77b8c843d4cb7e6095b607570c5fd16702e50592 /fs/xfs
parent d3446eac3f50dade2f09ed212b112609ee78fb33 (diff)
[XFS] In actual allocation of file system blocks and freeing of extents, the
transaction within each such operation may lock the AGF buffer multiple times. The extent-freeing function sorts the extents by AGF number before entering the transaction; however, when file system space is very limited, allocation tries every AGF to find space, which can lead to out-of-order locking and therefore deadlock. This fix mitigates the scarcity of space for allocation by setting aside a few blocks that are excluded from reservation, and avoids deadlock by maintaining ascending order of AGF locking.
SGI-PV: 947395
SGI-Modid: xfs-linux-melb:xfs-kern:210801a
Signed-off-by: Yingping Lu <yingping@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r-- fs/xfs/xfs_alloc.c 29
-rw-r--r-- fs/xfs/xfs_alloc.h 2
-rw-r--r-- fs/xfs/xfs_bmap.c 5
-rw-r--r-- fs/xfs/xfs_bmap_btree.c 10
-rw-r--r-- fs/xfs/xfs_mount.c 24
5 files changed, 55 insertions, 15 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8558226281c4..22af489d3f34 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1862,7 +1862,7 @@ xfs_alloc_fix_freelist(
1862 (pag->pagf_longest - delta) : 1862 (pag->pagf_longest - delta) :
1863 (pag->pagf_flcount > 0 || pag->pagf_longest > 0); 1863 (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
1864 if (args->minlen + args->alignment + args->minalignslop - 1 > longest || 1864 if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
1865 (args->minleft && 1865 (!(flags & XFS_ALLOC_FLAG_FREEING) &&
1866 (int)(pag->pagf_freeblks + pag->pagf_flcount - 1866 (int)(pag->pagf_freeblks + pag->pagf_flcount -
1867 need - args->total) < 1867 need - args->total) <
1868 (int)args->minleft)) { 1868 (int)args->minleft)) {
@@ -1898,7 +1898,7 @@ xfs_alloc_fix_freelist(
1898 longest = (longest > delta) ? (longest - delta) : 1898 longest = (longest > delta) ? (longest - delta) :
1899 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); 1899 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1900 if (args->minlen + args->alignment + args->minalignslop - 1 > longest || 1900 if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
1901 (args->minleft && 1901 (!(flags & XFS_ALLOC_FLAG_FREEING) &&
1902 (int)(be32_to_cpu(agf->agf_freeblks) + 1902 (int)(be32_to_cpu(agf->agf_freeblks) +
1903 be32_to_cpu(agf->agf_flcount) - need - args->total) < 1903 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1904 (int)args->minleft)) { 1904 (int)args->minleft)) {
@@ -1951,8 +1951,14 @@ xfs_alloc_fix_freelist(
1951 * the restrictions correctly. Can happen for free calls 1951 * the restrictions correctly. Can happen for free calls
1952 * on a completely full ag. 1952 * on a completely full ag.
1953 */ 1953 */
1954 if (targs.agbno == NULLAGBLOCK) 1954 if (targs.agbno == NULLAGBLOCK) {
1955 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1956 xfs_trans_brelse(tp, agflbp);
1957 args->agbp = NULL;
1958 return 0;
1959 }
1955 break; 1960 break;
1961 }
1956 /* 1962 /*
1957 * Put each allocated block on the list. 1963 * Put each allocated block on the list.
1958 */ 1964 */
@@ -2360,8 +2366,19 @@ xfs_alloc_vextent(
2360 if (args->agno == sagno && 2366 if (args->agno == sagno &&
2361 type == XFS_ALLOCTYPE_START_BNO) 2367 type == XFS_ALLOCTYPE_START_BNO)
2362 args->type = XFS_ALLOCTYPE_THIS_AG; 2368 args->type = XFS_ALLOCTYPE_THIS_AG;
2363 if (++(args->agno) == mp->m_sb.sb_agcount) 2369 /*
2364 args->agno = 0; 2370 * For the first allocation, we can try any AG to get
2371 * space. However, if we already have allocated a
2372 * block, we don't want to try AGs whose number is below
2373 * sagno. Otherwise, we may end up with out-of-order
2374 * locking of AGF, which might cause deadlock.
2375 */
2376 if (++(args->agno) == mp->m_sb.sb_agcount) {
2377 if (args->firstblock != NULLFSBLOCK)
2378 args->agno = sagno;
2379 else
2380 args->agno = 0;
2381 }
2365 /* 2382 /*
2366 * Reached the starting a.g., must either be done 2383 * Reached the starting a.g., must either be done
2367 * or switch to non-trylock mode. 2384 * or switch to non-trylock mode.
@@ -2443,7 +2460,7 @@ xfs_free_extent(
2443 args.minlen = args.minleft = args.minalignslop = 0; 2460 args.minlen = args.minleft = args.minalignslop = 0;
2444 down_read(&args.mp->m_peraglock); 2461 down_read(&args.mp->m_peraglock);
2445 args.pag = &args.mp->m_perag[args.agno]; 2462 args.pag = &args.mp->m_perag[args.agno];
2446 if ((error = xfs_alloc_fix_freelist(&args, 0))) 2463 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
2447 goto error0; 2464 goto error0;
2448#ifdef DEBUG 2465#ifdef DEBUG
2449 ASSERT(args.agbp != NULL); 2466 ASSERT(args.agbp != NULL);
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 2d1f8928b267..650591f999ae 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -41,6 +41,7 @@ typedef enum xfs_alloctype
41 * Flags for xfs_alloc_fix_freelist. 41 * Flags for xfs_alloc_fix_freelist.
42 */ 42 */
43#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ 43#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
44#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
44 45
45/* 46/*
46 * Argument structure for xfs_alloc routines. 47 * Argument structure for xfs_alloc routines.
@@ -70,6 +71,7 @@ typedef struct xfs_alloc_arg {
70 char wasfromfl; /* set if allocation is from freelist */ 71 char wasfromfl; /* set if allocation is from freelist */
71 char isfl; /* set if is freelist blocks - !acctg */ 72 char isfl; /* set if is freelist blocks - !acctg */
72 char userdata; /* set if this is user data */ 73 char userdata; /* set if this is user data */
74 xfs_fsblock_t firstblock; /* io first block allocated */
73} xfs_alloc_arg_t; 75} xfs_alloc_arg_t;
74 76
75/* 77/*
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 890ad3528174..ad595dbefe16 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2762,6 +2762,7 @@ xfs_bmap_btalloc(
2762 args.mp = mp; 2762 args.mp = mp;
2763 args.fsbno = ap->rval; 2763 args.fsbno = ap->rval;
2764 args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); 2764 args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
2765 args.firstblock = ap->firstblock;
2765 blen = 0; 2766 blen = 0;
2766 if (nullfb) { 2767 if (nullfb) {
2767 args.type = XFS_ALLOCTYPE_START_BNO; 2768 args.type = XFS_ALLOCTYPE_START_BNO;
@@ -2821,7 +2822,7 @@ xfs_bmap_btalloc(
2821 else 2822 else
2822 args.minlen = ap->alen; 2823 args.minlen = ap->alen;
2823 } else if (ap->low) { 2824 } else if (ap->low) {
2824 args.type = XFS_ALLOCTYPE_FIRST_AG; 2825 args.type = XFS_ALLOCTYPE_START_BNO;
2825 args.total = args.minlen = ap->minlen; 2826 args.total = args.minlen = ap->minlen;
2826 } else { 2827 } else {
2827 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2828 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -3452,6 +3453,7 @@ xfs_bmap_extents_to_btree(
3452 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); 3453 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
3453 args.tp = tp; 3454 args.tp = tp;
3454 args.mp = mp; 3455 args.mp = mp;
3456 args.firstblock = *firstblock;
3455 if (*firstblock == NULLFSBLOCK) { 3457 if (*firstblock == NULLFSBLOCK) {
3456 args.type = XFS_ALLOCTYPE_START_BNO; 3458 args.type = XFS_ALLOCTYPE_START_BNO;
3457 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); 3459 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
@@ -3587,6 +3589,7 @@ xfs_bmap_local_to_extents(
3587 3589
3588 args.tp = tp; 3590 args.tp = tp;
3589 args.mp = ip->i_mount; 3591 args.mp = ip->i_mount;
3592 args.firstblock = *firstblock;
3590 ASSERT((ifp->if_flags & 3593 ASSERT((ifp->if_flags &
3591 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); 3594 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3592 /* 3595 /*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index bea44709afbe..3b6dfc9b53af 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1569,12 +1569,11 @@ xfs_bmbt_split(
1569 lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp)); 1569 lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
1570 left = XFS_BUF_TO_BMBT_BLOCK(lbp); 1570 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1571 args.fsbno = cur->bc_private.b.firstblock; 1571 args.fsbno = cur->bc_private.b.firstblock;
1572 args.firstblock = args.fsbno;
1572 if (args.fsbno == NULLFSBLOCK) { 1573 if (args.fsbno == NULLFSBLOCK) {
1573 args.fsbno = lbno; 1574 args.fsbno = lbno;
1574 args.type = XFS_ALLOCTYPE_START_BNO; 1575 args.type = XFS_ALLOCTYPE_START_BNO;
1575 } else if (cur->bc_private.b.flist->xbf_low) 1576 } else
1576 args.type = XFS_ALLOCTYPE_FIRST_AG;
1577 else
1578 args.type = XFS_ALLOCTYPE_NEAR_BNO; 1577 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1579 args.mod = args.minleft = args.alignment = args.total = args.isfl = 1578 args.mod = args.minleft = args.alignment = args.total = args.isfl =
1580 args.userdata = args.minalignslop = 0; 1579 args.userdata = args.minalignslop = 0;
@@ -2356,6 +2355,7 @@ xfs_bmbt_newroot(
2356 args.userdata = args.minalignslop = 0; 2355 args.userdata = args.minalignslop = 0;
2357 args.minlen = args.maxlen = args.prod = 1; 2356 args.minlen = args.maxlen = args.prod = 1;
2358 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; 2357 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
2358 args.firstblock = args.fsbno;
2359 if (args.fsbno == NULLFSBLOCK) { 2359 if (args.fsbno == NULLFSBLOCK) {
2360#ifdef DEBUG 2360#ifdef DEBUG
2361 if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) { 2361 if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) {
@@ -2365,9 +2365,7 @@ xfs_bmbt_newroot(
2365#endif 2365#endif
2366 args.fsbno = INT_GET(*pp, ARCH_CONVERT); 2366 args.fsbno = INT_GET(*pp, ARCH_CONVERT);
2367 args.type = XFS_ALLOCTYPE_START_BNO; 2367 args.type = XFS_ALLOCTYPE_START_BNO;
2368 } else if (args.wasdel) 2368 } else
2369 args.type = XFS_ALLOCTYPE_FIRST_AG;
2370 else
2371 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2369 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2372 if ((error = xfs_alloc_vextent(&args))) { 2370 if ((error = xfs_alloc_vextent(&args))) {
2373 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2371 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c0b1c2906880..4b7be49cc4de 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1254,6 +1254,26 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1254 1254
1255 xfs_trans_log_buf(tp, bp, first, last); 1255 xfs_trans_log_buf(tp, bp, first, last);
1256} 1256}
1257
1258/*
1259 * In order to avoid ENOSPC-related deadlock caused by
1260 * out-of-order locking of AGF buffer (PV 947395), we place
1261 * constraints on the relationship among actual allocations for
1262 * data blocks, freelist blocks, and potential file data bmap
1263 * btree blocks. However, these restrictions may result in no
1264 * actual space allocated for a delayed extent, for example, a data
1265 * block in a certain AG is allocated but there is no additional
1266 * block for the additional bmap btree block due to a split of the
1267 * bmap btree of the file. The result of this may lead to an
1268 * infinite loop in xfssyncd when the file gets flushed to disk and
1269 * all delayed extents need to be actually allocated. To get around
1270 * this, we explicitly set aside a few blocks which will not be
1271 * reserved in delayed allocation. Considering the minimum number of
1272 * needed freelist blocks is 4 fsbs, a potential split of file's bmap
1273 * btree requires 1 fsb, so we set the number of set-aside blocks to 8.
1274*/
1275#define SET_ASIDE_BLOCKS 8
1276
1257/* 1277/*
1258 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply 1278 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
1259 * a delta to a specified field in the in-core superblock. Simply 1279 * a delta to a specified field in the in-core superblock. Simply
@@ -1298,7 +1318,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1298 return 0; 1318 return 0;
1299 case XFS_SBS_FDBLOCKS: 1319 case XFS_SBS_FDBLOCKS:
1300 1320
1301 lcounter = (long long)mp->m_sb.sb_fdblocks; 1321 lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS;
1302 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); 1322 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1303 1323
1304 if (delta > 0) { /* Putting blocks back */ 1324 if (delta > 0) { /* Putting blocks back */
@@ -1332,7 +1352,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1332 } 1352 }
1333 } 1353 }
1334 1354
1335 mp->m_sb.sb_fdblocks = lcounter; 1355 mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS;
1336 return 0; 1356 return 0;
1337 case XFS_SBS_FREXTENTS: 1357 case XFS_SBS_FREXTENTS:
1338 lcounter = (long long)mp->m_sb.sb_frextents; 1358 lcounter = (long long)mp->m_sb.sb_frextents;