aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/xfs_alloc.c29
-rw-r--r--fs/xfs/xfs_alloc.h2
-rw-r--r--fs/xfs/xfs_bmap.c5
-rw-r--r--fs/xfs/xfs_bmap_btree.c10
-rw-r--r--fs/xfs/xfs_mount.c24
5 files changed, 55 insertions, 15 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8558226281c4..22af489d3f34 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1862,7 +1862,7 @@ xfs_alloc_fix_freelist(
1862 (pag->pagf_longest - delta) : 1862 (pag->pagf_longest - delta) :
1863 (pag->pagf_flcount > 0 || pag->pagf_longest > 0); 1863 (pag->pagf_flcount > 0 || pag->pagf_longest > 0);
1864 if (args->minlen + args->alignment + args->minalignslop - 1 > longest || 1864 if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
1865 (args->minleft && 1865 (!(flags & XFS_ALLOC_FLAG_FREEING) &&
1866 (int)(pag->pagf_freeblks + pag->pagf_flcount - 1866 (int)(pag->pagf_freeblks + pag->pagf_flcount -
1867 need - args->total) < 1867 need - args->total) <
1868 (int)args->minleft)) { 1868 (int)args->minleft)) {
@@ -1898,7 +1898,7 @@ xfs_alloc_fix_freelist(
1898 longest = (longest > delta) ? (longest - delta) : 1898 longest = (longest > delta) ? (longest - delta) :
1899 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); 1899 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1900 if (args->minlen + args->alignment + args->minalignslop - 1 > longest || 1900 if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
1901 (args->minleft && 1901 (!(flags & XFS_ALLOC_FLAG_FREEING) &&
1902 (int)(be32_to_cpu(agf->agf_freeblks) + 1902 (int)(be32_to_cpu(agf->agf_freeblks) +
1903 be32_to_cpu(agf->agf_flcount) - need - args->total) < 1903 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1904 (int)args->minleft)) { 1904 (int)args->minleft)) {
@@ -1951,8 +1951,14 @@ xfs_alloc_fix_freelist(
1951 * the restrictions correctly. Can happen for free calls 1951 * the restrictions correctly. Can happen for free calls
1952 * on a completely full ag. 1952 * on a completely full ag.
1953 */ 1953 */
1954 if (targs.agbno == NULLAGBLOCK) 1954 if (targs.agbno == NULLAGBLOCK) {
1955 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1956 xfs_trans_brelse(tp, agflbp);
1957 args->agbp = NULL;
1958 return 0;
1959 }
1955 break; 1960 break;
1961 }
1956 /* 1962 /*
1957 * Put each allocated block on the list. 1963 * Put each allocated block on the list.
1958 */ 1964 */
@@ -2360,8 +2366,19 @@ xfs_alloc_vextent(
2360 if (args->agno == sagno && 2366 if (args->agno == sagno &&
2361 type == XFS_ALLOCTYPE_START_BNO) 2367 type == XFS_ALLOCTYPE_START_BNO)
2362 args->type = XFS_ALLOCTYPE_THIS_AG; 2368 args->type = XFS_ALLOCTYPE_THIS_AG;
2363 if (++(args->agno) == mp->m_sb.sb_agcount) 2369 /*
2364 args->agno = 0; 2370 * For the first allocation, we can try any AG to get
2371 * space. However, if we already have allocated a
2372 * block, we don't want to try AGs whose number is below
2373 * sagno. Otherwise, we may end up with out-of-order
2374 * locking of AGF, which might cause deadlock.
2375 */
2376 if (++(args->agno) == mp->m_sb.sb_agcount) {
2377 if (args->firstblock != NULLFSBLOCK)
2378 args->agno = sagno;
2379 else
2380 args->agno = 0;
2381 }
2365 /* 2382 /*
2366 * Reached the starting a.g., must either be done 2383 * Reached the starting a.g., must either be done
2367 * or switch to non-trylock mode. 2384 * or switch to non-trylock mode.
@@ -2443,7 +2460,7 @@ xfs_free_extent(
2443 args.minlen = args.minleft = args.minalignslop = 0; 2460 args.minlen = args.minleft = args.minalignslop = 0;
2444 down_read(&args.mp->m_peraglock); 2461 down_read(&args.mp->m_peraglock);
2445 args.pag = &args.mp->m_perag[args.agno]; 2462 args.pag = &args.mp->m_perag[args.agno];
2446 if ((error = xfs_alloc_fix_freelist(&args, 0))) 2463 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
2447 goto error0; 2464 goto error0;
2448#ifdef DEBUG 2465#ifdef DEBUG
2449 ASSERT(args.agbp != NULL); 2466 ASSERT(args.agbp != NULL);
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 2d1f8928b267..650591f999ae 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -41,6 +41,7 @@ typedef enum xfs_alloctype
41 * Flags for xfs_alloc_fix_freelist. 41 * Flags for xfs_alloc_fix_freelist.
42 */ 42 */
43#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ 43#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
44#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
44 45
45/* 46/*
46 * Argument structure for xfs_alloc routines. 47 * Argument structure for xfs_alloc routines.
@@ -70,6 +71,7 @@ typedef struct xfs_alloc_arg {
70 char wasfromfl; /* set if allocation is from freelist */ 71 char wasfromfl; /* set if allocation is from freelist */
71 char isfl; /* set if is freelist blocks - !acctg */ 72 char isfl; /* set if is freelist blocks - !acctg */
72 char userdata; /* set if this is user data */ 73 char userdata; /* set if this is user data */
74 xfs_fsblock_t firstblock; /* io first block allocated */
73} xfs_alloc_arg_t; 75} xfs_alloc_arg_t;
74 76
75/* 77/*
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 890ad3528174..ad595dbefe16 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2762,6 +2762,7 @@ xfs_bmap_btalloc(
2762 args.mp = mp; 2762 args.mp = mp;
2763 args.fsbno = ap->rval; 2763 args.fsbno = ap->rval;
2764 args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); 2764 args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
2765 args.firstblock = ap->firstblock;
2765 blen = 0; 2766 blen = 0;
2766 if (nullfb) { 2767 if (nullfb) {
2767 args.type = XFS_ALLOCTYPE_START_BNO; 2768 args.type = XFS_ALLOCTYPE_START_BNO;
@@ -2821,7 +2822,7 @@ xfs_bmap_btalloc(
2821 else 2822 else
2822 args.minlen = ap->alen; 2823 args.minlen = ap->alen;
2823 } else if (ap->low) { 2824 } else if (ap->low) {
2824 args.type = XFS_ALLOCTYPE_FIRST_AG; 2825 args.type = XFS_ALLOCTYPE_START_BNO;
2825 args.total = args.minlen = ap->minlen; 2826 args.total = args.minlen = ap->minlen;
2826 } else { 2827 } else {
2827 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2828 args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -3452,6 +3453,7 @@ xfs_bmap_extents_to_btree(
3452 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); 3453 XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
3453 args.tp = tp; 3454 args.tp = tp;
3454 args.mp = mp; 3455 args.mp = mp;
3456 args.firstblock = *firstblock;
3455 if (*firstblock == NULLFSBLOCK) { 3457 if (*firstblock == NULLFSBLOCK) {
3456 args.type = XFS_ALLOCTYPE_START_BNO; 3458 args.type = XFS_ALLOCTYPE_START_BNO;
3457 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); 3459 args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
@@ -3587,6 +3589,7 @@ xfs_bmap_local_to_extents(
3587 3589
3588 args.tp = tp; 3590 args.tp = tp;
3589 args.mp = ip->i_mount; 3591 args.mp = ip->i_mount;
3592 args.firstblock = *firstblock;
3590 ASSERT((ifp->if_flags & 3593 ASSERT((ifp->if_flags &
3591 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); 3594 (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
3592 /* 3595 /*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index bea44709afbe..3b6dfc9b53af 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1569,12 +1569,11 @@ xfs_bmbt_split(
1569 lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp)); 1569 lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
1570 left = XFS_BUF_TO_BMBT_BLOCK(lbp); 1570 left = XFS_BUF_TO_BMBT_BLOCK(lbp);
1571 args.fsbno = cur->bc_private.b.firstblock; 1571 args.fsbno = cur->bc_private.b.firstblock;
1572 args.firstblock = args.fsbno;
1572 if (args.fsbno == NULLFSBLOCK) { 1573 if (args.fsbno == NULLFSBLOCK) {
1573 args.fsbno = lbno; 1574 args.fsbno = lbno;
1574 args.type = XFS_ALLOCTYPE_START_BNO; 1575 args.type = XFS_ALLOCTYPE_START_BNO;
1575 } else if (cur->bc_private.b.flist->xbf_low) 1576 } else
1576 args.type = XFS_ALLOCTYPE_FIRST_AG;
1577 else
1578 args.type = XFS_ALLOCTYPE_NEAR_BNO; 1577 args.type = XFS_ALLOCTYPE_NEAR_BNO;
1579 args.mod = args.minleft = args.alignment = args.total = args.isfl = 1578 args.mod = args.minleft = args.alignment = args.total = args.isfl =
1580 args.userdata = args.minalignslop = 0; 1579 args.userdata = args.minalignslop = 0;
@@ -2356,6 +2355,7 @@ xfs_bmbt_newroot(
2356 args.userdata = args.minalignslop = 0; 2355 args.userdata = args.minalignslop = 0;
2357 args.minlen = args.maxlen = args.prod = 1; 2356 args.minlen = args.maxlen = args.prod = 1;
2358 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; 2357 args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
2358 args.firstblock = args.fsbno;
2359 if (args.fsbno == NULLFSBLOCK) { 2359 if (args.fsbno == NULLFSBLOCK) {
2360#ifdef DEBUG 2360#ifdef DEBUG
2361 if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) { 2361 if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) {
@@ -2365,9 +2365,7 @@ xfs_bmbt_newroot(
2365#endif 2365#endif
2366 args.fsbno = INT_GET(*pp, ARCH_CONVERT); 2366 args.fsbno = INT_GET(*pp, ARCH_CONVERT);
2367 args.type = XFS_ALLOCTYPE_START_BNO; 2367 args.type = XFS_ALLOCTYPE_START_BNO;
2368 } else if (args.wasdel) 2368 } else
2369 args.type = XFS_ALLOCTYPE_FIRST_AG;
2370 else
2371 args.type = XFS_ALLOCTYPE_NEAR_BNO; 2369 args.type = XFS_ALLOCTYPE_NEAR_BNO;
2372 if ((error = xfs_alloc_vextent(&args))) { 2370 if ((error = xfs_alloc_vextent(&args))) {
2373 XFS_BMBT_TRACE_CURSOR(cur, ERROR); 2371 XFS_BMBT_TRACE_CURSOR(cur, ERROR);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c0b1c2906880..4b7be49cc4de 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1254,6 +1254,26 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1254 1254
1255 xfs_trans_log_buf(tp, bp, first, last); 1255 xfs_trans_log_buf(tp, bp, first, last);
1256} 1256}
1257
1258/*
1259 * In order to avoid ENOSPC-related deadlock caused by
1260 * out-of-order locking of AGF buffer (PV 947395), we place
1261 * constraints on the relationship among actual allocations for
1262 * data blocks, freelist blocks, and potential file data bmap
1263 * btree blocks. However, these restrictions may result in no
1264 * actual space allocated for a delayed extent, for example, a data
1265 * block in a certain AG is allocated but there is no additional
1266 * block for the additional bmap btree block due to a split of the
1267 * bmap btree of the file. The result of this may lead to an
1268 * infinite loop in xfssyncd when the file gets flushed to disk and
1269 * all delayed extents need to be actually allocated. To get around
1270 * this, we explicitly set aside a few blocks which will not be
1271 * reserved in delayed allocation. Considering the minimum number of
1272 * needed freelist blocks is 4 fsbs, a potential split of file's bmap
1273 * btree requires 1 fsb, so we set the number of set-aside blocks to 8.
1274*/
1275#define SET_ASIDE_BLOCKS 8
1276
1257/* 1277/*
1258 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply 1278 * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
1259 * a delta to a specified field in the in-core superblock. Simply 1279 * a delta to a specified field in the in-core superblock. Simply
@@ -1298,7 +1318,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1298 return 0; 1318 return 0;
1299 case XFS_SBS_FDBLOCKS: 1319 case XFS_SBS_FDBLOCKS:
1300 1320
1301 lcounter = (long long)mp->m_sb.sb_fdblocks; 1321 lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS;
1302 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); 1322 res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
1303 1323
1304 if (delta > 0) { /* Putting blocks back */ 1324 if (delta > 0) { /* Putting blocks back */
@@ -1332,7 +1352,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
1332 } 1352 }
1333 } 1353 }
1334 1354
1335 mp->m_sb.sb_fdblocks = lcounter; 1355 mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS;
1336 return 0; 1356 return 0;
1337 case XFS_SBS_FREXTENTS: 1357 case XFS_SBS_FREXTENTS:
1338 lcounter = (long long)mp->m_sb.sb_frextents; 1358 lcounter = (long long)mp->m_sb.sb_frextents;