diff options
-rw-r--r-- | fs/xfs/xfs_alloc.c | 29 | ||||
-rw-r--r-- | fs/xfs/xfs_alloc.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap.c | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_btree.c | 10 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 24 |
5 files changed, 55 insertions, 15 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 8558226281c4..22af489d3f34 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -1862,7 +1862,7 @@ xfs_alloc_fix_freelist( | |||
1862 | (pag->pagf_longest - delta) : | 1862 | (pag->pagf_longest - delta) : |
1863 | (pag->pagf_flcount > 0 || pag->pagf_longest > 0); | 1863 | (pag->pagf_flcount > 0 || pag->pagf_longest > 0); |
1864 | if (args->minlen + args->alignment + args->minalignslop - 1 > longest || | 1864 | if (args->minlen + args->alignment + args->minalignslop - 1 > longest || |
1865 | (args->minleft && | 1865 | (!(flags & XFS_ALLOC_FLAG_FREEING) && |
1866 | (int)(pag->pagf_freeblks + pag->pagf_flcount - | 1866 | (int)(pag->pagf_freeblks + pag->pagf_flcount - |
1867 | need - args->total) < | 1867 | need - args->total) < |
1868 | (int)args->minleft)) { | 1868 | (int)args->minleft)) { |
@@ -1898,7 +1898,7 @@ xfs_alloc_fix_freelist( | |||
1898 | longest = (longest > delta) ? (longest - delta) : | 1898 | longest = (longest > delta) ? (longest - delta) : |
1899 | (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); | 1899 | (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0); |
1900 | if (args->minlen + args->alignment + args->minalignslop - 1 > longest || | 1900 | if (args->minlen + args->alignment + args->minalignslop - 1 > longest || |
1901 | (args->minleft && | 1901 | (!(flags & XFS_ALLOC_FLAG_FREEING) && |
1902 | (int)(be32_to_cpu(agf->agf_freeblks) + | 1902 | (int)(be32_to_cpu(agf->agf_freeblks) + |
1903 | be32_to_cpu(agf->agf_flcount) - need - args->total) < | 1903 | be32_to_cpu(agf->agf_flcount) - need - args->total) < |
1904 | (int)args->minleft)) { | 1904 | (int)args->minleft)) { |
@@ -1951,8 +1951,14 @@ xfs_alloc_fix_freelist( | |||
1951 | * the restrictions correctly. Can happen for free calls | 1951 | * the restrictions correctly. Can happen for free calls |
1952 | * on a completely full ag. | 1952 | * on a completely full ag. |
1953 | */ | 1953 | */ |
1954 | if (targs.agbno == NULLAGBLOCK) | 1954 | if (targs.agbno == NULLAGBLOCK) { |
1955 | if (!(flags & XFS_ALLOC_FLAG_FREEING)) { | ||
1956 | xfs_trans_brelse(tp, agflbp); | ||
1957 | args->agbp = NULL; | ||
1958 | return 0; | ||
1959 | } | ||
1955 | break; | 1960 | break; |
1961 | } | ||
1956 | /* | 1962 | /* |
1957 | * Put each allocated block on the list. | 1963 | * Put each allocated block on the list. |
1958 | */ | 1964 | */ |
@@ -2360,8 +2366,19 @@ xfs_alloc_vextent( | |||
2360 | if (args->agno == sagno && | 2366 | if (args->agno == sagno && |
2361 | type == XFS_ALLOCTYPE_START_BNO) | 2367 | type == XFS_ALLOCTYPE_START_BNO) |
2362 | args->type = XFS_ALLOCTYPE_THIS_AG; | 2368 | args->type = XFS_ALLOCTYPE_THIS_AG; |
2363 | if (++(args->agno) == mp->m_sb.sb_agcount) | 2369 | /* |
2364 | args->agno = 0; | 2370 | * For the first allocation, we can try any AG to get |
2371 | * space. However, if we already have allocated a | ||
2372 | * block, we don't want to try AGs whose number is below | ||
2373 | * sagno. Otherwise, we may end up with out-of-order | ||
2374 | * locking of AGF, which might cause deadlock. | ||
2375 | */ | ||
2376 | if (++(args->agno) == mp->m_sb.sb_agcount) { | ||
2377 | if (args->firstblock != NULLFSBLOCK) | ||
2378 | args->agno = sagno; | ||
2379 | else | ||
2380 | args->agno = 0; | ||
2381 | } | ||
2365 | /* | 2382 | /* |
2366 | * Reached the starting a.g., must either be done | 2383 | * Reached the starting a.g., must either be done |
2367 | * or switch to non-trylock mode. | 2384 | * or switch to non-trylock mode. |
@@ -2443,7 +2460,7 @@ xfs_free_extent( | |||
2443 | args.minlen = args.minleft = args.minalignslop = 0; | 2460 | args.minlen = args.minleft = args.minalignslop = 0; |
2444 | down_read(&args.mp->m_peraglock); | 2461 | down_read(&args.mp->m_peraglock); |
2445 | args.pag = &args.mp->m_perag[args.agno]; | 2462 | args.pag = &args.mp->m_perag[args.agno]; |
2446 | if ((error = xfs_alloc_fix_freelist(&args, 0))) | 2463 | if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) |
2447 | goto error0; | 2464 | goto error0; |
2448 | #ifdef DEBUG | 2465 | #ifdef DEBUG |
2449 | ASSERT(args.agbp != NULL); | 2466 | ASSERT(args.agbp != NULL); |
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 2d1f8928b267..650591f999ae 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
@@ -41,6 +41,7 @@ typedef enum xfs_alloctype | |||
41 | * Flags for xfs_alloc_fix_freelist. | 41 | * Flags for xfs_alloc_fix_freelist. |
42 | */ | 42 | */ |
43 | #define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ | 43 | #define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ |
44 | #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ | ||
44 | 45 | ||
45 | /* | 46 | /* |
46 | * Argument structure for xfs_alloc routines. | 47 | * Argument structure for xfs_alloc routines. |
@@ -70,6 +71,7 @@ typedef struct xfs_alloc_arg { | |||
70 | char wasfromfl; /* set if allocation is from freelist */ | 71 | char wasfromfl; /* set if allocation is from freelist */ |
71 | char isfl; /* set if is freelist blocks - !acctg */ | 72 | char isfl; /* set if is freelist blocks - !acctg */ |
72 | char userdata; /* set if this is user data */ | 73 | char userdata; /* set if this is user data */ |
74 | xfs_fsblock_t firstblock; /* io first block allocated */ | ||
73 | } xfs_alloc_arg_t; | 75 | } xfs_alloc_arg_t; |
74 | 76 | ||
75 | /* | 77 | /* |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 890ad3528174..ad595dbefe16 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -2762,6 +2762,7 @@ xfs_bmap_btalloc( | |||
2762 | args.mp = mp; | 2762 | args.mp = mp; |
2763 | args.fsbno = ap->rval; | 2763 | args.fsbno = ap->rval; |
2764 | args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); | 2764 | args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks); |
2765 | args.firstblock = ap->firstblock; | ||
2765 | blen = 0; | 2766 | blen = 0; |
2766 | if (nullfb) { | 2767 | if (nullfb) { |
2767 | args.type = XFS_ALLOCTYPE_START_BNO; | 2768 | args.type = XFS_ALLOCTYPE_START_BNO; |
@@ -2821,7 +2822,7 @@ xfs_bmap_btalloc( | |||
2821 | else | 2822 | else |
2822 | args.minlen = ap->alen; | 2823 | args.minlen = ap->alen; |
2823 | } else if (ap->low) { | 2824 | } else if (ap->low) { |
2824 | args.type = XFS_ALLOCTYPE_FIRST_AG; | 2825 | args.type = XFS_ALLOCTYPE_START_BNO; |
2825 | args.total = args.minlen = ap->minlen; | 2826 | args.total = args.minlen = ap->minlen; |
2826 | } else { | 2827 | } else { |
2827 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 2828 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
@@ -3452,6 +3453,7 @@ xfs_bmap_extents_to_btree( | |||
3452 | XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); | 3453 | XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE); |
3453 | args.tp = tp; | 3454 | args.tp = tp; |
3454 | args.mp = mp; | 3455 | args.mp = mp; |
3456 | args.firstblock = *firstblock; | ||
3455 | if (*firstblock == NULLFSBLOCK) { | 3457 | if (*firstblock == NULLFSBLOCK) { |
3456 | args.type = XFS_ALLOCTYPE_START_BNO; | 3458 | args.type = XFS_ALLOCTYPE_START_BNO; |
3457 | args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); | 3459 | args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); |
@@ -3587,6 +3589,7 @@ xfs_bmap_local_to_extents( | |||
3587 | 3589 | ||
3588 | args.tp = tp; | 3590 | args.tp = tp; |
3589 | args.mp = ip->i_mount; | 3591 | args.mp = ip->i_mount; |
3592 | args.firstblock = *firstblock; | ||
3590 | ASSERT((ifp->if_flags & | 3593 | ASSERT((ifp->if_flags & |
3591 | (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); | 3594 | (XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE); |
3592 | /* | 3595 | /* |
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index bea44709afbe..3b6dfc9b53af 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
@@ -1569,12 +1569,11 @@ xfs_bmbt_split( | |||
1569 | lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp)); | 1569 | lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp)); |
1570 | left = XFS_BUF_TO_BMBT_BLOCK(lbp); | 1570 | left = XFS_BUF_TO_BMBT_BLOCK(lbp); |
1571 | args.fsbno = cur->bc_private.b.firstblock; | 1571 | args.fsbno = cur->bc_private.b.firstblock; |
1572 | args.firstblock = args.fsbno; | ||
1572 | if (args.fsbno == NULLFSBLOCK) { | 1573 | if (args.fsbno == NULLFSBLOCK) { |
1573 | args.fsbno = lbno; | 1574 | args.fsbno = lbno; |
1574 | args.type = XFS_ALLOCTYPE_START_BNO; | 1575 | args.type = XFS_ALLOCTYPE_START_BNO; |
1575 | } else if (cur->bc_private.b.flist->xbf_low) | 1576 | } else |
1576 | args.type = XFS_ALLOCTYPE_FIRST_AG; | ||
1577 | else | ||
1578 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 1577 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
1579 | args.mod = args.minleft = args.alignment = args.total = args.isfl = | 1578 | args.mod = args.minleft = args.alignment = args.total = args.isfl = |
1580 | args.userdata = args.minalignslop = 0; | 1579 | args.userdata = args.minalignslop = 0; |
@@ -2356,6 +2355,7 @@ xfs_bmbt_newroot( | |||
2356 | args.userdata = args.minalignslop = 0; | 2355 | args.userdata = args.minalignslop = 0; |
2357 | args.minlen = args.maxlen = args.prod = 1; | 2356 | args.minlen = args.maxlen = args.prod = 1; |
2358 | args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; | 2357 | args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL; |
2358 | args.firstblock = args.fsbno; | ||
2359 | if (args.fsbno == NULLFSBLOCK) { | 2359 | if (args.fsbno == NULLFSBLOCK) { |
2360 | #ifdef DEBUG | 2360 | #ifdef DEBUG |
2361 | if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) { | 2361 | if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) { |
@@ -2365,9 +2365,7 @@ xfs_bmbt_newroot( | |||
2365 | #endif | 2365 | #endif |
2366 | args.fsbno = INT_GET(*pp, ARCH_CONVERT); | 2366 | args.fsbno = INT_GET(*pp, ARCH_CONVERT); |
2367 | args.type = XFS_ALLOCTYPE_START_BNO; | 2367 | args.type = XFS_ALLOCTYPE_START_BNO; |
2368 | } else if (args.wasdel) | 2368 | } else |
2369 | args.type = XFS_ALLOCTYPE_FIRST_AG; | ||
2370 | else | ||
2371 | args.type = XFS_ALLOCTYPE_NEAR_BNO; | 2369 | args.type = XFS_ALLOCTYPE_NEAR_BNO; |
2372 | if ((error = xfs_alloc_vextent(&args))) { | 2370 | if ((error = xfs_alloc_vextent(&args))) { |
2373 | XFS_BMBT_TRACE_CURSOR(cur, ERROR); | 2371 | XFS_BMBT_TRACE_CURSOR(cur, ERROR); |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index c0b1c2906880..4b7be49cc4de 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -1254,6 +1254,26 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields) | |||
1254 | 1254 | ||
1255 | xfs_trans_log_buf(tp, bp, first, last); | 1255 | xfs_trans_log_buf(tp, bp, first, last); |
1256 | } | 1256 | } |
1257 | |||
1258 | /* | ||
1259 | * In order to avoid ENOSPC-related deadlock caused by | ||
1260 | * out-of-order locking of AGF buffer (PV 947395), we place | ||
1261 | * constraints on the relationship among actual allocations for | ||
1262 | * data blocks, freelist blocks, and potential file data bmap | ||
1263 | * btree blocks. However, these restrictions may result in no | ||
1264 | * actual space allocated for a delayed extent, for example, a data | ||
1265 | * block in a certain AG is allocated but there is no additional | ||
1266 | * block for the additional bmap btree block due to a split of the | ||
1267 | * bmap btree of the file. The result of this may lead to an | ||
1268 | * infinite loop in xfssyncd when the file gets flushed to disk and | ||
1269 | * all delayed extents need to be actually allocated. To get around | ||
1270 | * this, we explicitly set aside a few blocks which will not be | ||
1271 | * reserved in delayed allocation. Since the minimum number of | ||
1272 | * needed freelist blocks is 4 fsbs and a potential split of the file's | ||
1273 | * bmap btree requires 1 fsb, we set the number of set-aside blocks to 8. | ||
1274 | */ | ||
1275 | #define SET_ASIDE_BLOCKS 8 | ||
1276 | |||
1257 | /* | 1277 | /* |
1258 | * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply | 1278 | * xfs_mod_incore_sb_unlocked() is a utility routine commonly used to apply |
1259 | * a delta to a specified field in the in-core superblock. Simply | 1279 | * a delta to a specified field in the in-core superblock. Simply |
@@ -1298,7 +1318,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, | |||
1298 | return 0; | 1318 | return 0; |
1299 | case XFS_SBS_FDBLOCKS: | 1319 | case XFS_SBS_FDBLOCKS: |
1300 | 1320 | ||
1301 | lcounter = (long long)mp->m_sb.sb_fdblocks; | 1321 | lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS; |
1302 | res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); | 1322 | res_used = (long long)(mp->m_resblks - mp->m_resblks_avail); |
1303 | 1323 | ||
1304 | if (delta > 0) { /* Putting blocks back */ | 1324 | if (delta > 0) { /* Putting blocks back */ |
@@ -1332,7 +1352,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field, | |||
1332 | } | 1352 | } |
1333 | } | 1353 | } |
1334 | 1354 | ||
1335 | mp->m_sb.sb_fdblocks = lcounter; | 1355 | mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS; |
1336 | return 0; | 1356 | return 0; |
1337 | case XFS_SBS_FREXTENTS: | 1357 | case XFS_SBS_FREXTENTS: |
1338 | lcounter = (long long)mp->m_sb.sb_frextents; | 1358 | lcounter = (long long)mp->m_sb.sb_frextents; |