aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_mount.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_mount.c')
-rw-r--r--fs/xfs/xfs_mount.c236
1 files changed, 185 insertions, 51 deletions
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index eb403b40e120..e79b56b4bca6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -201,6 +201,38 @@ xfs_uuid_unmount(
201 201
202 202
203/* 203/*
204 * Reference counting access wrappers to the perag structures.
205 */
206struct xfs_perag *
207xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
208{
209 struct xfs_perag *pag;
210 int ref = 0;
211
212 spin_lock(&mp->m_perag_lock);
213 pag = radix_tree_lookup(&mp->m_perag_tree, agno);
214 if (pag) {
215 ASSERT(atomic_read(&pag->pag_ref) >= 0);
216 /* catch leaks in the positive direction during testing */
217 ASSERT(atomic_read(&pag->pag_ref) < 1000);
218 ref = atomic_inc_return(&pag->pag_ref);
219 }
220 spin_unlock(&mp->m_perag_lock);
221 trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
222 return pag;
223}
224
225void
226xfs_perag_put(struct xfs_perag *pag)
227{
228 int ref;
229
230 ASSERT(atomic_read(&pag->pag_ref) > 0);
231 ref = atomic_dec_return(&pag->pag_ref);
232 trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
233}
234
235/*
204 * Free up the resources associated with a mount structure. Assume that 236 * Free up the resources associated with a mount structure. Assume that
205 * the structure was initially zeroed, so we can tell which fields got 237 * the structure was initially zeroed, so we can tell which fields got
206 * initialized. 238 * initialized.
@@ -209,13 +241,16 @@ STATIC void
209xfs_free_perag( 241xfs_free_perag(
210 xfs_mount_t *mp) 242 xfs_mount_t *mp)
211{ 243{
212 if (mp->m_perag) { 244 xfs_agnumber_t agno;
213 int agno; 245 struct xfs_perag *pag;
214 246
215 for (agno = 0; agno < mp->m_maxagi; agno++) 247 for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
216 if (mp->m_perag[agno].pagb_list) 248 spin_lock(&mp->m_perag_lock);
217 kmem_free(mp->m_perag[agno].pagb_list); 249 pag = radix_tree_delete(&mp->m_perag_tree, agno);
218 kmem_free(mp->m_perag); 250 ASSERT(pag);
251 ASSERT(atomic_read(&pag->pag_ref) == 0);
252 spin_unlock(&mp->m_perag_lock);
253 kmem_free(pag);
219 } 254 }
220} 255}
221 256
@@ -389,22 +424,57 @@ xfs_initialize_perag_icache(
389 } 424 }
390} 425}
391 426
392xfs_agnumber_t 427int
393xfs_initialize_perag( 428xfs_initialize_perag(
394 xfs_mount_t *mp, 429 xfs_mount_t *mp,
395 xfs_agnumber_t agcount) 430 xfs_agnumber_t agcount,
431 xfs_agnumber_t *maxagi)
396{ 432{
397 xfs_agnumber_t index, max_metadata; 433 xfs_agnumber_t index, max_metadata;
434 xfs_agnumber_t first_initialised = 0;
398 xfs_perag_t *pag; 435 xfs_perag_t *pag;
399 xfs_agino_t agino; 436 xfs_agino_t agino;
400 xfs_ino_t ino; 437 xfs_ino_t ino;
401 xfs_sb_t *sbp = &mp->m_sb; 438 xfs_sb_t *sbp = &mp->m_sb;
402 xfs_ino_t max_inum = XFS_MAXINUMBER_32; 439 xfs_ino_t max_inum = XFS_MAXINUMBER_32;
440 int error = -ENOMEM;
403 441
404 /* Check to see if the filesystem can overflow 32 bit inodes */ 442 /* Check to see if the filesystem can overflow 32 bit inodes */
405 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0); 443 agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
406 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino); 444 ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
407 445
446 /*
447 * Walk the current per-ag tree so we don't try to initialise AGs
448 * that already exist (growfs case). Allocate and insert all the
449 * AGs we don't find ready for initialisation.
450 */
451 for (index = 0; index < agcount; index++) {
452 pag = xfs_perag_get(mp, index);
453 if (pag) {
454 xfs_perag_put(pag);
455 continue;
456 }
457 if (!first_initialised)
458 first_initialised = index;
459 pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
460 if (!pag)
461 goto out_unwind;
462 if (radix_tree_preload(GFP_NOFS))
463 goto out_unwind;
464 spin_lock(&mp->m_perag_lock);
465 if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
466 BUG();
467 spin_unlock(&mp->m_perag_lock);
468 radix_tree_preload_end();
469 error = -EEXIST;
470 goto out_unwind;
471 }
472 pag->pag_agno = index;
473 pag->pag_mount = mp;
474 spin_unlock(&mp->m_perag_lock);
475 radix_tree_preload_end();
476 }
477
408 /* Clear the mount flag if no inode can overflow 32 bits 478 /* Clear the mount flag if no inode can overflow 32 bits
409 * on this filesystem, or if specifically requested.. 479 * on this filesystem, or if specifically requested..
410 */ 480 */
@@ -438,21 +508,33 @@ xfs_initialize_perag(
438 } 508 }
439 509
440 /* This ag is preferred for inodes */ 510 /* This ag is preferred for inodes */
441 pag = &mp->m_perag[index]; 511 pag = xfs_perag_get(mp, index);
442 pag->pagi_inodeok = 1; 512 pag->pagi_inodeok = 1;
443 if (index < max_metadata) 513 if (index < max_metadata)
444 pag->pagf_metadata = 1; 514 pag->pagf_metadata = 1;
445 xfs_initialize_perag_icache(pag); 515 xfs_initialize_perag_icache(pag);
516 xfs_perag_put(pag);
446 } 517 }
447 } else { 518 } else {
448 /* Setup default behavior for smaller filesystems */ 519 /* Setup default behavior for smaller filesystems */
449 for (index = 0; index < agcount; index++) { 520 for (index = 0; index < agcount; index++) {
450 pag = &mp->m_perag[index]; 521 pag = xfs_perag_get(mp, index);
451 pag->pagi_inodeok = 1; 522 pag->pagi_inodeok = 1;
452 xfs_initialize_perag_icache(pag); 523 xfs_initialize_perag_icache(pag);
524 xfs_perag_put(pag);
453 } 525 }
454 } 526 }
455 return index; 527 if (maxagi)
528 *maxagi = index;
529 return 0;
530
531out_unwind:
532 kmem_free(pag);
533 for (; index > first_initialised; index--) {
534 pag = radix_tree_delete(&mp->m_perag_tree, index);
535 kmem_free(pag);
536 }
537 return error;
456} 538}
457 539
458void 540void
@@ -583,7 +665,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
583 * access to the superblock. 665 * access to the superblock.
584 */ 666 */
585 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); 667 sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
586 extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED; 668 extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED;
587 669
588 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), 670 bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
589 extra_flags); 671 extra_flags);
@@ -731,12 +813,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
731 error = xfs_ialloc_pagi_init(mp, NULL, index); 813 error = xfs_ialloc_pagi_init(mp, NULL, index);
732 if (error) 814 if (error)
733 return error; 815 return error;
734 pag = &mp->m_perag[index]; 816 pag = xfs_perag_get(mp, index);
735 ifree += pag->pagi_freecount; 817 ifree += pag->pagi_freecount;
736 ialloc += pag->pagi_count; 818 ialloc += pag->pagi_count;
737 bfree += pag->pagf_freeblks; 819 bfree += pag->pagf_freeblks;
738 bfreelst += pag->pagf_flcount; 820 bfreelst += pag->pagf_flcount;
739 btree += pag->pagf_btreeblks; 821 btree += pag->pagf_btreeblks;
822 xfs_perag_put(pag);
740 } 823 }
741 /* 824 /*
742 * Overwrite incore superblock counters with just-read data 825 * Overwrite incore superblock counters with just-read data
@@ -1008,6 +1091,24 @@ xfs_mount_reset_sbqflags(
1008 return xfs_trans_commit(tp, 0); 1091 return xfs_trans_commit(tp, 0);
1009} 1092}
1010 1093
1094__uint64_t
1095xfs_default_resblks(xfs_mount_t *mp)
1096{
1097 __uint64_t resblks;
1098
1099 /*
1100 * We default to 5% or 8192 fsbs of space reserved, whichever is
1101 * smaller. This is intended to cover concurrent allocation
1102 * transactions when we initially hit enospc. These each require a 4
1103 * block reservation. Hence by default we cover roughly 2000 concurrent
1104 * allocation reservations.
1105 */
1106 resblks = mp->m_sb.sb_dblocks;
1107 do_div(resblks, 20);
1108 resblks = min_t(__uint64_t, resblks, 8192);
1109 return resblks;
1110}
1111
1011/* 1112/*
1012 * This function does the following on an initial mount of a file system: 1113 * This function does the following on an initial mount of a file system:
1013 * - reads the superblock from disk and init the mount struct 1114 * - reads the superblock from disk and init the mount struct
@@ -1152,13 +1253,13 @@ xfs_mountfs(
1152 /* 1253 /*
1153 * Allocate and initialize the per-ag data. 1254 * Allocate and initialize the per-ag data.
1154 */ 1255 */
1155 init_rwsem(&mp->m_peraglock); 1256 spin_lock_init(&mp->m_perag_lock);
1156 mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t), 1257 INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
1157 KM_MAYFAIL); 1258 error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
1158 if (!mp->m_perag) 1259 if (error) {
1260 cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
1159 goto out_remove_uuid; 1261 goto out_remove_uuid;
1160 1262 }
1161 mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
1162 1263
1163 if (!sbp->sb_logblocks) { 1264 if (!sbp->sb_logblocks) {
1164 cmn_err(CE_WARN, "XFS: no log defined"); 1265 cmn_err(CE_WARN, "XFS: no log defined");
@@ -1319,17 +1420,16 @@ xfs_mountfs(
1319 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations 1420 * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
1320 * are not allowed to use this reserved space. 1421 * are not allowed to use this reserved space.
1321 * 1422 *
1322 * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1323 * This may drive us straight to ENOSPC on mount, but that implies 1423 * This may drive us straight to ENOSPC on mount, but that implies
1324 * we were already there on the last unmount. Warn if this occurs. 1424 * we were already there on the last unmount. Warn if this occurs.
1325 */ 1425 */
1326 resblks = mp->m_sb.sb_dblocks; 1426 if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
1327 do_div(resblks, 20); 1427 resblks = xfs_default_resblks(mp);
1328 resblks = min_t(__uint64_t, resblks, 1024); 1428 error = xfs_reserve_blocks(mp, &resblks, NULL);
1329 error = xfs_reserve_blocks(mp, &resblks, NULL); 1429 if (error)
1330 if (error) 1430 cmn_err(CE_WARN, "XFS: Unable to allocate reserve "
1331 cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. " 1431 "blocks. Continuing without a reserve pool.");
1332 "Continuing without a reserve pool."); 1432 }
1333 1433
1334 return 0; 1434 return 0;
1335 1435
@@ -1372,8 +1472,19 @@ xfs_unmountfs(
1372 * push out the iclog we will never get that unlocked. hence we 1472 * push out the iclog we will never get that unlocked. hence we
1373 * need to force the log first. 1473 * need to force the log first.
1374 */ 1474 */
1375 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1475 xfs_log_force(mp, XFS_LOG_SYNC);
1376 xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC); 1476
1477 /*
1478 * Do a delwri reclaim pass first so that as many dirty inodes are
1479 * queued up for IO as possible. Then flush the buffers before making
1480 * a synchronous pass to ensure all the remaining inodes are reclaimed.
1481 * This makes the reclaim process as quick as possible by avoiding
1482 * synchronous writeout and blocking on inodes already in the delwri
1483 * state as much as possible.
1484 */
1485 xfs_reclaim_inodes(mp, 0);
1486 XFS_bflush(mp->m_ddev_targp);
1487 xfs_reclaim_inodes(mp, SYNC_WAIT);
1377 1488
1378 xfs_qm_unmount(mp); 1489 xfs_qm_unmount(mp);
1379 1490
@@ -1382,7 +1493,7 @@ xfs_unmountfs(
1382 * that nothing is pinned. This is important because bflush() 1493 * that nothing is pinned. This is important because bflush()
1383 * will skip pinned buffers. 1494 * will skip pinned buffers.
1384 */ 1495 */
1385 xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC); 1496 xfs_log_force(mp, XFS_LOG_SYNC);
1386 1497
1387 xfs_binval(mp->m_ddev_targp); 1498 xfs_binval(mp->m_ddev_targp);
1388 if (mp->m_rtdev_targp) { 1499 if (mp->m_rtdev_targp) {
@@ -1548,15 +1659,14 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1548 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields); 1659 xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);
1549 1660
1550 /* find modified range */ 1661 /* find modified range */
1662 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1663 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1664 last = xfs_sb_info[f + 1].offset - 1;
1551 1665
1552 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields); 1666 f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
1553 ASSERT((1LL << f) & XFS_SB_MOD_BITS); 1667 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1554 first = xfs_sb_info[f].offset; 1668 first = xfs_sb_info[f].offset;
1555 1669
1556 f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1557 ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1558 last = xfs_sb_info[f + 1].offset - 1;
1559
1560 xfs_trans_log_buf(tp, bp, first, last); 1670 xfs_trans_log_buf(tp, bp, first, last);
1561} 1671}
1562 1672
@@ -1620,26 +1730,30 @@ xfs_mod_incore_sb_unlocked(
1620 lcounter += rem; 1730 lcounter += rem;
1621 } 1731 }
1622 } else { /* Taking blocks away */ 1732 } else { /* Taking blocks away */
1623
1624 lcounter += delta; 1733 lcounter += delta;
1734 if (lcounter >= 0) {
1735 mp->m_sb.sb_fdblocks = lcounter +
1736 XFS_ALLOC_SET_ASIDE(mp);
1737 return 0;
1738 }
1625 1739
1626 /* 1740 /*
1627 * If were out of blocks, use any available reserved blocks if 1741 * We are out of blocks, use any available reserved
1628 * were allowed to. 1742 * blocks if were allowed to.
1629 */ 1743 */
1744 if (!rsvd)
1745 return XFS_ERROR(ENOSPC);
1630 1746
1631 if (lcounter < 0) { 1747 lcounter = (long long)mp->m_resblks_avail + delta;
1632 if (rsvd) { 1748 if (lcounter >= 0) {
1633 lcounter = (long long)mp->m_resblks_avail + delta; 1749 mp->m_resblks_avail = lcounter;
1634 if (lcounter < 0) { 1750 return 0;
1635 return XFS_ERROR(ENOSPC);
1636 }
1637 mp->m_resblks_avail = lcounter;
1638 return 0;
1639 } else { /* not reserved */
1640 return XFS_ERROR(ENOSPC);
1641 }
1642 } 1751 }
1752 printk_once(KERN_WARNING
1753 "Filesystem \"%s\": reserve blocks depleted! "
1754 "Consider increasing reserve pool size.",
1755 mp->m_fsname);
1756 return XFS_ERROR(ENOSPC);
1643 } 1757 }
1644 1758
1645 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp); 1759 mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
@@ -1887,7 +2001,7 @@ xfs_getsb(
1887 2001
1888 ASSERT(mp->m_sb_bp != NULL); 2002 ASSERT(mp->m_sb_bp != NULL);
1889 bp = mp->m_sb_bp; 2003 bp = mp->m_sb_bp;
1890 if (flags & XFS_BUF_TRYLOCK) { 2004 if (flags & XBF_TRYLOCK) {
1891 if (!XFS_BUF_CPSEMA(bp)) { 2005 if (!XFS_BUF_CPSEMA(bp)) {
1892 return NULL; 2006 return NULL;
1893 } 2007 }
@@ -1947,6 +2061,26 @@ xfs_mount_log_sb(
1947 return error; 2061 return error;
1948} 2062}
1949 2063
2064/*
2065 * If the underlying (data/log/rt) device is readonly, there are some
2066 * operations that cannot proceed.
2067 */
2068int
2069xfs_dev_is_read_only(
2070 struct xfs_mount *mp,
2071 char *message)
2072{
2073 if (xfs_readonly_buftarg(mp->m_ddev_targp) ||
2074 xfs_readonly_buftarg(mp->m_logdev_targp) ||
2075 (mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
2076 cmn_err(CE_NOTE,
2077 "XFS: %s required on read-only device.", message);
2078 cmn_err(CE_NOTE,
2079 "XFS: write access unavailable, cannot proceed.");
2080 return EROFS;
2081 }
2082 return 0;
2083}
1950 2084
1951#ifdef HAVE_PERCPU_SB 2085#ifdef HAVE_PERCPU_SB
1952/* 2086/*