1 files changed, 185 insertions, 51 deletions
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index eb403b40e120..e79b56b4bca6 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -201,6 +201,38 @@ xfs_uuid_unmount(
 /*
+ * Reference counting access wrappers to the perag structures.
+ */
+/*
+ * Look up the per-ag structure for @agno in mp->m_perag_tree while holding
+ * mp->m_perag_lock.  When the lookup finds an entry, its pag_ref count is
+ * incremented before the lock is dropped.  The result of the lookup is
+ * returned as-is, so callers must cope with nothing being found.  The
+ * lookup is traced in both cases; the traced count is 0 when nothing was
+ * found.
+ */
+struct xfs_perag *
+xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
+{
+        struct xfs_perag        *found;
+        int                     refs = 0;
+        spin_lock(&mp->m_perag_lock);
+        found = radix_tree_lookup(&mp->m_perag_tree, agno);
+        if (found != NULL) {
+                /* a negative count means somebody dropped one reference too many */
+                ASSERT(atomic_read(&found->pag_ref) >= 0);
+                /* an implausibly large count flags a reference leak during testing */
+                ASSERT(atomic_read(&found->pag_ref) < 1000);
+                refs = atomic_inc_return(&found->pag_ref);
+        }
+        spin_unlock(&mp->m_perag_lock);
+        trace_xfs_perag_get(mp, agno, refs, _RET_IP_);
+        return found;
+}
+/*
+ * Drop one reference on @pag.  The count must be positive on entry
+ * (asserted).  Nothing is freed here: the function only decrements
+ * pag_ref and traces the resulting count.
+ */
+void
+xfs_perag_put(struct xfs_perag *pag)
+{
+        int     remaining;
+        ASSERT(atomic_read(&pag->pag_ref) > 0);
+        remaining = atomic_dec_return(&pag->pag_ref);
+        trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, remaining,
+                            _RET_IP_);
+}
+/*
 * Free up the resources associated with a mount structure.  Assume that
 * the structure was initially zeroed, so we can tell which fields got
 * initialized.
@@ -209,13 +241,16 @@ STATIC void
 xfs_free_perag(
        xfs_mount_t     *mp)
 {
-        if (mp->m_perag) {
+        xfs_agnumber_t  agno;
-                int     agno;
+        struct xfs_perag *pag;
-                for (agno = 0; agno < mp->m_maxagi; agno++)
+        for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
-                        if (mp->m_perag[agno].pagb_list)
+                spin_lock(&mp->m_perag_lock);
-                                kmem_free(mp->m_perag[agno].pagb_list);
+                pag = radix_tree_delete(&mp->m_perag_tree, agno);
-                kmem_free(mp->m_perag);
+                ASSERT(pag);
+                ASSERT(atomic_read(&pag->pag_ref) == 0);
+                spin_unlock(&mp->m_perag_lock);
+                kmem_free(pag);
        }
 }
@@ -389,22 +424,57 @@ xfs_initialize_perag_icache(
        }
 }
-xfs_agnumber_t
+int
 xfs_initialize_perag(
        xfs_mount_t     *mp,
-        xfs_agnumber_t  agcount)
+        xfs_agnumber_t  agcount,
+        xfs_agnumber_t  *maxagi)
 {
        xfs_agnumber_t  index, max_metadata;
+        xfs_agnumber_t  first_initialised = 0;
        xfs_perag_t     *pag;
        xfs_agino_t     agino;
        xfs_ino_t       ino;
        xfs_sb_t        *sbp = &mp->m_sb;
        xfs_ino_t       max_inum = XFS_MAXINUMBER_32;
+        int             error = -ENOMEM;
        /* Check to see if the filesystem can overflow 32 bit inodes */
        agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
        ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
+        /*
+         * Walk the current per-ag tree so we don't try to initialise AGs
+         * that already exist (growfs case). Allocate and insert all the
+         * AGs we don't find ready for initialisation.
+         */
+        for (index = 0; index < agcount; index++) {
+                pag = xfs_perag_get(mp, index);
+                if (pag) {
+                        xfs_perag_put(pag);
+                        continue;
+                }
+                if (!first_initialised)
+                        first_initialised = index;
+                pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
+                if (!pag)
+                        goto out_unwind;
+                if (radix_tree_preload(GFP_NOFS))
+                        goto out_unwind;
+                spin_lock(&mp->m_perag_lock);
+                if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
+                        BUG();
+                        spin_unlock(&mp->m_perag_lock);
+                        radix_tree_preload_end();
+                        error = -EEXIST;
+                        goto out_unwind;
+                }
+                pag->pag_agno = index;
+                pag->pag_mount = mp;
+                spin_unlock(&mp->m_perag_lock);
+                radix_tree_preload_end();
+        }
        /* Clear the mount flag if no inode can overflow 32 bits
         * on this filesystem, or if specifically requested..
         */
@@ -438,21 +508,33 @@ xfs_initialize_perag(
                        }
                        /* This ag is preferred for inodes */
-                        pag = &mp->m_perag[index];
+                        pag = xfs_perag_get(mp, index);
                        pag->pagi_inodeok = 1;
                        if (index < max_metadata)
                                pag->pagf_metadata = 1;
                        xfs_initialize_perag_icache(pag);
+                        xfs_perag_put(pag);
                }
        } else {
                /* Setup default behavior for smaller filesystems */
                for (index = 0; index < agcount; index++) {
-                        pag = &mp->m_perag[index];
+                        pag = xfs_perag_get(mp, index);
                        pag->pagi_inodeok = 1;
                        xfs_initialize_perag_icache(pag);
+                        xfs_perag_put(pag);
                }
        }
-        return index;
+        if (maxagi)
+                *maxagi = index;
+        return 0;
+out_unwind:
+        kmem_free(pag);
+        for (; index > first_initialised; index--) {
+                pag = radix_tree_delete(&mp->m_perag_tree, index);
+                kmem_free(pag);
+        }
+        return error;
 }
 void
@@ -583,7 +665,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
         * access to the superblock.
         */
        sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
-        extra_flags = XFS_BUF_LOCK | XFS_BUF_MANAGE | XFS_BUF_MAPPED;
+        extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED;
        bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
                          extra_flags);
@@ -731,12 +813,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
                error = xfs_ialloc_pagi_init(mp, NULL, index);
                if (error)
                        return error;
-                pag = &mp->m_perag[index];
+                pag = xfs_perag_get(mp, index);
                ifree += pag->pagi_freecount;
                ialloc += pag->pagi_count;
                bfree += pag->pagf_freeblks;
                bfreelst += pag->pagf_flcount;
                btree += pag->pagf_btreeblks;
+                xfs_perag_put(pag);
        }
        /*
         * Overwrite incore superblock counters with just-read data
@@ -1008,6 +1091,24 @@ xfs_mount_reset_sbqflags(
        return xfs_trans_commit(tp, 0);
 }
+/*
+ * Compute the default size of the reserve block pool for @mp: one
+ * twentieth (5%) of sb_dblocks, capped at 8192 filesystem blocks.
+ */
+__uint64_t
+xfs_default_resblks(xfs_mount_t *mp)
+{
+        __uint64_t five_percent = mp->m_sb.sb_dblocks;
+        /*
+         * The pool is meant to cover concurrent allocation transactions
+         * at the point we first hit ENOSPC.  Each of those needs a 4 block
+         * reservation, so the 8192 block cap covers roughly 2000 of them.
+         */
+        do_div(five_percent, 20);       /* divides in place */
+        return min_t(__uint64_t, five_percent, 8192);
+}
 /*
 * This function does the following on an initial mount of a file system:
 *      - reads the superblock from disk and init the mount struct
@@ -1152,13 +1253,13 @@ xfs_mountfs(
        /*
         * Allocate and initialize the per-ag data.
         */
-        init_rwsem(&mp->m_peraglock);
+        spin_lock_init(&mp->m_perag_lock);
-        mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t),
+        INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
-                                  KM_MAYFAIL);
+        error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
-        if (!mp->m_perag)
+        if (error) {
+                cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
                goto out_remove_uuid;
+        }
-        mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
        if (!sbp->sb_logblocks) {
                cmn_err(CE_WARN, "XFS: no log defined");
@@ -1319,17 +1420,16 @@ xfs_mountfs(
         * attr, unwritten extent conversion at ENOSPC, etc. Data allocations
         * are not allowed to use this reserved space.
         *
-         * We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
         * This may drive us straight to ENOSPC on mount, but that implies
         * we were already there on the last unmount. Warn if this occurs.
         */
-        resblks = mp->m_sb.sb_dblocks;
+        if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
-        do_div(resblks, 20);
+                resblks = xfs_default_resblks(mp);
-        resblks = min_t(__uint64_t, resblks, 1024);
+                error = xfs_reserve_blocks(mp, &resblks, NULL);
-        error = xfs_reserve_blocks(mp, &resblks, NULL);
+                if (error)
-        if (error)
+                        cmn_err(CE_WARN, "XFS: Unable to allocate reserve "
-                cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. "
+                                "blocks. Continuing without a reserve pool.");
-                                "Continuing without a reserve pool.");
+        }
        return 0;
@@ -1372,8 +1472,19 @@ xfs_unmountfs(
         * push out the iclog we will never get that unlocked. hence we
         * need to force the log first.
         */
-        xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
+        xfs_log_force(mp, XFS_LOG_SYNC);
-        xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC);
+        /*
+         * Do a delwri reclaim pass first so that as many dirty inodes are
+         * queued up for IO as possible. Then flush the buffers before making
+         * a synchronous pass to ensure all the remaining inodes are reclaimed.
+         * This makes the reclaim process as quick as possible by avoiding
+         * synchronous writeout and blocking on inodes already in the delwri
+         * state as much as possible.
+         */
+        xfs_reclaim_inodes(mp, 0);
+        XFS_bflush(mp->m_ddev_targp);
+        xfs_reclaim_inodes(mp, SYNC_WAIT);
        xfs_qm_unmount(mp);
@@ -1382,7 +1493,7 @@ xfs_unmountfs(
         * that nothing is pinned.  This is important because bflush()
         * will skip pinned buffers.
         */
-        xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE | XFS_LOG_SYNC);
+        xfs_log_force(mp, XFS_LOG_SYNC);
        xfs_binval(mp->m_ddev_targp);
        if (mp->m_rtdev_targp) {
@@ -1548,15 +1659,14 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
        xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);
        /* find modified range */
+        f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
+        ASSERT((1LL << f) & XFS_SB_MOD_BITS);
+        last = xfs_sb_info[f + 1].offset - 1;
        f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
        ASSERT((1LL << f) & XFS_SB_MOD_BITS);
        first = xfs_sb_info[f].offset;
-        f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
-        ASSERT((1LL << f) & XFS_SB_MOD_BITS);
-        last = xfs_sb_info[f + 1].offset - 1;
        xfs_trans_log_buf(tp, bp, first, last);
 }
@@ -1620,26 +1730,30 @@ xfs_mod_incore_sb_unlocked(
                                lcounter += rem;
                        }
                } else {                                /* Taking blocks away */
                        lcounter += delta;
+                        if (lcounter >= 0) {
+                                mp->m_sb.sb_fdblocks = lcounter +
+                                                        XFS_ALLOC_SET_ASIDE(mp);
+                                return 0;
+                        }
-                /*
+                        /*
-                 * If were out of blocks, use any available reserved blocks if
+                         * We are out of blocks, use any available reserved
-                 * were allowed to.
+                         * blocks if we're allowed to.
-                 */
+                         */
+                        if (!rsvd)
+                                return XFS_ERROR(ENOSPC);
-                        if (lcounter < 0) {
+                        lcounter = (long long)mp->m_resblks_avail + delta;
-                                if (rsvd) {
+                        if (lcounter >= 0) {
-                                        lcounter = (long long)mp->m_resblks_avail + delta;
+                                mp->m_resblks_avail = lcounter;
-                                        if (lcounter < 0) {
+                                return 0;
-                                                return XFS_ERROR(ENOSPC);
-                                        }
-                                        mp->m_resblks_avail = lcounter;
-                                        return 0;
-                                } else {        /* not reserved */
-                                        return XFS_ERROR(ENOSPC);
-                                }
                        }
+                        printk_once(KERN_WARNING
+                                "Filesystem \"%s\": reserve blocks depleted! "
+                                "Consider increasing reserve pool size.",
+                                mp->m_fsname);
+                        return XFS_ERROR(ENOSPC);
                }
                mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
@@ -1887,7 +2001,7 @@ xfs_getsb(
        ASSERT(mp->m_sb_bp != NULL);
        bp = mp->m_sb_bp;
-        if (flags & XFS_BUF_TRYLOCK) {
+        if (flags & XBF_TRYLOCK) {
                if (!XFS_BUF_CPSEMA(bp)) {
                        return NULL;
                }
@@ -1947,6 +2061,26 @@ xfs_mount_log_sb(
        return error;
 }
+/*
+ * Check whether any device backing @mp is read-only.  The data and log
+ * buftargs are always checked; the realtime buftarg is checked only when
+ * one is present.  If any of them is read-only, two CE_NOTE messages are
+ * logged (the first names the operation passed in as @message) and EROFS
+ * is returned so the caller can refuse to proceed.  Returns 0 otherwise.
+ */
+int
+xfs_dev_is_read_only(
+        struct xfs_mount        *mp,
+        char                    *message)
+{
+        int     readonly;
+        readonly = xfs_readonly_buftarg(mp->m_ddev_targp) ||
+                   xfs_readonly_buftarg(mp->m_logdev_targp) ||
+                   (mp->m_rtdev_targp &&
+                    xfs_readonly_buftarg(mp->m_rtdev_targp));
+        if (!readonly)
+                return 0;
+        cmn_err(CE_NOTE, "XFS: %s required on read-only device.", message);
+        cmn_err(CE_NOTE, "XFS: write access unavailable, cannot proceed.");
+        return EROFS;
+}
 #ifdef HAVE_PERCPU_SB
 /*

diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index eb403b40e120..e79b56b4bca6 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c
@@ -201,6 +201,38 @@ xfs_uuid_unmount(
201		201
202		202
203	/*	203	/*
		204	* Reference counting access wrappers to the perag structures.
		205	*/
		206	struct xfs_perag *
		207	xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno)
		208	{
		209	struct xfs_perag *pag;
		210	int ref = 0;
		211
		212	spin_lock(&mp->m_perag_lock);
		213	pag = radix_tree_lookup(&mp->m_perag_tree, agno);
		214	if (pag) {
		215	ASSERT(atomic_read(&pag->pag_ref) >= 0);
		216	/* catch leaks in the positive direction during testing */
		217	ASSERT(atomic_read(&pag->pag_ref) < 1000);
		218	ref = atomic_inc_return(&pag->pag_ref);
		219	}
		220	spin_unlock(&mp->m_perag_lock);
		221	trace_xfs_perag_get(mp, agno, ref, _RET_IP_);
		222	return pag;
		223	}
		224
		225	void
		226	xfs_perag_put(struct xfs_perag *pag)
		227	{
		228	int ref;
		229
		230	ASSERT(atomic_read(&pag->pag_ref) > 0);
		231	ref = atomic_dec_return(&pag->pag_ref);
		232	trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_);
		233	}
		234
		235	/*
204	* Free up the resources associated with a mount structure. Assume that	236	* Free up the resources associated with a mount structure. Assume that
205	* the structure was initially zeroed, so we can tell which fields got	237	* the structure was initially zeroed, so we can tell which fields got
206	* initialized.	238	* initialized.
@@ -209,13 +241,16 @@ STATIC void
209	xfs_free_perag(	241	xfs_free_perag(
210	xfs_mount_t *mp)	242	xfs_mount_t *mp)
211	{	243	{
212	if (mp->m_perag) {	244	xfs_agnumber_t agno;
213	int agno;	245	struct xfs_perag *pag;
214		246
215	for (agno = 0; agno < mp->m_maxagi; agno++)	247	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
216	if (mp->m_perag[agno].pagb_list)	248	spin_lock(&mp->m_perag_lock);
217	kmem_free(mp->m_perag[agno].pagb_list);	249	pag = radix_tree_delete(&mp->m_perag_tree, agno);
218	kmem_free(mp->m_perag);	250	ASSERT(pag);
		251	ASSERT(atomic_read(&pag->pag_ref) == 0);
		252	spin_unlock(&mp->m_perag_lock);
		253	kmem_free(pag);
219	}	254	}
220	}	255	}
221		256
@@ -389,22 +424,57 @@ xfs_initialize_perag_icache(
389	}	424	}
390	}	425	}
391		426
392	xfs_agnumber_t	427	int
393	xfs_initialize_perag(	428	xfs_initialize_perag(
394	xfs_mount_t *mp,	429	xfs_mount_t *mp,
395	xfs_agnumber_t agcount)	430	xfs_agnumber_t agcount,
		431	xfs_agnumber_t *maxagi)
396	{	432	{
397	xfs_agnumber_t index, max_metadata;	433	xfs_agnumber_t index, max_metadata;
		434	xfs_agnumber_t first_initialised = 0;
398	xfs_perag_t *pag;	435	xfs_perag_t *pag;
399	xfs_agino_t agino;	436	xfs_agino_t agino;
400	xfs_ino_t ino;	437	xfs_ino_t ino;
401	xfs_sb_t *sbp = &mp->m_sb;	438	xfs_sb_t *sbp = &mp->m_sb;
402	xfs_ino_t max_inum = XFS_MAXINUMBER_32;	439	xfs_ino_t max_inum = XFS_MAXINUMBER_32;
		440	int error = -ENOMEM;
403		441
404	/* Check to see if the filesystem can overflow 32 bit inodes */	442	/* Check to see if the filesystem can overflow 32 bit inodes */
405	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);	443	agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
406	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);	444	ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
407		445
		446	/*
		447	* Walk the current per-ag tree so we don't try to initialise AGs
		448	* that already exist (growfs case). Allocate and insert all the
		449	* AGs we don't find ready for initialisation.
		450	*/
		451	for (index = 0; index < agcount; index++) {
		452	pag = xfs_perag_get(mp, index);
		453	if (pag) {
		454	xfs_perag_put(pag);
		455	continue;
		456	}
		457	if (!first_initialised)
		458	first_initialised = index;
		459	pag = kmem_zalloc(sizeof(*pag), KM_MAYFAIL);
		460	if (!pag)
		461	goto out_unwind;
		462	if (radix_tree_preload(GFP_NOFS))
		463	goto out_unwind;
		464	spin_lock(&mp->m_perag_lock);
		465	if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
		466	BUG();
		467	spin_unlock(&mp->m_perag_lock);
		468	radix_tree_preload_end();
		469	error = -EEXIST;
		470	goto out_unwind;
		471	}
		472	pag->pag_agno = index;
		473	pag->pag_mount = mp;
		474	spin_unlock(&mp->m_perag_lock);
		475	radix_tree_preload_end();
		476	}
		477
408	/* Clear the mount flag if no inode can overflow 32 bits	478	/* Clear the mount flag if no inode can overflow 32 bits
409	* on this filesystem, or if specifically requested..	479	* on this filesystem, or if specifically requested..
410	*/	480	*/
@@ -438,21 +508,33 @@ xfs_initialize_perag(
438	}	508	}
439		509
440	/* This ag is preferred for inodes */	510	/* This ag is preferred for inodes */
441	pag = &mp->m_perag[index];	511	pag = xfs_perag_get(mp, index);
442	pag->pagi_inodeok = 1;	512	pag->pagi_inodeok = 1;
443	if (index < max_metadata)	513	if (index < max_metadata)
444	pag->pagf_metadata = 1;	514	pag->pagf_metadata = 1;
445	xfs_initialize_perag_icache(pag);	515	xfs_initialize_perag_icache(pag);
		516	xfs_perag_put(pag);
446	}	517	}
447	} else {	518	} else {
448	/* Setup default behavior for smaller filesystems */	519	/* Setup default behavior for smaller filesystems */
449	for (index = 0; index < agcount; index++) {	520	for (index = 0; index < agcount; index++) {
450	pag = &mp->m_perag[index];	521	pag = xfs_perag_get(mp, index);
451	pag->pagi_inodeok = 1;	522	pag->pagi_inodeok = 1;
452	xfs_initialize_perag_icache(pag);	523	xfs_initialize_perag_icache(pag);
		524	xfs_perag_put(pag);
453	}	525	}
454	}	526	}
455	return index;	527	if (maxagi)
		528	*maxagi = index;
		529	return 0;
		530
		531	out_unwind:
		532	kmem_free(pag);
		533	for (; index > first_initialised; index--) {
		534	pag = radix_tree_delete(&mp->m_perag_tree, index);
		535	kmem_free(pag);
		536	}
		537	return error;
456	}	538	}
457		539
458	void	540	void
@@ -583,7 +665,7 @@ xfs_readsb(xfs_mount_t *mp, int flags)
583	* access to the superblock.	665	* access to the superblock.
584	*/	666	*/
585	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);	667	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp);
586	extra_flags = XFS_BUF_LOCK \| XFS_BUF_MANAGE \| XFS_BUF_MAPPED;	668	extra_flags = XBF_LOCK \| XBF_FS_MANAGED \| XBF_MAPPED;
587		669
588	bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),	670	bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size),
589	extra_flags);	671	extra_flags);
@@ -731,12 +813,13 @@ xfs_initialize_perag_data(xfs_mount_t *mp, xfs_agnumber_t agcount)
731	error = xfs_ialloc_pagi_init(mp, NULL, index);	813	error = xfs_ialloc_pagi_init(mp, NULL, index);
732	if (error)	814	if (error)
733	return error;	815	return error;
734	pag = &mp->m_perag[index];	816	pag = xfs_perag_get(mp, index);
735	ifree += pag->pagi_freecount;	817	ifree += pag->pagi_freecount;
736	ialloc += pag->pagi_count;	818	ialloc += pag->pagi_count;
737	bfree += pag->pagf_freeblks;	819	bfree += pag->pagf_freeblks;
738	bfreelst += pag->pagf_flcount;	820	bfreelst += pag->pagf_flcount;
739	btree += pag->pagf_btreeblks;	821	btree += pag->pagf_btreeblks;
		822	xfs_perag_put(pag);
740	}	823	}
741	/*	824	/*
742	* Overwrite incore superblock counters with just-read data	825	* Overwrite incore superblock counters with just-read data
@@ -1008,6 +1091,24 @@ xfs_mount_reset_sbqflags(
1008	return xfs_trans_commit(tp, 0);	1091	return xfs_trans_commit(tp, 0);
1009	}	1092	}
1010		1093
		1094	__uint64_t
		1095	xfs_default_resblks(xfs_mount_t *mp)
		1096	{
		1097	__uint64_t resblks;
		1098
		1099	/*
		1100	* We default to 5% or 8192 fsbs of space reserved, whichever is
		1101	* smaller. This is intended to cover concurrent allocation
		1102	* transactions when we initially hit enospc. These each require a 4
		1103	* block reservation. Hence by default we cover roughly 2000 concurrent
		1104	* allocation reservations.
		1105	*/
		1106	resblks = mp->m_sb.sb_dblocks;
		1107	do_div(resblks, 20);
		1108	resblks = min_t(__uint64_t, resblks, 8192);
		1109	return resblks;
		1110	}
		1111
1011	/*	1112	/*
1012	* This function does the following on an initial mount of a file system:	1113	* This function does the following on an initial mount of a file system:
1013	* - reads the superblock from disk and init the mount struct	1114	* - reads the superblock from disk and init the mount struct
@@ -1152,13 +1253,13 @@ xfs_mountfs(
1152	/*	1253	/*
1153	* Allocate and initialize the per-ag data.	1254	* Allocate and initialize the per-ag data.
1154	*/	1255	*/
1155	init_rwsem(&mp->m_peraglock);	1256	spin_lock_init(&mp->m_perag_lock);
1156	mp->m_perag = kmem_zalloc(sbp->sb_agcount * sizeof(xfs_perag_t),	1257	INIT_RADIX_TREE(&mp->m_perag_tree, GFP_NOFS);
1157	KM_MAYFAIL);	1258	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
1158	if (!mp->m_perag)	1259	if (error) {
		1260	cmn_err(CE_WARN, "XFS: Failed per-ag init: %d", error);
1159	goto out_remove_uuid;	1261	goto out_remove_uuid;
1160		1262	}
1161	mp->m_maxagi = xfs_initialize_perag(mp, sbp->sb_agcount);
1162		1263
1163	if (!sbp->sb_logblocks) {	1264	if (!sbp->sb_logblocks) {
1164	cmn_err(CE_WARN, "XFS: no log defined");	1265	cmn_err(CE_WARN, "XFS: no log defined");
@@ -1319,17 +1420,16 @@ xfs_mountfs(
1319	* attr, unwritten extent conversion at ENOSPC, etc. Data allocations	1420	* attr, unwritten extent conversion at ENOSPC, etc. Data allocations
1320	* are not allowed to use this reserved space.	1421	* are not allowed to use this reserved space.
1321	*	1422	*
1322	* We default to 5% or 1024 fsbs of space reserved, whichever is smaller.
1323	* This may drive us straight to ENOSPC on mount, but that implies	1423	* This may drive us straight to ENOSPC on mount, but that implies
1324	* we were already there on the last unmount. Warn if this occurs.	1424	* we were already there on the last unmount. Warn if this occurs.
1325	*/	1425	*/
1326	resblks = mp->m_sb.sb_dblocks;	1426	if (!(mp->m_flags & XFS_MOUNT_RDONLY)) {
1327	do_div(resblks, 20);	1427	resblks = xfs_default_resblks(mp);
1328	resblks = min_t(__uint64_t, resblks, 1024);	1428	error = xfs_reserve_blocks(mp, &resblks, NULL);
1329	error = xfs_reserve_blocks(mp, &resblks, NULL);	1429	if (error)
1330	if (error)	1430	cmn_err(CE_WARN, "XFS: Unable to allocate reserve "
1331	cmn_err(CE_WARN, "XFS: Unable to allocate reserve blocks. "	1431	"blocks. Continuing without a reserve pool.");
1332	"Continuing without a reserve pool.");	1432	}
1333		1433
1334	return 0;	1434	return 0;
1335		1435
@@ -1372,8 +1472,19 @@ xfs_unmountfs(
1372	* push out the iclog we will never get that unlocked. hence we	1472	* push out the iclog we will never get that unlocked. hence we
1373	* need to force the log first.	1473	* need to force the log first.
1374	*/	1474	*/
1375	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE \| XFS_LOG_SYNC);	1475	xfs_log_force(mp, XFS_LOG_SYNC);
1376	xfs_reclaim_inodes(mp, XFS_IFLUSH_ASYNC);	1476
		1477	/*
		1478	* Do a delwri reclaim pass first so that as many dirty inodes are
		1479	* queued up for IO as possible. Then flush the buffers before making
		1480	* a synchronous path to catch all the remaining inodes are reclaimed.
		1481	* This makes the reclaim process as quick as possible by avoiding
		1482	* synchronous writeout and blocking on inodes already in the delwri
		1483	* state as much as possible.
		1484	*/
		1485	xfs_reclaim_inodes(mp, 0);
		1486	XFS_bflush(mp->m_ddev_targp);
		1487	xfs_reclaim_inodes(mp, SYNC_WAIT);
1377		1488
1378	xfs_qm_unmount(mp);	1489	xfs_qm_unmount(mp);
1379		1490
@@ -1382,7 +1493,7 @@ xfs_unmountfs(
1382	* that nothing is pinned. This is important because bflush()	1493	* that nothing is pinned. This is important because bflush()
1383	* will skip pinned buffers.	1494	* will skip pinned buffers.
1384	*/	1495	*/
1385	xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE \| XFS_LOG_SYNC);	1496	xfs_log_force(mp, XFS_LOG_SYNC);
1386		1497
1387	xfs_binval(mp->m_ddev_targp);	1498	xfs_binval(mp->m_ddev_targp);
1388	if (mp->m_rtdev_targp) {	1499	if (mp->m_rtdev_targp) {
@@ -1548,15 +1659,14 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
1548	xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);	1659	xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, fields);
1549		1660
1550	/* find modified range */	1661	/* find modified range */
		1662	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
		1663	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
		1664	last = xfs_sb_info[f + 1].offset - 1;
1551		1665
1552	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);	1666	f = (xfs_sb_field_t)xfs_lowbit64((__uint64_t)fields);
1553	ASSERT((1LL << f) & XFS_SB_MOD_BITS);	1667	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1554	first = xfs_sb_info[f].offset;	1668	first = xfs_sb_info[f].offset;
1555		1669
1556	f = (xfs_sb_field_t)xfs_highbit64((__uint64_t)fields);
1557	ASSERT((1LL << f) & XFS_SB_MOD_BITS);
1558	last = xfs_sb_info[f + 1].offset - 1;
1559
1560	xfs_trans_log_buf(tp, bp, first, last);	1670	xfs_trans_log_buf(tp, bp, first, last);
1561	}	1671	}
1562		1672
@@ -1620,26 +1730,30 @@ xfs_mod_incore_sb_unlocked(
1620	lcounter += rem;	1730	lcounter += rem;
1621	}	1731	}
1622	} else { /* Taking blocks away */	1732	} else { /* Taking blocks away */
1623
1624	lcounter += delta;	1733	lcounter += delta;
		1734	if (lcounter >= 0) {
		1735	mp->m_sb.sb_fdblocks = lcounter +
		1736	XFS_ALLOC_SET_ASIDE(mp);
		1737	return 0;
		1738	}
1625		1739
1626	/*	1740	/*
1627	* If were out of blocks, use any available reserved blocks if	1741	* We are out of blocks, use any available reserved
1628	* were allowed to.	1742	* blocks if we're allowed to.
1629	*/	1743	*/
		1744	if (!rsvd)
		1745	return XFS_ERROR(ENOSPC);
1630		1746
1631	if (lcounter < 0) {	1747	lcounter = (long long)mp->m_resblks_avail + delta;
1632	if (rsvd) {	1748	if (lcounter >= 0) {
1633	lcounter = (long long)mp->m_resblks_avail + delta;	1749	mp->m_resblks_avail = lcounter;
1634	if (lcounter < 0) {	1750	return 0;
1635	return XFS_ERROR(ENOSPC);
1636	}
1637	mp->m_resblks_avail = lcounter;
1638	return 0;
1639	} else { /* not reserved */
1640	return XFS_ERROR(ENOSPC);
1641	}
1642	}	1751	}
		1752	printk_once(KERN_WARNING
		1753	"Filesystem \"%s\": reserve blocks depleted! "
		1754	"Consider increasing reserve pool size.",
		1755	mp->m_fsname);
		1756	return XFS_ERROR(ENOSPC);
1643	}	1757	}
1644		1758
1645	mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);	1759	mp->m_sb.sb_fdblocks = lcounter + XFS_ALLOC_SET_ASIDE(mp);
@@ -1887,7 +2001,7 @@ xfs_getsb(
1887		2001
1888	ASSERT(mp->m_sb_bp != NULL);	2002	ASSERT(mp->m_sb_bp != NULL);
1889	bp = mp->m_sb_bp;	2003	bp = mp->m_sb_bp;
1890	if (flags & XFS_BUF_TRYLOCK) {	2004	if (flags & XBF_TRYLOCK) {
1891	if (!XFS_BUF_CPSEMA(bp)) {	2005	if (!XFS_BUF_CPSEMA(bp)) {
1892	return NULL;	2006	return NULL;
1893	}	2007	}
@@ -1947,6 +2061,26 @@ xfs_mount_log_sb(
1947	return error;	2061	return error;
1948	}	2062	}
1949		2063
		2064	/*
		2065	* If the underlying (data/log/rt) device is readonly, there are some
		2066	* operations that cannot proceed.
		2067	*/
		2068	int
		2069	xfs_dev_is_read_only(
		2070	struct xfs_mount *mp,
		2071	char *message)
		2072	{
		2073	if (xfs_readonly_buftarg(mp->m_ddev_targp) \|\|
		2074	xfs_readonly_buftarg(mp->m_logdev_targp) \|\|
		2075	(mp->m_rtdev_targp && xfs_readonly_buftarg(mp->m_rtdev_targp))) {
		2076	cmn_err(CE_NOTE,
		2077	"XFS: %s required on read-only device.", message);
		2078	cmn_err(CE_NOTE,
		2079	"XFS: write access unavailable, cannot proceed.");
		2080	return EROFS;
		2081	}
		2082	return 0;
		2083	}
1950		2084
1951	#ifdef HAVE_PERCPU_SB	2085	#ifdef HAVE_PERCPU_SB
1952	/*	2086	/*