path: root/fs/xfs/xfs_mount.c
author     Dave Chinner <dchinner@redhat.com>   2015-02-23 05:22:31 -0500
committer  Dave Chinner <david@fromorbit.com>   2015-02-23 05:22:31 -0500
commit     5681ca40064fdb3efe477a604d690ab0425708b3 (patch)
tree       22bfb1ebea1bef65094a8d9e7fb4526996028d54 /fs/xfs/xfs_mount.c
parent     0d485ada404b3614b045e574bec26aaf5d9b3c5b (diff)
xfs: Remove icsb infrastructure
Now that the in-core superblock infrastructure has been replaced with
generic per-cpu counters, we don't need it anymore. Nuke it from orbit
so we are sure that it won't haunt us again...

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
Diffstat (limited to 'fs/xfs/xfs_mount.c')
-rw-r--r--  fs/xfs/xfs_mount.c  516
1 file changed, 1 insertion(+), 515 deletions(-)
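
Background for the commit message above: the "generic per-cpu counters" are the kernel's
struct percpu_counter API from <linux/percpu_counter.h>, the same calls the removed icsb
code below was already layered on (percpu_counter_init/set/destroy appear verbatim in the
deleted hunk). The following is a minimal, illustrative sketch of that API; the example_*
names are made up for illustration and are not part of this patch.

#include <linux/percpu_counter.h>
#include <linux/printk.h>
#include <linux/gfp.h>

/* Illustrative only -- not part of this patch. */
static struct percpu_counter example_counter;

static int example_counter_setup(s64 initial)
{
        int error;

        /* Allocate the per-cpu storage backing one logical 64-bit counter. */
        error = percpu_counter_init(&example_counter, 0, GFP_KERNEL);
        if (error)
                return error;

        /* Load an absolute value, e.g. from an on-disk superblock field. */
        percpu_counter_set(&example_counter, initial);
        return 0;
}

static void example_counter_use(void)
{
        /* Fast-path update: batched per-cpu, no global lock in the common case. */
        percpu_counter_add(&example_counter, -8);

        /* Accurate (slower) sum across all CPUs, e.g. for near-ENOSPC checks. */
        pr_info("counter value: %lld\n",
                (long long)percpu_counter_sum(&example_counter));

        percpu_counter_destroy(&example_counter);
}

Because accurate reads are available directly via percpu_counter_sum() (and comparisons via
percpu_counter_compare()), the hand-rolled enable/disable/rebalance machinery removed below
is no longer needed.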
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 767c09a5d3ff..05b392e35e35 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -43,18 +43,6 @@
 #include "xfs_sysfs.h"
 
 
-#ifdef HAVE_PERCPU_SB
-STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t,
-                                                int);
-STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t,
-                                                int);
-STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t);
-#else
-
-#define xfs_icsb_balance_counter(mp, a, b)              do { } while (0)
-#define xfs_icsb_balance_counter_locked(mp, a, b)       do { } while (0)
-#endif
-
 static DEFINE_MUTEX(xfs_uuid_table_mutex);
 static int xfs_uuid_table_size;
 static uuid_t *xfs_uuid_table;
@@ -347,8 +335,7 @@ reread:
                 goto reread;
         }
 
-        /* Initialize per-cpu counters */
-        xfs_icsb_reinit_counters(mp);
+        xfs_reinit_percpu_counters(mp);
 
         /* no need to be quiet anymore, so reset the buf ops */
         bp->b_ops = &xfs_sb_buf_ops;
@@ -1087,8 +1074,6 @@ xfs_log_sbcount(xfs_mount_t *mp)
         if (!xfs_fs_writable(mp, SB_FREEZE_COMPLETE))
                 return 0;
 
-        xfs_icsb_sync_counters(mp, 0);
-
         /*
          * we don't need to do this if we are updating the superblock
          * counters on every modification.
@@ -1466,502 +1451,3 @@ xfs_dev_is_read_only(
         }
         return 0;
 }
-
-#ifdef HAVE_PERCPU_SB
-/*
- * Per-cpu incore superblock counters
- *
- * Simple concept, difficult implementation
- *
- * Basically, replace the incore superblock counters with a distributed per cpu
- * counter for contended fields (e.g. free block count).
- *
- * Difficulties arise in that the incore sb is used for ENOSPC checking, and
- * hence needs to be accurately read when we are running low on space. Hence
- * there is a method to enable and disable the per-cpu counters based on how
- * much "stuff" is available in them.
- *
- * Basically, a counter is enabled if there is enough free resource to justify
- * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
- * ENOSPC), then we disable the counters to synchronise all callers and
- * re-distribute the available resources.
- *
- * If, once we redistributed the available resources, we still get a failure,
- * we disable the per-cpu counter and go through the slow path.
- *
- * The slow path is the current xfs_mod_incore_sb() function. This means that
- * when we disable a per-cpu counter, we need to drain its resources back to
- * the global superblock. We do this after disabling the counter to prevent
- * more threads from queueing up on the counter.
- *
- * Essentially, this means that we still need a lock in the fast path to enable
- * synchronisation between the global counters and the per-cpu counters. This
- * is not a problem because the lock will be local to a CPU almost all the time
- * and have little contention except when we get to ENOSPC conditions.
- *
- * Basically, this lock becomes a barrier that enables us to lock out the fast
- * path while we do things like enabling and disabling counters and
- * synchronising the counters.
- *
- * Locking rules:
- *
- * 1. m_sb_lock before picking up per-cpu locks
- * 2. per-cpu locks always picked up via for_each_online_cpu() order
- * 3. accurate counter sync requires m_sb_lock + per cpu locks
- * 4. modifying per-cpu counters requires holding per-cpu lock
- * 5. modifying global counters requires holding m_sb_lock
- * 6. enabling or disabling a counter requires holding the m_sb_lock
- *    and _none_ of the per-cpu locks.
- *
- * Disabled counters are only ever re-enabled by a balance operation
- * that results in more free resources per CPU than a given threshold.
- * To ensure counters don't remain disabled, they are rebalanced when
- * the global resource goes above a higher threshold (i.e. some hysteresis
- * is present to prevent thrashing).
- */
-
-#ifdef CONFIG_HOTPLUG_CPU
-/*
- * hot-plug CPU notifier support.
- *
- * We need a notifier per filesystem as we need to be able to identify
- * the filesystem to balance the counters out. This is achieved by
- * having a notifier block embedded in the xfs_mount_t and doing pointer
- * magic to get the mount pointer from the notifier block address.
- */
-STATIC int
-xfs_icsb_cpu_notify(
-        struct notifier_block *nfb,
-        unsigned long action,
-        void *hcpu)
-{
-        xfs_icsb_cnts_t *cntp;
-        xfs_mount_t *mp;
-
-        mp = (xfs_mount_t *)container_of(nfb, xfs_mount_t, m_icsb_notifier);
-        cntp = (xfs_icsb_cnts_t *)
-                        per_cpu_ptr(mp->m_sb_cnts, (unsigned long)hcpu);
-        switch (action) {
-        case CPU_UP_PREPARE:
-        case CPU_UP_PREPARE_FROZEN:
-                /* Easy Case - initialize the area and locks, and
-                 * then rebalance when online does everything else for us. */
-                memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-                break;
-        case CPU_ONLINE:
-        case CPU_ONLINE_FROZEN:
-                xfs_icsb_lock(mp);
-                xfs_icsb_unlock(mp);
-                break;
-        case CPU_DEAD:
-        case CPU_DEAD_FROZEN:
-                /* Disable all the counters, then fold the dead cpu's
-                 * count into the total on the global superblock and
-                 * re-enable the counters. */
-                xfs_icsb_lock(mp);
-                spin_lock(&mp->m_sb_lock);
-
-                memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-
-                spin_unlock(&mp->m_sb_lock);
-                xfs_icsb_unlock(mp);
-                break;
-        }
-
-        return NOTIFY_OK;
-}
-#endif /* CONFIG_HOTPLUG_CPU */
-
-int
-xfs_icsb_init_counters(
-        xfs_mount_t *mp)
-{
-        xfs_icsb_cnts_t *cntp;
-        int error;
-        int i;
-
-        error = percpu_counter_init(&mp->m_icount, 0, GFP_KERNEL);
-        if (error)
-                return error;
-
-        error = percpu_counter_init(&mp->m_ifree, 0, GFP_KERNEL);
-        if (error)
-                goto free_icount;
-
-        error = percpu_counter_init(&mp->m_fdblocks, 0, GFP_KERNEL);
-        if (error)
-                goto free_ifree;
-
-        mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
-        if (!mp->m_sb_cnts) {
-                error = -ENOMEM;
-                goto free_fdblocks;
-        }
-
-        for_each_online_cpu(i) {
-                cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-                memset(cntp, 0, sizeof(xfs_icsb_cnts_t));
-        }
-
-        mutex_init(&mp->m_icsb_mutex);
-
-        /*
-         * start with all counters disabled so that the
-         * initial balance kicks us off correctly
-         */
-        mp->m_icsb_counters = -1;
-
-#ifdef CONFIG_HOTPLUG_CPU
-        mp->m_icsb_notifier.notifier_call = xfs_icsb_cpu_notify;
-        mp->m_icsb_notifier.priority = 0;
-        register_hotcpu_notifier(&mp->m_icsb_notifier);
-#endif /* CONFIG_HOTPLUG_CPU */
-
-        return 0;
-
-free_fdblocks:
-        percpu_counter_destroy(&mp->m_fdblocks);
-free_ifree:
-        percpu_counter_destroy(&mp->m_ifree);
-free_icount:
-        percpu_counter_destroy(&mp->m_icount);
-        return error;
-}
-
-void
-xfs_icsb_reinit_counters(
-        xfs_mount_t *mp)
-{
-        percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
-        percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
-        percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
-
-        xfs_icsb_lock(mp);
-        /*
-         * start with all counters disabled so that the
-         * initial balance kicks us off correctly
-         */
-        mp->m_icsb_counters = -1;
-        xfs_icsb_unlock(mp);
-}
-
-void
-xfs_icsb_destroy_counters(
-        xfs_mount_t *mp)
-{
-        if (mp->m_sb_cnts) {
-                unregister_hotcpu_notifier(&mp->m_icsb_notifier);
-                free_percpu(mp->m_sb_cnts);
-        }
-
-        percpu_counter_destroy(&mp->m_icount);
-        percpu_counter_destroy(&mp->m_ifree);
-        percpu_counter_destroy(&mp->m_fdblocks);
-
-        mutex_destroy(&mp->m_icsb_mutex);
-}
-
-STATIC void
-xfs_icsb_lock_cntr(
-        xfs_icsb_cnts_t *icsbp)
-{
-        while (test_and_set_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags)) {
-                ndelay(1000);
-        }
-}
-
-STATIC void
-xfs_icsb_unlock_cntr(
-        xfs_icsb_cnts_t *icsbp)
-{
-        clear_bit(XFS_ICSB_FLAG_LOCK, &icsbp->icsb_flags);
-}
-
-
-STATIC void
-xfs_icsb_lock_all_counters(
-        xfs_mount_t *mp)
-{
-        xfs_icsb_cnts_t *cntp;
-        int i;
-
-        for_each_online_cpu(i) {
-                cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-                xfs_icsb_lock_cntr(cntp);
-        }
-}
-
-STATIC void
-xfs_icsb_unlock_all_counters(
-        xfs_mount_t *mp)
-{
-        xfs_icsb_cnts_t *cntp;
-        int i;
-
-        for_each_online_cpu(i) {
-                cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
-                xfs_icsb_unlock_cntr(cntp);
-        }
-}
-
-STATIC void
-xfs_icsb_count(
-        xfs_mount_t *mp,
-        xfs_icsb_cnts_t *cnt,
-        int flags)
-{
-        memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
-
-        if (!(flags & XFS_ICSB_LAZY_COUNT))
-                xfs_icsb_lock_all_counters(mp);
-
-
-        if (!(flags & XFS_ICSB_LAZY_COUNT))
-                xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC int
-xfs_icsb_counter_disabled(
-        xfs_mount_t *mp,
-        xfs_sb_field_t field)
-{
-        return test_bit(field, &mp->m_icsb_counters);
-}
-
-STATIC void
-xfs_icsb_disable_counter(
-        xfs_mount_t *mp,
-        xfs_sb_field_t field)
-{
-        xfs_icsb_cnts_t cnt;
-
-        /*
-         * If we are already disabled, then there is nothing to do
-         * here. We check before locking all the counters to avoid
-         * the expensive lock operation when being called in the
-         * slow path and the counter is already disabled. This is
-         * safe because the only time we set or clear this state is under
-         * the m_icsb_mutex.
-         */
-        if (xfs_icsb_counter_disabled(mp, field))
-                return;
-
-        xfs_icsb_lock_all_counters(mp);
-        if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
-                /* drain back to superblock */
-
-                xfs_icsb_count(mp, &cnt, XFS_ICSB_LAZY_COUNT);
-                switch(field) {
-                default:
-                        BUG();
-                }
-        }
-
-        xfs_icsb_unlock_all_counters(mp);
-}
-
-STATIC void
-xfs_icsb_enable_counter(
-        xfs_mount_t *mp,
-        xfs_sb_field_t field,
-        uint64_t count,
-        uint64_t resid)
-{
-        int i;
-
-        xfs_icsb_lock_all_counters(mp);
-        for_each_online_cpu(i) {
-                switch (field) {
-                default:
-                        BUG();
-                        break;
-                }
-                resid = 0;
-        }
-        clear_bit(field, &mp->m_icsb_counters);
-        xfs_icsb_unlock_all_counters(mp);
-}
-
-void
-xfs_icsb_sync_counters_locked(
-        xfs_mount_t *mp,
-        int flags)
-{
-        xfs_icsb_cnts_t cnt;
-
-        xfs_icsb_count(mp, &cnt, flags);
-}
-
-/*
- * Accurate update of per-cpu counters to incore superblock
- */
-void
-xfs_icsb_sync_counters(
-        xfs_mount_t *mp,
-        int flags)
-{
-        spin_lock(&mp->m_sb_lock);
-        xfs_icsb_sync_counters_locked(mp, flags);
-        spin_unlock(&mp->m_sb_lock);
-}
-
-/*
- * Balance and enable/disable counters as necessary.
- *
- * Thresholds for re-enabling counters are somewhat magic. inode counts are
- * chosen to be the same number as single on disk allocation chunk per CPU, and
- * free blocks is something far enough zero that we aren't going thrash when we
- * get near ENOSPC. We also need to supply a minimum we require per cpu to
- * prevent looping endlessly when xfs_alloc_space asks for more than will
- * be distributed to a single CPU but each CPU has enough blocks to be
- * reenabled.
- *
- * Note that we can be called when counters are already disabled.
- * xfs_icsb_disable_counter() optimises the counter locking in this case to
- * prevent locking every per-cpu counter needlessly.
- */
-
-#define XFS_ICSB_INO_CNTR_REENABLE      (uint64_t)64
-#define XFS_ICSB_FDBLK_CNTR_REENABLE(mp) \
-        (uint64_t)(512 + XFS_ALLOC_SET_ASIDE(mp))
-STATIC void
-xfs_icsb_balance_counter_locked(
-        xfs_mount_t *mp,
-        xfs_sb_field_t field,
-        int min_per_cpu)
-{
-        uint64_t count, resid;
-
-        /* disable counter and sync counter */
-        xfs_icsb_disable_counter(mp, field);
-
-        /* update counters - first CPU gets residual*/
-        switch (field) {
-        default:
-                BUG();
-                count = resid = 0;      /* quiet, gcc */
-                break;
-        }
-
-        xfs_icsb_enable_counter(mp, field, count, resid);
-}
-
-STATIC void
-xfs_icsb_balance_counter(
-        xfs_mount_t *mp,
-        xfs_sb_field_t fields,
-        int min_per_cpu)
-{
-        spin_lock(&mp->m_sb_lock);
-        xfs_icsb_balance_counter_locked(mp, fields, min_per_cpu);
-        spin_unlock(&mp->m_sb_lock);
-}
-
-int
-xfs_icsb_modify_counters(
-        xfs_mount_t *mp,
-        xfs_sb_field_t field,
-        int64_t delta,
-        int rsvd)
-{
-        xfs_icsb_cnts_t *icsbp;
-        int ret = 0;
-
-        might_sleep();
-again:
-        preempt_disable();
-        icsbp = this_cpu_ptr(mp->m_sb_cnts);
-
-        /*
-         * if the counter is disabled, go to slow path
-         */
-        if (unlikely(xfs_icsb_counter_disabled(mp, field)))
-                goto slow_path;
-        xfs_icsb_lock_cntr(icsbp);
-        if (unlikely(xfs_icsb_counter_disabled(mp, field))) {
-                xfs_icsb_unlock_cntr(icsbp);
-                goto slow_path;
-        }
-
-        switch (field) {
-        default:
-                BUG();
-                goto balance_counter; /* be still, gcc */
-        }
-        xfs_icsb_unlock_cntr(icsbp);
-        preempt_enable();
-        return 0;
-
-slow_path:
-        preempt_enable();
-
-        /*
-         * serialise with a mutex so we don't burn lots of cpu on
-         * the superblock lock. We still need to hold the superblock
-         * lock, however, when we modify the global structures.
-         */
-        xfs_icsb_lock(mp);
-
-        /*
-         * Now running atomically.
-         *
-         * If the counter is enabled, someone has beaten us to rebalancing.
-         * Drop the lock and try again in the fast path....
-         */
-        if (!(xfs_icsb_counter_disabled(mp, field))) {
-                xfs_icsb_unlock(mp);
-                goto again;
-        }
-
-        /*
-         * The counter is currently disabled. Because we are
-         * running atomically here, we know a rebalance cannot
-         * be in progress. Hence we can go straight to operating
-         * on the global superblock. We do not call xfs_mod_incore_sb()
-         * here even though we need to get the m_sb_lock. Doing so
-         * will cause us to re-enter this function and deadlock.
-         * Hence we get the m_sb_lock ourselves and then call
-         * xfs_mod_incore_sb_unlocked() as the unlocked path operates
-         * directly on the global counters.
-         */
-        spin_lock(&mp->m_sb_lock);
-        ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-        spin_unlock(&mp->m_sb_lock);
-
-        /*
-         * Now that we've modified the global superblock, we
-         * may be able to re-enable the distributed counters
-         * (e.g. lots of space just got freed). After that
-         * we are done.
-         */
-        if (ret != -ENOSPC)
-                xfs_icsb_balance_counter(mp, field, 0);
-        xfs_icsb_unlock(mp);
-        return ret;
-
-balance_counter:
-        xfs_icsb_unlock_cntr(icsbp);
-        preempt_enable();
-
-        /*
-         * We may have multiple threads here if multiple per-cpu
-         * counters run dry at the same time. This will mean we can
-         * do more balances than strictly necessary but it is not
-         * the common slowpath case.
-         */
-        xfs_icsb_lock(mp);
-
-        /*
-         * running atomically.
-         *
-         * This will leave the counter in the correct state for future
-         * accesses. After the rebalance, we simply try again and our retry
-         * will either succeed through the fast path or slow path without
-         * another balance operation being required.
-         */
-        xfs_icsb_balance_counter(mp, field, delta);
-        xfs_icsb_unlock(mp);
-        goto again;
-}
-
-#endif
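
For reference, the xfs_reinit_percpu_counters() call that replaces xfs_icsb_reinit_counters()
in the mount hunk above is added elsewhere in this patch series, not in this diff. A minimal
sketch, assuming it simply mirrors the three percpu_counter_set() calls visible in the removed
xfs_icsb_reinit_counters() (reloading the generic counters from the on-disk superblock values):

/*
 * Sketch only: assumes the real xfs_reinit_percpu_counters() does no more
 * than reload m_icount/m_ifree/m_fdblocks from the just-read superblock.
 */
void
xfs_reinit_percpu_counters(
        struct xfs_mount *mp)
{
        percpu_counter_set(&mp->m_icount, mp->m_sb.sb_icount);
        percpu_counter_set(&mp->m_ifree, mp->m_sb.sb_ifree);
        percpu_counter_set(&mp->m_fdblocks, mp->m_sb.sb_fdblocks);
}

Unlike the removed xfs_icsb_reinit_counters(), no enable/disable state needs to be reset here,
since the generic counters are always "enabled".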