author     David Chinner <dgc@sgi.com>      2006-03-13 21:13:09 -0500
committer  Nathan Scott <nathans@sgi.com>   2006-03-13 21:13:09 -0500
commit     8d280b98cfe3c0b69c37d355218975c1c0279bb0
tree       2dc1deaec23a7da29b72152a4225c2600dacf1d4
parent     9f4cbecd7e5ee6390fecd6032dc04ca8c9805dc9
[XFS] On machines with more than 8 cpus, when running parallel I/O
threads, the incore superblock lock becomes the limiting factor for
buffered write throughput. Make the contended fields in the incore
superblock use per-cpu counters so that there is no global lock to
limit scalability.

SGI-PV: 946630
SGI-Modid: xfs-linux-melb:xfs-kern:25106a

Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Nathan Scott <nathans@sgi.com>
-rw-r--r--  fs/xfs/linux-2.6/xfs_linux.h |   5
-rw-r--r--  fs/xfs/xfs_fsops.c           |   1
-rw-r--r--  fs/xfs/xfs_mount.c           | 560
-rw-r--r--  fs/xfs/xfs_mount.h           |  34
-rw-r--r--  fs/xfs/xfs_vfsops.c          |   3
5 files changed, 586 insertions(+), 17 deletions(-)
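
The patch replaces the globally locked incore superblock counters with per-cpu
counters that fall back to the locked global path only when a CPU-local share
of the resource is exhausted. The following is a minimal userspace sketch of
that fast-path/slow-path split; it is illustrative only, is not the XFS code,
and every name in it (NCPU, pcp_counter, pcp_mod, global_lock) is invented for
the example.

    /*
     * Sketch of a distributed counter: each "CPU" modifies its own slice
     * under a local lock, and only takes the single global lock when the
     * local slice cannot cover the request.
     */
    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NCPU 4

    struct pcp_counter {
            pthread_mutex_t lock;   /* local lock, rarely contended */
            int64_t         value;  /* this CPU's share of the resource */
    };

    static struct pcp_counter pcp[NCPU];
    static pthread_mutex_t global_lock = PTHREAD_MUTEX_INITIALIZER;
    static int64_t global_value;    /* slow-path fallback counter */

    /* Fast path: touch only the local slice.  Slow path: global lock. */
    static int pcp_mod(int cpu, int64_t delta)
    {
            struct pcp_counter *c = &pcp[cpu];
            int ret = 0;

            pthread_mutex_lock(&c->lock);
            if (c->value + delta >= 0) {            /* local slice covers it */
                    c->value += delta;
                    pthread_mutex_unlock(&c->lock);
                    return 0;
            }
            pthread_mutex_unlock(&c->lock);

            /* Local "ENOSPC": fall back to the shared counter. */
            pthread_mutex_lock(&global_lock);
            if (global_value + delta >= 0)
                    global_value += delta;
            else
                    ret = -1;                       /* genuinely out of space */
            pthread_mutex_unlock(&global_lock);
            return ret;
    }

    int main(void)
    {
            int i;

            for (i = 0; i < NCPU; i++) {
                    pthread_mutex_init(&pcp[i].lock, NULL);
                    pcp[i].value = 1000;            /* distribute the resource */
            }
            global_value = 0;

            printf("cpu0 alloc 10 blocks: %d\n", pcp_mod(0, -10));
            printf("cpu1 alloc 2000 blocks (slow path): %d\n", pcp_mod(1, -2000));
            return 0;
    }

In the patch below, the same roles are played by xfs_icsb_modify_counters()
(the per-cpu fast path) and by xfs_mod_incore_sb_unlocked() under
XFS_SB_LOCK() (the slow path).
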
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 67389b745526..377a9f54a049 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -100,6 +100,11 @@
  */
 #undef HAVE_REFCACHE	/* reference cache not needed for NFS in 2.6 */
 #define HAVE_SENDFILE	/* sendfile(2) exists in 2.6, but not in 2.4 */
+#if CONFIG_SMP
+#define HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
+#else
+#undef  HAVE_PERCPU_SB	/* per cpu superblock counters are a 2.6 feature */
+#endif
 
 /*
  * State flag for unwritten extent buffers.
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b4d971b01588..56caa88713ab 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -462,6 +462,7 @@ xfs_fs_counts(
 {
 	unsigned long	s;
 
+	xfs_icsb_sync_counters_lazy(mp);
 	s = XFS_SB_LOCK(mp);
 	cnt->freedata = mp->m_sb.sb_fdblocks;
 	cnt->freertx = mp->m_sb.sb_frextents;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 62188ea392c7..9b43b7b3d760 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -51,11 +51,31 @@ STATIC int xfs_uuid_mount(xfs_mount_t *);
 STATIC void	xfs_uuid_unmount(xfs_mount_t *mp);
 STATIC void	xfs_unmountfs_wait(xfs_mount_t *);
 
+
+#ifdef HAVE_PERCPU_SB
+STATIC void	xfs_icsb_destroy_counters(xfs_mount_t *);
+STATIC void	xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, int);
+STATIC void	xfs_icsb_sync_counters(xfs_mount_t *);
+STATIC int	xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t,
+						int, int);
+STATIC int	xfs_icsb_modify_counters_locked(xfs_mount_t *, xfs_sb_field_t,
+						int, int);
+
+#else
+
+#define xfs_icsb_destroy_counters(mp)			do { } while (0)
+#define xfs_icsb_balance_counter(mp, a, b)		do { } while (0)
+#define xfs_icsb_sync_counters(mp)			do { } while (0)
+#define xfs_icsb_modify_counters(mp, a, b, c)		do { } while (0)
+#define xfs_icsb_modify_counters_locked(mp, a, b, c)	do { } while (0)
+
+#endif
+
 static const struct {
 	short offset;
 	short type;	/* 0 = integer
 			 * 1 = binary / string (no translation)
 			 */
 } xfs_sb_info[] = {
     { offsetof(xfs_sb_t, sb_magicnum),   0 },
     { offsetof(xfs_sb_t, sb_blocksize),  0 },
@@ -113,7 +133,11 @@ xfs_mount_init(void)
 {
 	xfs_mount_t	*mp;
 
-	mp = kmem_zalloc(sizeof(*mp), KM_SLEEP);
+	mp = kmem_zalloc(sizeof(xfs_mount_t), KM_SLEEP);
+
+	if (xfs_icsb_init_counters(mp)) {
+		mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+	}
 
 	AIL_LOCKINIT(&mp->m_ail_lock, "xfs_ail");
 	spinlock_init(&mp->m_sb_lock, "xfs_sb");
@@ -136,8 +160,8 @@ xfs_mount_init(void)
  */
 void
 xfs_mount_free(
 	xfs_mount_t	*mp,
 	int		remove_bhv)
 {
 	if (mp->m_ihash)
 		xfs_ihash_free(mp);
@@ -177,6 +201,7 @@ xfs_mount_free(
 		VFS_REMOVEBHV(vfsp, &mp->m_bhv);
 	}
 
+	xfs_icsb_destroy_counters(mp);
 	kmem_free(mp, sizeof(xfs_mount_t));
 }
 
@@ -527,6 +552,10 @@ xfs_readsb(xfs_mount_t *mp)
 		ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
 	}
 
+	xfs_icsb_balance_counter(mp, XFS_SBS_ICOUNT, 0);
+	xfs_icsb_balance_counter(mp, XFS_SBS_IFREE, 0);
+	xfs_icsb_balance_counter(mp, XFS_SBS_FDBLOCKS, 0);
+
 	mp->m_sb_bp = bp;
 	xfs_buf_relse(bp);
 	ASSERT(XFS_BUF_VALUSEMA(bp) > 0);
@@ -1154,6 +1183,9 @@ xfs_unmountfs_writesb(xfs_mount_t *mp)
 	sbp = xfs_getsb(mp, 0);
 	if (!(XFS_MTOVFS(mp)->vfs_flag & VFS_RDONLY ||
 		XFS_FORCED_SHUTDOWN(mp))) {
+
+		xfs_icsb_sync_counters(mp);
+
 		/*
 		 * mark shared-readonly if desired
 		 */
@@ -1227,7 +1259,6 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
 
 	xfs_trans_log_buf(tp, bp, first, last);
 }
-
 /*
  * xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
  * a delta to a specified field in the in-core superblock.  Simply
@@ -1237,7 +1268,7 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
  *
  * The SB_LOCK must be held when this routine is called.
  */
-STATIC int
+int
 xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
 	int delta, int rsvd)
 {
@@ -1406,9 +1437,26 @@ xfs_mod_incore_sb(xfs_mount_t *mp, xfs_sb_field_t field, int delta, int rsvd)
 	unsigned long	s;
 	int		status;
 
-	s = XFS_SB_LOCK(mp);
-	status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
-	XFS_SB_UNLOCK(mp, s);
+	/* check for per-cpu counters */
+	switch (field) {
+#ifdef HAVE_PERCPU_SB
+	case XFS_SBS_ICOUNT:
+	case XFS_SBS_IFREE:
+	case XFS_SBS_FDBLOCKS:
+		if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
+			status = xfs_icsb_modify_counters(mp, field,
+							delta, rsvd);
+			break;
+		}
+		/* FALLTHROUGH */
+#endif
+	default:
+		s = XFS_SB_LOCK(mp);
+		status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
+		XFS_SB_UNLOCK(mp, s);
+		break;
+	}
+
 	return status;
 }
 
@@ -1445,8 +1493,26 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 		 * from the loop so we'll fall into the undo loop
 		 * below.
 		 */
-		status = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field,
-				msbp->msb_delta, rsvd);
+		switch (msbp->msb_field) {
+#ifdef HAVE_PERCPU_SB
+		case XFS_SBS_ICOUNT:
+		case XFS_SBS_IFREE:
+		case XFS_SBS_FDBLOCKS:
+			if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
+				status = xfs_icsb_modify_counters_locked(mp,
+						msbp->msb_field,
+						msbp->msb_delta, rsvd);
+				break;
+			}
+			/* FALLTHROUGH */
+#endif
+		default:
+			status = xfs_mod_incore_sb_unlocked(mp,
+						msbp->msb_field,
+						msbp->msb_delta, rsvd);
+			break;
+		}
+
 		if (status != 0) {
 			break;
 		}
@@ -1463,8 +1529,28 @@ xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd)
 	if (status != 0) {
 		msbp--;
 		while (msbp >= msb) {
-			status = xfs_mod_incore_sb_unlocked(mp,
-				msbp->msb_field, -(msbp->msb_delta), rsvd);
+			switch (msbp->msb_field) {
+#ifdef HAVE_PERCPU_SB
+			case XFS_SBS_ICOUNT:
+			case XFS_SBS_IFREE:
+			case XFS_SBS_FDBLOCKS:
+				if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) {
+					status =
+					    xfs_icsb_modify_counters_locked(mp,
+							msbp->msb_field,
+							-(msbp->msb_delta),
+							rsvd);
+					break;
+				}
+				/* FALLTHROUGH */
+#endif
+			default:
+				status = xfs_mod_incore_sb_unlocked(mp,
+							msbp->msb_field,
+							-(msbp->msb_delta),
+							rsvd);
+				break;
+			}
 			ASSERT(status == 0);
 			msbp--;
 		}
@@ -1577,3 +1663,445 @@ xfs_mount_log_sbunit(
 	xfs_mod_sb(tp, fields);
 	xfs_trans_commit(tp, 0, NULL);
 }
+
+
+#ifdef HAVE_PERCPU_SB
+/*
+ * Per-cpu incore superblock counters
+ *
+ * Simple concept, difficult implementation
+ *
+ * Basically, replace the incore superblock counters with a distributed per cpu
+ * counter for contended fields (e.g. free block count).
+ *
+ * Difficulties arise in that the incore sb is used for ENOSPC checking, and
+ * hence needs to be accurately read when we are running low on space. Hence
+ * there is a method to enable and disable the per-cpu counters based on how
+ * much "stuff" is available in them.
+ *
+ * Basically, a counter is enabled if there is enough free resource to justify
+ * running a per-cpu fast-path. If the per-cpu counter runs out (i.e. a local
+ * ENOSPC), then we disable the counters to synchronise all callers and
+ * re-distribute the available resources.
+ *
+ * If, once we redistributed the available resources, we still get a failure,
+ * we disable the per-cpu counter and go through the slow path.
+ *
+ * The slow path is the current xfs_mod_incore_sb() function. This means that
+ * when we disable a per-cpu counter, we need to drain its resources back to
+ * the global superblock. We do this after disabling the counter to prevent
+ * more threads from queueing up on the counter.
+ *
+ * Essentially, this means that we still need a lock in the fast path to enable
+ * synchronisation between the global counters and the per-cpu counters. This
+ * is not a problem because the lock will be local to a CPU almost all the time
+ * and have little contention except when we get to ENOSPC conditions.
+ *
+ * Basically, this lock becomes a barrier that enables us to lock out the fast
+ * path while we do things like enabling and disabling counters and
+ * synchronising the counters.
+ *
+ * Locking rules:
+ *
+ *	1. XFS_SB_LOCK() before picking up per-cpu locks
+ *	2. per-cpu locks always picked up via for_each_online_cpu() order
+ *	3. accurate counter sync requires XFS_SB_LOCK + per cpu locks
+ *	4. modifying per-cpu counters requires holding per-cpu lock
+ *	5. modifying global counters requires holding XFS_SB_LOCK
+ *	6. enabling or disabling a counter requires holding the XFS_SB_LOCK
+ *	   and _none_ of the per-cpu locks.
+ *
+ * Disabled counters are only ever re-enabled by a balance operation
+ * that results in more free resources per CPU than a given threshold.
+ * To ensure counters don't remain disabled, they are rebalanced when
+ * the global resource goes above a higher threshold (i.e. some hysteresis
+ * is present to prevent thrashing).
+ *
+ * Note: hotplug CPUs not yet supported
+ */
+int
+xfs_icsb_init_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_cnts_t	*cntp;
+	int		i;
+
+	mp->m_sb_cnts = alloc_percpu(xfs_icsb_cnts_t);
+	if (mp->m_sb_cnts == NULL)
+		return -ENOMEM;
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		spin_lock_init(&cntp->icsb_lock);
+	}
+	/*
+	 * start with all counters disabled so that the
+	 * initial balance kicks us off correctly
+	 */
+	mp->m_icsb_counters = -1;
+	return 0;
+}
+
+STATIC void
+xfs_icsb_destroy_counters(
+	xfs_mount_t	*mp)
+{
+	if (mp->m_sb_cnts)
+		free_percpu(mp->m_sb_cnts);
+}
+
+
+STATIC inline void
+xfs_icsb_lock_all_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_cnts_t	*cntp;
+	int		i;
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		spin_lock(&cntp->icsb_lock);
+	}
+}
+
+STATIC inline void
+xfs_icsb_unlock_all_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_cnts_t	*cntp;
+	int		i;
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		spin_unlock(&cntp->icsb_lock);
+	}
+}
+
+STATIC void
+xfs_icsb_count(
+	xfs_mount_t	*mp,
+	xfs_icsb_cnts_t	*cnt,
+	int		flags)
+{
+	xfs_icsb_cnts_t	*cntp;
+	int		i;
+
+	memset(cnt, 0, sizeof(xfs_icsb_cnts_t));
+
+	if (!(flags & XFS_ICSB_LAZY_COUNT))
+		xfs_icsb_lock_all_counters(mp);
+
+	for_each_online_cpu(i) {
+		cntp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, i);
+		cnt->icsb_icount += cntp->icsb_icount;
+		cnt->icsb_ifree += cntp->icsb_ifree;
+		cnt->icsb_fdblocks += cntp->icsb_fdblocks;
+	}
+
+	if (!(flags & XFS_ICSB_LAZY_COUNT))
+		xfs_icsb_unlock_all_counters(mp);
+}
+
+STATIC int
+xfs_icsb_counter_disabled(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field)
+{
+	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+	return test_bit(field, &mp->m_icsb_counters);
+}
+
+STATIC int
+xfs_icsb_disable_counter(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field)
+{
+	xfs_icsb_cnts_t	cnt;
+
+	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+
+	xfs_icsb_lock_all_counters(mp);
+	if (!test_and_set_bit(field, &mp->m_icsb_counters)) {
+		/* drain back to superblock */
+
+		xfs_icsb_count(mp, &cnt, XFS_ICSB_SB_LOCKED|XFS_ICSB_LAZY_COUNT);
+		switch(field) {
+		case XFS_SBS_ICOUNT:
+			mp->m_sb.sb_icount = cnt.icsb_icount;
+			break;
+		case XFS_SBS_IFREE:
+			mp->m_sb.sb_ifree = cnt.icsb_ifree;
+			break;
+		case XFS_SBS_FDBLOCKS:
+			mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
+			break;
+		default:
+			BUG();
+		}
+	}
+
+	xfs_icsb_unlock_all_counters(mp);
+
+	return 0;
+}
+
+STATIC void
+xfs_icsb_enable_counter(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	uint64_t	count,
+	uint64_t	resid)
+{
+	xfs_icsb_cnts_t	*cntp;
+	int		i;
+
+	ASSERT((field >= XFS_SBS_ICOUNT) && (field <= XFS_SBS_FDBLOCKS));
+
+	xfs_icsb_lock_all_counters(mp);
+	for_each_online_cpu(i) {
+		cntp = per_cpu_ptr(mp->m_sb_cnts, i);
+		switch (field) {
+		case XFS_SBS_ICOUNT:
+			cntp->icsb_icount = count + resid;
+			break;
+		case XFS_SBS_IFREE:
+			cntp->icsb_ifree = count + resid;
+			break;
+		case XFS_SBS_FDBLOCKS:
+			cntp->icsb_fdblocks = count + resid;
+			break;
+		default:
+			BUG();
+			break;
+		}
+		resid = 0;
+	}
+	clear_bit(field, &mp->m_icsb_counters);
+	xfs_icsb_unlock_all_counters(mp);
+}
+
+STATIC void
+xfs_icsb_sync_counters_int(
+	xfs_mount_t	*mp,
+	int		flags)
+{
+	xfs_icsb_cnts_t	cnt;
+	int		s;
+
+	/* Pass 1: lock all counters */
+	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
+		s = XFS_SB_LOCK(mp);
+
+	xfs_icsb_count(mp, &cnt, flags);
+
+	/* Step 3: update mp->m_sb fields */
+	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_ICOUNT))
+		mp->m_sb.sb_icount = cnt.icsb_icount;
+	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_IFREE))
+		mp->m_sb.sb_ifree = cnt.icsb_ifree;
+	if (!xfs_icsb_counter_disabled(mp, XFS_SBS_FDBLOCKS))
+		mp->m_sb.sb_fdblocks = cnt.icsb_fdblocks;
+
+	if ((flags & XFS_ICSB_SB_LOCKED) == 0)
+		XFS_SB_UNLOCK(mp, s);
+}
+
+/*
+ * Accurate update of per-cpu counters to incore superblock
+ */
+STATIC void
+xfs_icsb_sync_counters(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_sync_counters_int(mp, 0);
+}
+
+/*
+ * lazy addition used for things like df, background sb syncs, etc
+ */
+void
+xfs_icsb_sync_counters_lazy(
+	xfs_mount_t	*mp)
+{
+	xfs_icsb_sync_counters_int(mp, XFS_ICSB_LAZY_COUNT);
+}
+
+/*
+ * Balance and enable/disable counters as necessary.
+ *
+ * Thresholds for re-enabling counters are somewhat magic.
+ * Inode counts are chosen to be the same number as a single
+ * on-disk allocation chunk per CPU, and the free block count is
+ * something far enough from zero that we aren't going to thrash
+ * when we get near ENOSPC.
+ */
+#define XFS_ICSB_INO_CNTR_REENABLE	64
+#define XFS_ICSB_FDBLK_CNTR_REENABLE	512
+STATIC void
+xfs_icsb_balance_counter(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		flags)
+{
+	uint64_t	count, resid = 0;
+	int		weight = num_online_cpus();
+	int		s;
+
+	if (!(flags & XFS_ICSB_SB_LOCKED))
+		s = XFS_SB_LOCK(mp);
+
+	/* disable counter and sync counter */
+	xfs_icsb_disable_counter(mp, field);
+
+	/* update counters - first CPU gets residual */
+	switch (field) {
+	case XFS_SBS_ICOUNT:
+		count = mp->m_sb.sb_icount;
+		resid = do_div(count, weight);
+		if (count < XFS_ICSB_INO_CNTR_REENABLE)
+			goto out;
+		break;
+	case XFS_SBS_IFREE:
+		count = mp->m_sb.sb_ifree;
+		resid = do_div(count, weight);
+		if (count < XFS_ICSB_INO_CNTR_REENABLE)
+			goto out;
+		break;
+	case XFS_SBS_FDBLOCKS:
+		count = mp->m_sb.sb_fdblocks;
+		resid = do_div(count, weight);
+		if (count < XFS_ICSB_FDBLK_CNTR_REENABLE)
+			goto out;
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	xfs_icsb_enable_counter(mp, field, count, resid);
+out:
+	if (!(flags & XFS_ICSB_SB_LOCKED))
+		XFS_SB_UNLOCK(mp, s);
+}
+
+STATIC int
+xfs_icsb_modify_counters_int(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		delta,
+	int		rsvd,
+	int		flags)
+{
+	xfs_icsb_cnts_t	*icsbp;
+	long long	lcounter;	/* long counter for 64 bit fields */
+	int		cpu, s, locked = 0;
+	int		ret = 0, balance_done = 0;
+
+again:
+	cpu = get_cpu();
+	icsbp = (xfs_icsb_cnts_t *)per_cpu_ptr(mp->m_sb_cnts, cpu),
+	spin_lock(&icsbp->icsb_lock);
+	if (unlikely(xfs_icsb_counter_disabled(mp, field)))
+		goto slow_path;
+
+	switch (field) {
+	case XFS_SBS_ICOUNT:
+		lcounter = icsbp->icsb_icount;
+		lcounter += delta;
+		if (unlikely(lcounter < 0))
+			goto slow_path;
+		icsbp->icsb_icount = lcounter;
+		break;
+
+	case XFS_SBS_IFREE:
+		lcounter = icsbp->icsb_ifree;
+		lcounter += delta;
+		if (unlikely(lcounter < 0))
+			goto slow_path;
+		icsbp->icsb_ifree = lcounter;
+		break;
+
+	case XFS_SBS_FDBLOCKS:
+		BUG_ON((mp->m_resblks - mp->m_resblks_avail) != 0);
+
+		lcounter = icsbp->icsb_fdblocks;
+		lcounter += delta;
+		if (unlikely(lcounter < 0))
+			goto slow_path;
+		icsbp->icsb_fdblocks = lcounter;
+		break;
+	default:
+		BUG();
+		break;
+	}
+	spin_unlock(&icsbp->icsb_lock);
+	put_cpu();
+	if (locked)
+		XFS_SB_UNLOCK(mp, s);
+	return 0;
+
+	/*
+	 * The slow path needs to be run with the SBLOCK
+	 * held so that we prevent other threads from
+	 * attempting to run this path at the same time.
+	 * This provides exclusion for the balancing code,
+	 * and exclusive fallback if the balance does not
+	 * provide enough resources to continue in an unlocked
+	 * manner.
+	 */
+slow_path:
+	spin_unlock(&icsbp->icsb_lock);
+	put_cpu();
+
+	/* need to hold superblock in case we need
+	 * to disable a counter */
+	if (!(flags & XFS_ICSB_SB_LOCKED)) {
+		s = XFS_SB_LOCK(mp);
+		locked = 1;
+		flags |= XFS_ICSB_SB_LOCKED;
+	}
+	if (!balance_done) {
+		xfs_icsb_balance_counter(mp, field, flags);
+		balance_done = 1;
+		goto again;
+	} else {
+		/*
+		 * we might not have enough on this local
+		 * cpu to allocate for a bulk request.
+		 * We need to drain this field from all CPUs
+		 * and disable the counter fastpath
+		 */
+		xfs_icsb_disable_counter(mp, field);
+	}
+
+	ret = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd);
+
+	if (locked)
+		XFS_SB_UNLOCK(mp, s);
+	return ret;
+}
+
+STATIC int
+xfs_icsb_modify_counters(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		delta,
+	int		rsvd)
+{
+	return xfs_icsb_modify_counters_int(mp, field, delta, rsvd, 0);
+}
+
+/*
+ * Called when superblock is already locked
+ */
+STATIC int
+xfs_icsb_modify_counters_locked(
+	xfs_mount_t	*mp,
+	xfs_sb_field_t	field,
+	int		delta,
+	int		rsvd)
+{
+	return xfs_icsb_modify_counters_int(mp, field, delta,
+						rsvd, XFS_ICSB_SB_LOCKED);
+}
+#endif
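
As the comment block above describes, a disabled counter is only re-enabled
when a rebalance leaves each CPU with more than a threshold share of the
resource: xfs_icsb_balance_counter() divides the global value across the
online CPUs with do_div() and xfs_icsb_enable_counter() hands the remainder
to the first CPU. The short standalone sketch below mirrors just that
arithmetic; it is illustrative only, not the kernel code, and the names
balance_demo and REENABLE_THRESHOLD are invented (standing in for the
XFS_ICSB_*_CNTR_REENABLE thresholds).

    #include <stdint.h>
    #include <stdio.h>

    #define REENABLE_THRESHOLD 512  /* stand-in for XFS_ICSB_FDBLK_CNTR_REENABLE */

    /* Split a global count across ncpus; report whether the per-cpu
     * fast path would be re-enabled. */
    static int balance_demo(uint64_t global, unsigned int ncpus,
                            uint64_t *per_cpu_share, uint64_t *residual)
    {
            uint64_t count = global / ncpus;        /* same split as do_div() */
            uint64_t resid = global % ncpus;

            if (count < REENABLE_THRESHOLD)
                    return 0;       /* stay disabled: too little per CPU */

            *per_cpu_share = count; /* every CPU gets this much ...          */
            *residual = resid;      /* ... and the first CPU also gets the rest */
            return 1;               /* counter can be re-enabled */
    }

    int main(void)
    {
            uint64_t share, resid;

            if (balance_demo(10000, 4, &share, &resid))
                    printf("re-enable: %llu per cpu, %llu residual to cpu0\n",
                           (unsigned long long)share, (unsigned long long)resid);
            if (!balance_demo(1000, 4, &share, &resid))
                    printf("1000 blocks over 4 cpus stays on the slow path\n");
            return 0;
    }
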
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4c9817a80435..7cca5110ca44 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -267,6 +267,32 @@ typedef struct xfs_ioops {
 #define XFS_IODONE(vfsp) \
 	(*(mp)->m_io_ops.xfs_iodone)(vfsp)
 
+#ifdef HAVE_PERCPU_SB
+
+/*
+ * Valid per-cpu incore superblock counters. Note that if you add new counters,
+ * you may need to define new counter disabled bit field descriptors as there
+ * are more possible fields in the superblock than can fit in a bitfield on a
+ * 32 bit platform. The XFS_SBS_* values for the current counters just
+ * fit.
+ */
+typedef struct xfs_icsb_cnts {
+	uint64_t	icsb_fdblocks;
+	uint64_t	icsb_ifree;
+	uint64_t	icsb_icount;
+	spinlock_t	icsb_lock;
+} xfs_icsb_cnts_t;
+
+#define XFS_ICSB_SB_LOCKED	(1 << 0)	/* sb already locked */
+#define XFS_ICSB_LAZY_COUNT	(1 << 1)	/* accuracy not needed */
+
+extern int	xfs_icsb_init_counters(struct xfs_mount *);
+extern void	xfs_icsb_sync_counters_lazy(struct xfs_mount *);
+
+#else
+#define xfs_icsb_init_counters(mp)	(0)
+#define xfs_icsb_sync_counters_lazy(mp)	do { } while (0)
+#endif
 
 typedef struct xfs_mount {
 	bhv_desc_t		m_bhv;		/* vfs xfs behavior */
@@ -372,6 +398,10 @@ typedef struct xfs_mount {
 	struct xfs_qmops	m_qm_ops;	/* vector of XQM ops */
 	struct xfs_ioops	m_io_ops;	/* vector of I/O ops */
 	atomic_t		m_active_trans;	/* number trans frozen */
+#ifdef HAVE_PERCPU_SB
+	xfs_icsb_cnts_t		*m_sb_cnts;	/* per-cpu superblock counters */
+	unsigned long		m_icsb_counters; /* disabled per-cpu counters */
+#endif
 } xfs_mount_t;
 
 /*
@@ -409,6 +439,8 @@ typedef struct xfs_mount {
 #define XFS_MOUNT_DIRSYNC	(1ULL << 21)	/* synchronous directory ops */
 #define XFS_MOUNT_COMPAT_IOSIZE	(1ULL << 22)	/* don't report large preferred
 						 * I/O size in stat() */
+#define XFS_MOUNT_NO_PERCPU_SB	(1ULL << 23)	/* don't use per-cpu superblock
+						   counters */
 
 
 /*
@@ -546,6 +578,8 @@ extern void xfs_unmountfs_close(xfs_mount_t *, struct cred *);
 extern int	xfs_unmountfs_writesb(xfs_mount_t *);
 extern int	xfs_unmount_flush(xfs_mount_t *, int);
 extern int	xfs_mod_incore_sb(xfs_mount_t *, xfs_sb_field_t, int, int);
+extern int	xfs_mod_incore_sb_unlocked(xfs_mount_t *, xfs_sb_field_t,
+			int, int);
 extern int	xfs_mod_incore_sb_batch(xfs_mount_t *, xfs_mod_sb_t *,
 			uint, int);
 extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
diff --git a/fs/xfs/xfs_vfsops.c b/fs/xfs/xfs_vfsops.c
index 2a0a9efb8ccb..2e1045837881 100644
--- a/fs/xfs/xfs_vfsops.c
+++ b/fs/xfs/xfs_vfsops.c
@@ -55,7 +55,7 @@
 #include "xfs_clnt.h"
 #include "xfs_fsops.h"
 
 STATIC int xfs_sync(bhv_desc_t *, int, cred_t *);
 
 int
 xfs_init(void)
@@ -807,6 +807,7 @@ xfs_statvfs(
 
 	statp->f_type = XFS_SB_MAGIC;
 
+	xfs_icsb_sync_counters_lazy(mp);
 	s = XFS_SB_LOCK(mp);
 	statp->f_bsize = sbp->sb_blocksize;
 	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;