1 files changed, 90 insertions, 4 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 669b89b95ccc..bdf3e644baae 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -1681,6 +1681,88 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
        return;
 }
+/**
+ * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
+ * @rgd: The rgrp in question
+ * @loops: An indication of how picky we can be (0=very, 1=less so)
+ *
+ * This function uses the recently added glock statistics in order to
+ * figure out whether a particular resource group is suffering from
+ * contention from multiple nodes. This is done purely on the basis
+ * of timings, since this is the only data we have to work with and
+ * our aim here is to reject a resource group which is highly contended
+ * but (very important) not to do this too often in order to ensure that
+ * we do not land up introducing fragmentation by changing resource
+ * groups when not actually required.
+ *
+ * The calculation is fairly simple, we want to know whether the SRTTB
+ * (i.e. smoothed round trip time for blocking operations) to acquire
+ * the lock for this rgrp's glock is significantly greater than the
+ * time taken for resource groups on average. We introduce a margin in
+ * the form of the variable @var which is computed as the sum of the two
+ * respective variances, and multiplied by a factor depending on @loops
+ * and whether we have a lot of data to base the decision on. This is
+ * then tested against the square difference of the means in order to
+ * decide whether the result is statistically significant or not.
+ *
+ * Returns: A boolean verdict on the congestion status
+ */
+static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
+{
+        const struct gfs2_glock *gl = rgd->rd_gl;
+        const struct gfs2_sbd *sdp = gl->gl_sbd;
+        struct gfs2_lkstats *st;
+        s64 r_dcount, l_dcount;
+        s64 r_srttb, l_srttb;
+        s64 srttb_diff;
+        s64 sqr_diff;
+        s64 var;
+        preempt_disable();
+        st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
+        r_srttb = st->stats[GFS2_LKS_SRTTB];
+        r_dcount = st->stats[GFS2_LKS_DCOUNT];
+        var = st->stats[GFS2_LKS_SRTTVARB] +
+              gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
+        preempt_enable();
+        l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
+        l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
+        if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
+                return false;
+        srttb_diff = r_srttb - l_srttb;
+        sqr_diff = srttb_diff * srttb_diff;
+        var *= 2;
+        if (l_dcount < 8 || r_dcount < 8)
+                var *= 2;
+        if (loops == 1)
+                var *= 2;
+        return ((srttb_diff < 0) && (sqr_diff > var));
+}
+/**
+ * gfs2_rgrp_used_recently - Test the age of the rgrp glock's gl_dstamp
+ * @rs: The block reservation with the rgrp to test
+ * @msecs: The time limit in milliseconds
+ *
+ * Returns: True if more than @msecs have elapsed since the glock's gl_dstamp
+ */
+static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
+                                    u64 msecs)
+{
+        u64 tdiff;
+        tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
+                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
+        return tdiff > (msecs * 1000 * 1000);
+}
 static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
 {
        struct gfs2_rgrpd *rgd = *pos;
@@ -1707,7 +1789,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *begin = NULL;
        struct gfs2_blkreserv *rs = ip->i_res;
-        int error = 0, rg_locked, flags = LM_FLAG_TRY;
+        int error = 0, rg_locked, flags = 0;
        u64 last_unlinked = NO_BLOCK;
        int loops = 0;
@@ -1731,13 +1813,18 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
                if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
                        rg_locked = 0;
+                        if (!gfs2_rs_active(rs) && (loops < 2) &&
+                             gfs2_rgrp_used_recently(rs, 1000) &&
+                             gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+                                goto next_rgrp;
                        error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
                                                   LM_ST_EXCLUSIVE, flags,
                                                   &rs->rs_rgd_gh);
-                        if (error == GLR_TRYFAILED)
-                                goto next_rgrp;
                        if (unlikely(error))
                                return error;
+                        if (!gfs2_rs_active(rs) && (loops < 2) &&
+                            gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+                                goto skip_rgrp;
                        if (sdp->sd_args.ar_rgrplvb) {
                                error = update_rgrp_lvb(rs->rs_rbm.rgd);
                                if (unlikely(error)) {
@@ -1789,7 +1876,6 @@ next_rgrp:
                 * then this checks for some less likely conditions before
                 * trying again.
                 */
-                flags &= ~LM_FLAG_TRY;
                loops++;
                /* Check that fs hasn't grown if writing to rindex */
                if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 669b89b95ccc..bdf3e644baae 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c
@@ -1681,6 +1681,88 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
1681	return;	1681	return;
1682	}	1682	}
1683		1683
		1684	/**
		1685	* gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
		1686	* @rgd: The rgrp in question
		1687	* @loops: An indication of how picky we can be (0=very, 1=less so)
		1688	*
		1689	* This function uses the recently added glock statistics in order to
		1690	* figure out whether a particular resource group is suffering from
		1691	* contention from multiple nodes. This is done purely on the basis
		1692	* of timings, since this is the only data we have to work with and
		1693	* our aim here is to reject a resource group which is highly contended
		1694	* but (very important) not to do this too often in order to ensure that
		1695	* we do not land up introducing fragmentation by changing resource
		1696	* groups when not actually required.
		1697	*
		1698	* The calculation is fairly simple, we want to know whether the SRTTB
		1699	* (i.e. smoothed round trip time for blocking operations) to acquire
		1700	* the lock for this rgrp's glock is significantly greater than the
		1701	* time taken for resource groups on average. We introduce a margin in
		1702	* the form of the variable @var which is computed as the sum of the two
		1703	* respective variances, and multiplied by a factor depending on @loops
		1704	* and whether we have a lot of data to base the decision on. This is
		1705	* then tested against the square difference of the means in order to
		1706	* decide whether the result is statistically significant or not.
		1707	*
		1708	* Returns: A boolean verdict on the congestion status
		1709	*/
		1710
		1711	static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
		1712	{
		1713	const struct gfs2_glock *gl = rgd->rd_gl;
		1714	const struct gfs2_sbd *sdp = gl->gl_sbd;
		1715	struct gfs2_lkstats *st;
		1716	s64 r_dcount, l_dcount;
		1717	s64 r_srttb, l_srttb;
		1718	s64 srttb_diff;
		1719	s64 sqr_diff;
		1720	s64 var;
		1721
		1722	preempt_disable();
		1723	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
		1724	r_srttb = st->stats[GFS2_LKS_SRTTB];
		1725	r_dcount = st->stats[GFS2_LKS_DCOUNT];
		1726	var = st->stats[GFS2_LKS_SRTTVARB] +
		1727	gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
		1728	preempt_enable();
		1729
		1730	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
		1731	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
		1732
		1733	if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
		1734	return false;
		1735
		1736	srttb_diff = r_srttb - l_srttb;
		1737	sqr_diff = srttb_diff * srttb_diff;
		1738
		1739	var *= 2;
		1740	if (l_dcount < 8 || r_dcount < 8)
		1741	var *= 2;
		1742	if (loops == 1)
		1743	var *= 2;
		1744
		1745	return ((srttb_diff < 0) && (sqr_diff > var));
		1746	}
		1747
		1748	/**
		1749	* gfs2_rgrp_used_recently
		1750	* @rs: The block reservation with the rgrp to test
		1751	* @msecs: The time limit in milliseconds
		1752	*
		1753	* Returns: True if the rgrp glock has been used within the time limit
		1754	*/
		1755	static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
		1756	u64 msecs)
		1757	{
		1758	u64 tdiff;
		1759
		1760	tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
		1761	rs->rs_rbm.rgd->rd_gl->gl_dstamp));
		1762
		1763	return tdiff > (msecs * 1000 * 1000);
		1764	}
		1765
1684	static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)	1766	static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1685	{	1767	{
1686	struct gfs2_rgrpd *rgd = *pos;	1768	struct gfs2_rgrpd *rgd = *pos;
@@ -1707,7 +1789,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1707	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);	1789	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1708	struct gfs2_rgrpd *begin = NULL;	1790	struct gfs2_rgrpd *begin = NULL;
1709	struct gfs2_blkreserv *rs = ip->i_res;	1791	struct gfs2_blkreserv *rs = ip->i_res;
1710	int error = 0, rg_locked, flags = LM_FLAG_TRY;	1792	int error = 0, rg_locked, flags = 0;
1711	u64 last_unlinked = NO_BLOCK;	1793	u64 last_unlinked = NO_BLOCK;
1712	int loops = 0;	1794	int loops = 0;
1713		1795
@@ -1731,13 +1813,18 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1731		1813
1732	if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {	1814	if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1733	rg_locked = 0;	1815	rg_locked = 0;
		1816	if (!gfs2_rs_active(rs) && (loops < 2) &&
		1817	gfs2_rgrp_used_recently(rs, 1000) &&
		1818	gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
		1819	goto next_rgrp;
1734	error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,	1820	error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
1735	LM_ST_EXCLUSIVE, flags,	1821	LM_ST_EXCLUSIVE, flags,
1736	&rs->rs_rgd_gh);	1822	&rs->rs_rgd_gh);
1737	if (error == GLR_TRYFAILED)
1738	goto next_rgrp;
1739	if (unlikely(error))	1823	if (unlikely(error))
1740	return error;	1824	return error;
		1825	if (!gfs2_rs_active(rs) && (loops < 2) &&
		1826	gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
		1827	goto skip_rgrp;
1741	if (sdp->sd_args.ar_rgrplvb) {	1828	if (sdp->sd_args.ar_rgrplvb) {
1742	error = update_rgrp_lvb(rs->rs_rbm.rgd);	1829	error = update_rgrp_lvb(rs->rs_rbm.rgd);
1743	if (unlikely(error)) {	1830	if (unlikely(error)) {
@@ -1789,7 +1876,6 @@ next_rgrp:
1789	* then this checks for some less likely conditions before	1876	* then this checks for some less likely conditions before
1790	* trying again.	1877	* trying again.
1791	*/	1878	*/
1792	flags &= ~LM_FLAG_TRY;
1793	loops++;	1879	loops++;
1794	/* Check that fs hasn't grown if writing to rindex */	1880	/* Check that fs hasn't grown if writing to rindex */
1795	if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {	1881	if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {