diff options
-rw-r--r-- | fs/gfs2/rgrp.c | 94 |
1 files changed, 90 insertions, 4 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 669b89b95ccc..bdf3e644baae 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -1681,6 +1681,88 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip | |||
1681 | return; | 1681 | return; |
1682 | } | 1682 | } |
1683 | 1683 | ||
1684 | /** | ||
1685 | * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested | ||
1686 | * @rgd: The rgrp in question | ||
1687 | * @loops: An indication of how picky we can be (0=very, 1=less so) | ||
1688 | * | ||
1689 | * This function uses the recently added glock statistics in order to | ||
1690 | * figure out whether a parciular resource group is suffering from | ||
1691 | * contention from multiple nodes. This is done purely on the basis | ||
1692 | * of timings, since this is the only data we have to work with and | ||
1693 | * our aim here is to reject a resource group which is highly contended | ||
1694 | * but (very important) not to do this too often in order to ensure that | ||
1695 | * we do not land up introducing fragmentation by changing resource | ||
1696 | * groups when not actually required. | ||
1697 | * | ||
1698 | * The calculation is fairly simple, we want to know whether the SRTTB | ||
1699 | * (i.e. smoothed round trip time for blocking operations) to acquire | ||
1700 | * the lock for this rgrp's glock is significantly greater than the | ||
1701 | * time taken for resource groups on average. We introduce a margin in | ||
1702 | * the form of the variable @var which is computed as the sum of the two | ||
1703 | * respective variences, and multiplied by a factor depending on @loops | ||
1704 | * and whether we have a lot of data to base the decision on. This is | ||
1705 | * then tested against the square difference of the means in order to | ||
1706 | * decide whether the result is statistically significant or not. | ||
1707 | * | ||
1708 | * Returns: A boolean verdict on the congestion status | ||
1709 | */ | ||
1710 | |||
1711 | static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) | ||
1712 | { | ||
1713 | const struct gfs2_glock *gl = rgd->rd_gl; | ||
1714 | const struct gfs2_sbd *sdp = gl->gl_sbd; | ||
1715 | struct gfs2_lkstats *st; | ||
1716 | s64 r_dcount, l_dcount; | ||
1717 | s64 r_srttb, l_srttb; | ||
1718 | s64 srttb_diff; | ||
1719 | s64 sqr_diff; | ||
1720 | s64 var; | ||
1721 | |||
1722 | preempt_disable(); | ||
1723 | st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP]; | ||
1724 | r_srttb = st->stats[GFS2_LKS_SRTTB]; | ||
1725 | r_dcount = st->stats[GFS2_LKS_DCOUNT]; | ||
1726 | var = st->stats[GFS2_LKS_SRTTVARB] + | ||
1727 | gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; | ||
1728 | preempt_enable(); | ||
1729 | |||
1730 | l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; | ||
1731 | l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; | ||
1732 | |||
1733 | if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0)) | ||
1734 | return false; | ||
1735 | |||
1736 | srttb_diff = r_srttb - l_srttb; | ||
1737 | sqr_diff = srttb_diff * srttb_diff; | ||
1738 | |||
1739 | var *= 2; | ||
1740 | if (l_dcount < 8 || r_dcount < 8) | ||
1741 | var *= 2; | ||
1742 | if (loops == 1) | ||
1743 | var *= 2; | ||
1744 | |||
1745 | return ((srttb_diff < 0) && (sqr_diff > var)); | ||
1746 | } | ||
1747 | |||
1748 | /** | ||
1749 | * gfs2_rgrp_used_recently | ||
1750 | * @rs: The block reservation with the rgrp to test | ||
1751 | * @msecs: The time limit in milliseconds | ||
1752 | * | ||
1753 | * Returns: True if the rgrp glock has been used within the time limit | ||
1754 | */ | ||
1755 | static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, | ||
1756 | u64 msecs) | ||
1757 | { | ||
1758 | u64 tdiff; | ||
1759 | |||
1760 | tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), | ||
1761 | rs->rs_rbm.rgd->rd_gl->gl_dstamp)); | ||
1762 | |||
1763 | return tdiff > (msecs * 1000 * 1000); | ||
1764 | } | ||
1765 | |||
1684 | static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) | 1766 | static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) |
1685 | { | 1767 | { |
1686 | struct gfs2_rgrpd *rgd = *pos; | 1768 | struct gfs2_rgrpd *rgd = *pos; |
@@ -1707,7 +1789,7 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |||
1707 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1789 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1708 | struct gfs2_rgrpd *begin = NULL; | 1790 | struct gfs2_rgrpd *begin = NULL; |
1709 | struct gfs2_blkreserv *rs = ip->i_res; | 1791 | struct gfs2_blkreserv *rs = ip->i_res; |
1710 | int error = 0, rg_locked, flags = LM_FLAG_TRY; | 1792 | int error = 0, rg_locked, flags = 0; |
1711 | u64 last_unlinked = NO_BLOCK; | 1793 | u64 last_unlinked = NO_BLOCK; |
1712 | int loops = 0; | 1794 | int loops = 0; |
1713 | 1795 | ||
@@ -1731,13 +1813,18 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |||
1731 | 1813 | ||
1732 | if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { | 1814 | if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { |
1733 | rg_locked = 0; | 1815 | rg_locked = 0; |
1816 | if (!gfs2_rs_active(rs) && (loops < 2) && | ||
1817 | gfs2_rgrp_used_recently(rs, 1000) && | ||
1818 | gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) | ||
1819 | goto next_rgrp; | ||
1734 | error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, | 1820 | error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, |
1735 | LM_ST_EXCLUSIVE, flags, | 1821 | LM_ST_EXCLUSIVE, flags, |
1736 | &rs->rs_rgd_gh); | 1822 | &rs->rs_rgd_gh); |
1737 | if (error == GLR_TRYFAILED) | ||
1738 | goto next_rgrp; | ||
1739 | if (unlikely(error)) | 1823 | if (unlikely(error)) |
1740 | return error; | 1824 | return error; |
1825 | if (!gfs2_rs_active(rs) && (loops < 2) && | ||
1826 | gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) | ||
1827 | goto skip_rgrp; | ||
1741 | if (sdp->sd_args.ar_rgrplvb) { | 1828 | if (sdp->sd_args.ar_rgrplvb) { |
1742 | error = update_rgrp_lvb(rs->rs_rbm.rgd); | 1829 | error = update_rgrp_lvb(rs->rs_rbm.rgd); |
1743 | if (unlikely(error)) { | 1830 | if (unlikely(error)) { |
@@ -1789,7 +1876,6 @@ next_rgrp: | |||
1789 | * then this checks for some less likely conditions before | 1876 | * then this checks for some less likely conditions before |
1790 | * trying again. | 1877 | * trying again. |
1791 | */ | 1878 | */ |
1792 | flags &= ~LM_FLAG_TRY; | ||
1793 | loops++; | 1879 | loops++; |
1794 | /* Check that fs hasn't grown if writing to rindex */ | 1880 | /* Check that fs hasn't grown if writing to rindex */ |
1795 | if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { | 1881 | if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { |