aboutsummaryrefslogtreecommitdiffstats
path: root/fs/gfs2/rgrp.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/gfs2/rgrp.c')
-rw-r--r--fs/gfs2/rgrp.c139
1 files changed, 123 insertions, 16 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 38fe18f2f055..37ee061d899e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -16,6 +16,7 @@
16#include <linux/prefetch.h> 16#include <linux/prefetch.h>
17#include <linux/blkdev.h> 17#include <linux/blkdev.h>
18#include <linux/rbtree.h> 18#include <linux/rbtree.h>
19#include <linux/random.h>
19 20
20#include "gfs2.h" 21#include "gfs2.h"
21#include "incore.h" 22#include "incore.h"
@@ -251,22 +252,25 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,
251static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) 252static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
252{ 253{
253 u64 rblock = block - rbm->rgd->rd_data0; 254 u64 rblock = block - rbm->rgd->rd_data0;
254 u32 goal = (u32)rblock; 255 u32 x;
255 int x;
256 256
257 if (WARN_ON_ONCE(rblock > UINT_MAX)) 257 if (WARN_ON_ONCE(rblock > UINT_MAX))
258 return -EINVAL; 258 return -EINVAL;
259 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) 259 if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data)
260 return -E2BIG; 260 return -E2BIG;
261 261
262 for (x = 0; x < rbm->rgd->rd_length; x++) { 262 rbm->bi = rbm->rgd->rd_bits;
263 rbm->bi = rbm->rgd->rd_bits + x; 263 rbm->offset = (u32)(rblock);
264 if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { 264 /* Check if the block is within the first block */
265 rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); 265 if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY)
266 break; 266 return 0;
267 }
268 }
269 267
268 /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */
269 rbm->offset += (sizeof(struct gfs2_rgrp) -
270 sizeof(struct gfs2_meta_header)) * GFS2_NBBY;
271 x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
272 rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap;
273 rbm->bi += x;
270 return 0; 274 return 0;
271} 275}
272 276
@@ -875,7 +879,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
875 goto fail; 879 goto fail;
876 880
877 rgd->rd_gl->gl_object = rgd; 881 rgd->rd_gl->gl_object = rgd;
878 rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; 882 rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
879 rgd->rd_flags &= ~GFS2_RDF_UPTODATE; 883 rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
880 if (rgd->rd_data > sdp->sd_max_rg_data) 884 if (rgd->rd_data > sdp->sd_max_rg_data)
881 sdp->sd_max_rg_data = rgd->rd_data; 885 sdp->sd_max_rg_data = rgd->rd_data;
@@ -1678,13 +1682,105 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
1678 return; 1682 return;
1679} 1683}
1680 1684
1685/**
1686 * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested
1687 * @rgd: The rgrp in question
1688 * @loops: An indication of how picky we can be (0=very, 1=less so)
1689 *
1690 * This function uses the recently added glock statistics in order to
1691 * figure out whether a particular resource group is suffering from
1692 * contention from multiple nodes. This is done purely on the basis
1693 * of timings, since this is the only data we have to work with and
1694 * our aim here is to reject a resource group which is highly contended
1695 * but (very important) not to do this too often in order to ensure that
1696 * we do not land up introducing fragmentation by changing resource
1697 * groups when not actually required.
1698 *
1699 * The calculation is fairly simple, we want to know whether the SRTTB
1700 * (i.e. smoothed round trip time for blocking operations) to acquire
1701 * the lock for this rgrp's glock is significantly greater than the
1702 * time taken for resource groups on average. We introduce a margin in
1703 * the form of the variable @var which is computed as the sum of the two
1704 * respective variances, and multiplied by a factor depending on @loops
1705 * and whether we have a lot of data to base the decision on. This is
1706 * then tested against the square difference of the means in order to
1707 * decide whether the result is statistically significant or not.
1708 *
1709 * Returns: A boolean verdict on the congestion status
1710 */
1711
1712static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops)
1713{
1714 const struct gfs2_glock *gl = rgd->rd_gl;
1715 const struct gfs2_sbd *sdp = gl->gl_sbd;
1716 struct gfs2_lkstats *st;
1717 s64 r_dcount, l_dcount;
1718 s64 r_srttb, l_srttb;
1719 s64 srttb_diff;
1720 s64 sqr_diff;
1721 s64 var;
1722
1723 preempt_disable();
1724 st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP];
1725 r_srttb = st->stats[GFS2_LKS_SRTTB];
1726 r_dcount = st->stats[GFS2_LKS_DCOUNT];
1727 var = st->stats[GFS2_LKS_SRTTVARB] +
1728 gl->gl_stats.stats[GFS2_LKS_SRTTVARB];
1729 preempt_enable();
1730
1731 l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB];
1732 l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT];
1733
1734 if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0))
1735 return false;
1736
1737 srttb_diff = r_srttb - l_srttb;
1738 sqr_diff = srttb_diff * srttb_diff;
1739
1740 var *= 2;
1741 if (l_dcount < 8 || r_dcount < 8)
1742 var *= 2;
1743 if (loops == 1)
1744 var *= 2;
1745
1746 return ((srttb_diff < 0) && (sqr_diff > var));
1747}
1748
1749/**
1750 * gfs2_rgrp_used_recently
1751 * @rs: The block reservation with the rgrp to test
1752 * @msecs: The time limit in milliseconds
1753 *
1754 * Returns: True if the rgrp glock has been used within the time limit
1755 */
1756static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
1757 u64 msecs)
1758{
1759 u64 tdiff;
1760
1761 tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
1762 rs->rs_rbm.rgd->rd_gl->gl_dstamp));
1763
1764 return tdiff > (msecs * 1000 * 1000);
1765}
1766
1767static u32 gfs2_orlov_skip(const struct gfs2_inode *ip)
1768{
1769 const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1770 u32 skip;
1771
1772 get_random_bytes(&skip, sizeof(skip));
1773 return skip % sdp->sd_rgrps;
1774}
1775
1681static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) 1776static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin)
1682{ 1777{
1683 struct gfs2_rgrpd *rgd = *pos; 1778 struct gfs2_rgrpd *rgd = *pos;
1779 struct gfs2_sbd *sdp = rgd->rd_sbd;
1684 1780
1685 rgd = gfs2_rgrpd_get_next(rgd); 1781 rgd = gfs2_rgrpd_get_next(rgd);
1686 if (rgd == NULL) 1782 if (rgd == NULL)
1687 rgd = gfs2_rgrpd_get_next(NULL); 1783 rgd = gfs2_rgrpd_get_first(sdp);
1688 *pos = rgd; 1784 *pos = rgd;
1689 if (rgd != begin) /* If we didn't wrap */ 1785 if (rgd != begin) /* If we didn't wrap */
1690 return true; 1786 return true;
@@ -1699,14 +1795,15 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b
1699 * Returns: errno 1795 * Returns: errno
1700 */ 1796 */
1701 1797
1702int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) 1798int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags)
1703{ 1799{
1704 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 1800 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1705 struct gfs2_rgrpd *begin = NULL; 1801 struct gfs2_rgrpd *begin = NULL;
1706 struct gfs2_blkreserv *rs = ip->i_res; 1802 struct gfs2_blkreserv *rs = ip->i_res;
1707 int error = 0, rg_locked, flags = LM_FLAG_TRY; 1803 int error = 0, rg_locked, flags = 0;
1708 u64 last_unlinked = NO_BLOCK; 1804 u64 last_unlinked = NO_BLOCK;
1709 int loops = 0; 1805 int loops = 0;
1806 u32 skip = 0;
1710 1807
1711 if (sdp->sd_args.ar_rgrplvb) 1808 if (sdp->sd_args.ar_rgrplvb)
1712 flags |= GL_SKIP; 1809 flags |= GL_SKIP;
@@ -1720,6 +1817,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1720 } else { 1817 } else {
1721 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); 1818 rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
1722 } 1819 }
1820 if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV))
1821 skip = gfs2_orlov_skip(ip);
1723 if (rs->rs_rbm.rgd == NULL) 1822 if (rs->rs_rbm.rgd == NULL)
1724 return -EBADSLT; 1823 return -EBADSLT;
1725 1824
@@ -1728,13 +1827,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested)
1728 1827
1729 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { 1828 if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
1730 rg_locked = 0; 1829 rg_locked = 0;
1830 if (skip && skip--)
1831 goto next_rgrp;
1832 if (!gfs2_rs_active(rs) && (loops < 2) &&
1833 gfs2_rgrp_used_recently(rs, 1000) &&
1834 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1835 goto next_rgrp;
1731 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, 1836 error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
1732 LM_ST_EXCLUSIVE, flags, 1837 LM_ST_EXCLUSIVE, flags,
1733 &rs->rs_rgd_gh); 1838 &rs->rs_rgd_gh);
1734 if (error == GLR_TRYFAILED)
1735 goto next_rgrp;
1736 if (unlikely(error)) 1839 if (unlikely(error))
1737 return error; 1840 return error;
1841 if (!gfs2_rs_active(rs) && (loops < 2) &&
1842 gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
1843 goto skip_rgrp;
1738 if (sdp->sd_args.ar_rgrplvb) { 1844 if (sdp->sd_args.ar_rgrplvb) {
1739 error = update_rgrp_lvb(rs->rs_rbm.rgd); 1845 error = update_rgrp_lvb(rs->rs_rbm.rgd);
1740 if (unlikely(error)) { 1846 if (unlikely(error)) {
@@ -1781,12 +1887,13 @@ next_rgrp:
1781 /* Find the next rgrp, and continue looking */ 1887 /* Find the next rgrp, and continue looking */
1782 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) 1888 if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
1783 continue; 1889 continue;
1890 if (skip)
1891 continue;
1784 1892
1785 /* If we've scanned all the rgrps, but found no free blocks 1893 /* If we've scanned all the rgrps, but found no free blocks
1786 * then this checks for some less likely conditions before 1894 * then this checks for some less likely conditions before
1787 * trying again. 1895 * trying again.
1788 */ 1896 */
1789 flags &= ~LM_FLAG_TRY;
1790 loops++; 1897 loops++;
1791 /* Check that fs hasn't grown if writing to rindex */ 1898 /* Check that fs hasn't grown if writing to rindex */
1792 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { 1899 if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {