diff options
Diffstat (limited to 'fs/gfs2/rgrp.c')
-rw-r--r-- | fs/gfs2/rgrp.c | 139 |
1 files changed, 123 insertions, 16 deletions
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 38fe18f2f055..37ee061d899e 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/prefetch.h> | 16 | #include <linux/prefetch.h> |
17 | #include <linux/blkdev.h> | 17 | #include <linux/blkdev.h> |
18 | #include <linux/rbtree.h> | 18 | #include <linux/rbtree.h> |
19 | #include <linux/random.h> | ||
19 | 20 | ||
20 | #include "gfs2.h" | 21 | #include "gfs2.h" |
21 | #include "incore.h" | 22 | #include "incore.h" |
@@ -251,22 +252,25 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, | |||
251 | static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) | 252 | static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) |
252 | { | 253 | { |
253 | u64 rblock = block - rbm->rgd->rd_data0; | 254 | u64 rblock = block - rbm->rgd->rd_data0; |
254 | u32 goal = (u32)rblock; | 255 | u32 x; |
255 | int x; | ||
256 | 256 | ||
257 | if (WARN_ON_ONCE(rblock > UINT_MAX)) | 257 | if (WARN_ON_ONCE(rblock > UINT_MAX)) |
258 | return -EINVAL; | 258 | return -EINVAL; |
259 | if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) | 259 | if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) |
260 | return -E2BIG; | 260 | return -E2BIG; |
261 | 261 | ||
262 | for (x = 0; x < rbm->rgd->rd_length; x++) { | 262 | rbm->bi = rbm->rgd->rd_bits; |
263 | rbm->bi = rbm->rgd->rd_bits + x; | 263 | rbm->offset = (u32)(rblock); |
264 | if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { | 264 | /* Check if the block is within the first block */ |
265 | rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); | 265 | if (rbm->offset < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) |
266 | break; | 266 | return 0; |
267 | } | ||
268 | } | ||
269 | 267 | ||
268 | /* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ | ||
269 | rbm->offset += (sizeof(struct gfs2_rgrp) - | ||
270 | sizeof(struct gfs2_meta_header)) * GFS2_NBBY; | ||
271 | x = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; | ||
272 | rbm->offset -= x * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; | ||
273 | rbm->bi += x; | ||
270 | return 0; | 274 | return 0; |
271 | } | 275 | } |
272 | 276 | ||
@@ -875,7 +879,7 @@ static int read_rindex_entry(struct gfs2_inode *ip) | |||
875 | goto fail; | 879 | goto fail; |
876 | 880 | ||
877 | rgd->rd_gl->gl_object = rgd; | 881 | rgd->rd_gl->gl_object = rgd; |
878 | rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lvb; | 882 | rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr; |
879 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; | 883 | rgd->rd_flags &= ~GFS2_RDF_UPTODATE; |
880 | if (rgd->rd_data > sdp->sd_max_rg_data) | 884 | if (rgd->rd_data > sdp->sd_max_rg_data) |
881 | sdp->sd_max_rg_data = rgd->rd_data; | 885 | sdp->sd_max_rg_data = rgd->rd_data; |
@@ -1678,13 +1682,105 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip | |||
1678 | return; | 1682 | return; |
1679 | } | 1683 | } |
1680 | 1684 | ||
1685 | /** | ||
1686 | * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested | ||
1687 | * @rgd: The rgrp in question | ||
1688 | * @loops: An indication of how picky we can be (0=very, 1=less so) | ||
1689 | * | ||
1690 | * This function uses the recently added glock statistics in order to | ||
1691 | * figure out whether a particular resource group is suffering from | ||
1692 | * contention from multiple nodes. This is done purely on the basis | ||
1693 | * of timings, since this is the only data we have to work with and | ||
1694 | * our aim here is to reject a resource group which is highly contended | ||
1695 | * but (very important) not to do this too often in order to ensure that | ||
1696 | * we do not land up introducing fragmentation by changing resource | ||
1697 | * groups when not actually required. | ||
1698 | * | ||
1699 | * The calculation is fairly simple, we want to know whether the SRTTB | ||
1700 | * (i.e. smoothed round trip time for blocking operations) to acquire | ||
1701 | * the lock for this rgrp's glock is significantly greater than the | ||
1702 | * time taken for resource groups on average. We introduce a margin in | ||
1703 | * the form of the variable @var which is computed as the sum of the two | ||
1704 | * respective variances, and multiplied by a factor depending on @loops | ||
1705 | * and whether we have a lot of data to base the decision on. This is | ||
1706 | * then tested against the square difference of the means in order to | ||
1707 | * decide whether the result is statistically significant or not. | ||
1708 | * | ||
1709 | * Returns: A boolean verdict on the congestion status | ||
1710 | */ | ||
1711 | |||
1712 | static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) | ||
1713 | { | ||
1714 | const struct gfs2_glock *gl = rgd->rd_gl; | ||
1715 | const struct gfs2_sbd *sdp = gl->gl_sbd; | ||
1716 | struct gfs2_lkstats *st; | ||
1717 | s64 r_dcount, l_dcount; | ||
1718 | s64 r_srttb, l_srttb; | ||
1719 | s64 srttb_diff; | ||
1720 | s64 sqr_diff; | ||
1721 | s64 var; | ||
1722 | |||
1723 | preempt_disable(); | ||
1724 | st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP]; | ||
1725 | r_srttb = st->stats[GFS2_LKS_SRTTB]; | ||
1726 | r_dcount = st->stats[GFS2_LKS_DCOUNT]; | ||
1727 | var = st->stats[GFS2_LKS_SRTTVARB] + | ||
1728 | gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; | ||
1729 | preempt_enable(); | ||
1730 | |||
1731 | l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; | ||
1732 | l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; | ||
1733 | |||
1734 | if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0)) | ||
1735 | return false; | ||
1736 | |||
1737 | srttb_diff = r_srttb - l_srttb; | ||
1738 | sqr_diff = srttb_diff * srttb_diff; | ||
1739 | |||
1740 | var *= 2; | ||
1741 | if (l_dcount < 8 || r_dcount < 8) | ||
1742 | var *= 2; | ||
1743 | if (loops == 1) | ||
1744 | var *= 2; | ||
1745 | |||
1746 | return ((srttb_diff < 0) && (sqr_diff > var)); | ||
1747 | } | ||
1748 | |||
1749 | /** | ||
1750 | * gfs2_rgrp_used_recently | ||
1751 | * @rs: The block reservation with the rgrp to test | ||
1752 | * @msecs: The time limit in milliseconds | ||
1753 | * | ||
1754 | * Returns: True if the rgrp glock has been used within the time limit | ||
1755 | */ | ||
1756 | static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, | ||
1757 | u64 msecs) | ||
1758 | { | ||
1759 | u64 tdiff; | ||
1760 | |||
1761 | tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), | ||
1762 | rs->rs_rbm.rgd->rd_gl->gl_dstamp)); | ||
1763 | |||
1764 | return tdiff > (msecs * 1000 * 1000); | ||
1765 | } | ||
1766 | |||
1767 | static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) | ||
1768 | { | ||
1769 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1770 | u32 skip; | ||
1771 | |||
1772 | get_random_bytes(&skip, sizeof(skip)); | ||
1773 | return skip % sdp->sd_rgrps; | ||
1774 | } | ||
1775 | |||
1681 | static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) | 1776 | static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) |
1682 | { | 1777 | { |
1683 | struct gfs2_rgrpd *rgd = *pos; | 1778 | struct gfs2_rgrpd *rgd = *pos; |
1779 | struct gfs2_sbd *sdp = rgd->rd_sbd; | ||
1684 | 1780 | ||
1685 | rgd = gfs2_rgrpd_get_next(rgd); | 1781 | rgd = gfs2_rgrpd_get_next(rgd); |
1686 | if (rgd == NULL) | 1782 | if (rgd == NULL) |
1687 | rgd = gfs2_rgrpd_get_next(NULL); | 1783 | rgd = gfs2_rgrpd_get_first(sdp); |
1688 | *pos = rgd; | 1784 | *pos = rgd; |
1689 | if (rgd != begin) /* If we didn't wrap */ | 1785 | if (rgd != begin) /* If we didn't wrap */ |
1690 | return true; | 1786 | return true; |
@@ -1699,14 +1795,15 @@ static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *b | |||
1699 | * Returns: errno | 1795 | * Returns: errno |
1700 | */ | 1796 | */ |
1701 | 1797 | ||
1702 | int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | 1798 | int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested, u32 aflags) |
1703 | { | 1799 | { |
1704 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1800 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1705 | struct gfs2_rgrpd *begin = NULL; | 1801 | struct gfs2_rgrpd *begin = NULL; |
1706 | struct gfs2_blkreserv *rs = ip->i_res; | 1802 | struct gfs2_blkreserv *rs = ip->i_res; |
1707 | int error = 0, rg_locked, flags = LM_FLAG_TRY; | 1803 | int error = 0, rg_locked, flags = 0; |
1708 | u64 last_unlinked = NO_BLOCK; | 1804 | u64 last_unlinked = NO_BLOCK; |
1709 | int loops = 0; | 1805 | int loops = 0; |
1806 | u32 skip = 0; | ||
1710 | 1807 | ||
1711 | if (sdp->sd_args.ar_rgrplvb) | 1808 | if (sdp->sd_args.ar_rgrplvb) |
1712 | flags |= GL_SKIP; | 1809 | flags |= GL_SKIP; |
@@ -1720,6 +1817,8 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |||
1720 | } else { | 1817 | } else { |
1721 | rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); | 1818 | rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1); |
1722 | } | 1819 | } |
1820 | if (S_ISDIR(ip->i_inode.i_mode) && (aflags & GFS2_AF_ORLOV)) | ||
1821 | skip = gfs2_orlov_skip(ip); | ||
1723 | if (rs->rs_rbm.rgd == NULL) | 1822 | if (rs->rs_rbm.rgd == NULL) |
1724 | return -EBADSLT; | 1823 | return -EBADSLT; |
1725 | 1824 | ||
@@ -1728,13 +1827,20 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, u32 requested) | |||
1728 | 1827 | ||
1729 | if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { | 1828 | if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) { |
1730 | rg_locked = 0; | 1829 | rg_locked = 0; |
1830 | if (skip && skip--) | ||
1831 | goto next_rgrp; | ||
1832 | if (!gfs2_rs_active(rs) && (loops < 2) && | ||
1833 | gfs2_rgrp_used_recently(rs, 1000) && | ||
1834 | gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) | ||
1835 | goto next_rgrp; | ||
1731 | error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, | 1836 | error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl, |
1732 | LM_ST_EXCLUSIVE, flags, | 1837 | LM_ST_EXCLUSIVE, flags, |
1733 | &rs->rs_rgd_gh); | 1838 | &rs->rs_rgd_gh); |
1734 | if (error == GLR_TRYFAILED) | ||
1735 | goto next_rgrp; | ||
1736 | if (unlikely(error)) | 1839 | if (unlikely(error)) |
1737 | return error; | 1840 | return error; |
1841 | if (!gfs2_rs_active(rs) && (loops < 2) && | ||
1842 | gfs2_rgrp_congested(rs->rs_rbm.rgd, loops)) | ||
1843 | goto skip_rgrp; | ||
1738 | if (sdp->sd_args.ar_rgrplvb) { | 1844 | if (sdp->sd_args.ar_rgrplvb) { |
1739 | error = update_rgrp_lvb(rs->rs_rbm.rgd); | 1845 | error = update_rgrp_lvb(rs->rs_rbm.rgd); |
1740 | if (unlikely(error)) { | 1846 | if (unlikely(error)) { |
@@ -1781,12 +1887,13 @@ next_rgrp: | |||
1781 | /* Find the next rgrp, and continue looking */ | 1887 | /* Find the next rgrp, and continue looking */ |
1782 | if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) | 1888 | if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin)) |
1783 | continue; | 1889 | continue; |
1890 | if (skip) | ||
1891 | continue; | ||
1784 | 1892 | ||
1785 | /* If we've scanned all the rgrps, but found no free blocks | 1893 | /* If we've scanned all the rgrps, but found no free blocks |
1786 | * then this checks for some less likely conditions before | 1894 | * then this checks for some less likely conditions before |
1787 | * trying again. | 1895 | * trying again. |
1788 | */ | 1896 | */ |
1789 | flags &= ~LM_FLAG_TRY; | ||
1790 | loops++; | 1897 | loops++; |
1791 | /* Check that fs hasn't grown if writing to rindex */ | 1898 | /* Check that fs hasn't grown if writing to rindex */ |
1792 | if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { | 1899 | if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) { |