summaryrefslogtreecommitdiffstats
path: root/block/bfq-iosched.c
diff options
context:
space:
mode:
authorPaolo Valente <paolo.valente@linaro.org>2019-06-25 01:12:43 -0400
committerJens Axboe <axboe@kernel.dk>2019-06-25 11:07:34 -0400
commit766d61412ef840295f55e98e2c5fb0fc110c6ca4 (patch)
treeea7dbe88b5b57fc57616afb65b4786f24209f968 /block/bfq-iosched.c
parent6b2c8e522c8980fedfd24f3d1e69c3bccdb9414d (diff)
block, bfq: reset inject limit when think-time state changes
Until the base value of the request service times gets finally computed for a bfq_queue, the inject limit does depend on the think-time state (short|long). The limit must be 0 or 1 if the think time is deemed, respectively, as short or long. However, such a check and possible limit update is performed only periodically, once per second. So, to make the injection mechanism much more reactive, this commit performs the update also every time the think-time state changes. In addition, in the following special case, this commit lets the inject limit of a bfq_queue bfqq remain equal to 1 even if bfqq's think time is short: bfqq's I/O is synchronized with that of some other queue, i.e., bfqq may receive new I/O only after the I/O of the other queue is completed. Keeping the inject limit to 1 allows the blocking I/O to be served while bfqq is in service. And this is very convenient both for bfqq and for the total throughput, as explained in detail in the comments in bfq_update_has_short_ttime(). Reported-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu> Tested-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu> Signed-off-by: Paolo Valente <paolo.valente@linaro.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/bfq-iosched.c')
-rw-r--r--block/bfq-iosched.c219
1 files changed, 151 insertions, 68 deletions
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 44c6bbcd7720..9bc10198ddff 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -1735,6 +1735,72 @@ static void bfq_bfqq_handle_idle_busy_switch(struct bfq_data *bfqd,
1735 false, BFQQE_PREEMPTED); 1735 false, BFQQE_PREEMPTED);
1736} 1736}
1737 1737
1738static void bfq_reset_inject_limit(struct bfq_data *bfqd,
1739 struct bfq_queue *bfqq)
1740{
1741 /* invalidate baseline total service time */
1742 bfqq->last_serv_time_ns = 0;
1743
1744 /*
1745 * Reset pointer in case we are waiting for
1746 * some request completion.
1747 */
1748 bfqd->waited_rq = NULL;
1749
1750 /*
1751 * If bfqq has a short think time, then start by setting the
1752 * inject limit to 0 prudentially, because the service time of
1753 * an injected I/O request may be higher than the think time
1754 * of bfqq, and therefore, if one request was injected when
1755 * bfqq remains empty, this injected request might delay the
1756 * service of the next I/O request for bfqq significantly. In
1757 * case bfqq can actually tolerate some injection, then the
1758 * adaptive update will however raise the limit soon. This
1759 * lucky circumstance holds exactly because bfqq has a short
1760 * think time, and thus, after remaining empty, is likely to
1761 * get new I/O enqueued---and then completed---before being
1762 * expired. This is the very pattern that gives the
1763 * limit-update algorithm the chance to measure the effect of
1764 * injection on request service times, and then to update the
1765 * limit accordingly.
1766 *
1767 * However, in the following special case, the inject limit is
1768 * left to 1 even if the think time is short: bfqq's I/O is
1769 * synchronized with that of some other queue, i.e., bfqq may
1770 * receive new I/O only after the I/O of the other queue is
1771 * completed. Keeping the inject limit to 1 allows the
1772 * blocking I/O to be served while bfqq is in service. And
1773 * this is very convenient both for bfqq and for overall
1774 * throughput, as explained in detail in the comments in
1775 * bfq_update_has_short_ttime().
1776 *
1777 * On the opposite end, if bfqq has a long think time, then
1778 * start directly by 1, because:
1779 * a) on the bright side, keeping at most one request in
1780 * service in the drive is unlikely to cause any harm to the
1781 * latency of bfqq's requests, as the service time of a single
1782 * request is likely to be lower than the think time of bfqq;
1783 * b) on the downside, after becoming empty, bfqq is likely to
1784 * expire before getting its next request. With this request
1785 * arrival pattern, it is very hard to sample total service
1786 * times and update the inject limit accordingly (see comments
1787 * on bfq_update_inject_limit()). So the limit is likely to be
1788 * never, or at least seldom, updated. As a consequence, by
1789 * setting the limit to 1, we avoid that no injection ever
1790 * occurs with bfqq. On the downside, this proactive step
1791 * further reduces chances to actually compute the baseline
1792 * total service time. Thus it reduces chances to execute the
1793 * limit-update algorithm and possibly raise the limit to more
1794 * than 1.
1795 */
1796 if (bfq_bfqq_has_short_ttime(bfqq))
1797 bfqq->inject_limit = 0;
1798 else
1799 bfqq->inject_limit = 1;
1800
1801 bfqq->decrease_time_jif = jiffies;
1802}
1803
1738static void bfq_add_request(struct request *rq) 1804static void bfq_add_request(struct request *rq)
1739{ 1805{
1740 struct bfq_queue *bfqq = RQ_BFQQ(rq); 1806 struct bfq_queue *bfqq = RQ_BFQQ(rq);
@@ -1755,71 +1821,8 @@ static void bfq_add_request(struct request *rq)
1755 * bfq_update_inject_limit(). 1821 * bfq_update_inject_limit().
1756 */ 1822 */
1757 if (time_is_before_eq_jiffies(bfqq->decrease_time_jif + 1823 if (time_is_before_eq_jiffies(bfqq->decrease_time_jif +
1758 msecs_to_jiffies(1000))) { 1824 msecs_to_jiffies(1000)))
1759 /* invalidate baseline total service time */ 1825 bfq_reset_inject_limit(bfqd, bfqq);
1760 bfqq->last_serv_time_ns = 0;
1761
1762 /*
1763 * Reset pointer in case we are waiting for
1764 * some request completion.
1765 */
1766 bfqd->waited_rq = NULL;
1767
1768 /*
1769 * If bfqq has a short think time, then start
1770 * by setting the inject limit to 0
1771 * prudentially, because the service time of
1772 * an injected I/O request may be higher than
1773 * the think time of bfqq, and therefore, if
1774 * one request was injected when bfqq remains
1775 * empty, this injected request might delay
1776 * the service of the next I/O request for
1777 * bfqq significantly. In case bfqq can
1778 * actually tolerate some injection, then the
1779 * adaptive update will however raise the
1780 * limit soon. This lucky circumstance holds
1781 * exactly because bfqq has a short think
1782 * time, and thus, after remaining empty, is
1783 * likely to get new I/O enqueued---and then
1784 * completed---before being expired. This is
1785 * the very pattern that gives the
1786 * limit-update algorithm the chance to
1787 * measure the effect of injection on request
1788 * service times, and then to update the limit
1789 * accordingly.
1790 *
1791 * On the opposite end, if bfqq has a long
1792 * think time, then start directly by 1,
1793 * because:
1794 * a) on the bright side, keeping at most one
1795 * request in service in the drive is unlikely
1796 * to cause any harm to the latency of bfqq's
1797 * requests, as the service time of a single
1798 * request is likely to be lower than the
1799 * think time of bfqq;
1800 * b) on the downside, after becoming empty,
1801 * bfqq is likely to expire before getting its
1802 * next request. With this request arrival
1803 * pattern, it is very hard to sample total
1804 * service times and update the inject limit
1805 * accordingly (see comments on
1806 * bfq_update_inject_limit()). So the limit is
1807 * likely to be never, or at least seldom,
1808 * updated. As a consequence, by setting the
1809 * limit to 1, we avoid that no injection ever
1810 * occurs with bfqq. On the downside, this
1811 * proactive step further reduces chances to
1812 * actually compute the baseline total service
1813 * time. Thus it reduces chances to execute the
1814 * limit-update algorithm and possibly raise the
1815 * limit to more than 1.
1816 */
1817 if (bfq_bfqq_has_short_ttime(bfqq))
1818 bfqq->inject_limit = 0;
1819 else
1820 bfqq->inject_limit = 1;
1821 bfqq->decrease_time_jif = jiffies;
1822 }
1823 1826
1824 /* 1827 /*
1825 * The following conditions must hold to setup a new 1828 * The following conditions must hold to setup a new
@@ -4855,7 +4858,7 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
4855 struct bfq_queue *bfqq, 4858 struct bfq_queue *bfqq,
4856 struct bfq_io_cq *bic) 4859 struct bfq_io_cq *bic)
4857{ 4860{
4858 bool has_short_ttime = true; 4861 bool has_short_ttime = true, state_changed;
4859 4862
4860 /* 4863 /*
4861 * No need to update has_short_ttime if bfqq is async or in 4864 * No need to update has_short_ttime if bfqq is async or in
@@ -4880,13 +4883,93 @@ static void bfq_update_has_short_ttime(struct bfq_data *bfqd,
4880 bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle)) 4883 bfqq->ttime.ttime_mean > bfqd->bfq_slice_idle))
4881 has_short_ttime = false; 4884 has_short_ttime = false;
4882 4885
4883 bfq_log_bfqq(bfqd, bfqq, "update_has_short_ttime: has_short_ttime %d", 4886 state_changed = has_short_ttime != bfq_bfqq_has_short_ttime(bfqq);
4884 has_short_ttime);
4885 4887
4886 if (has_short_ttime) 4888 if (has_short_ttime)
4887 bfq_mark_bfqq_has_short_ttime(bfqq); 4889 bfq_mark_bfqq_has_short_ttime(bfqq);
4888 else 4890 else
4889 bfq_clear_bfqq_has_short_ttime(bfqq); 4891 bfq_clear_bfqq_has_short_ttime(bfqq);
4892
4893 /*
4894 * Until the base value for the total service time gets
4895 * finally computed for bfqq, the inject limit does depend on
4896 * the think-time state (short|long). In particular, the limit
4897 * is 0 or 1 if the think time is deemed, respectively, as
4898 * short or long (details in the comments in
4899 * bfq_update_inject_limit()). Accordingly, the next
4900 * instructions reset the inject limit if the think-time state
4901 * has changed and the above base value is still to be
4902 * computed.
4903 *
4904 * However, the reset is performed only if more than 100 ms
4905 * have elapsed since the last update of the inject limit, or
4906 * (inclusive) if the change is from short to long think
4907 * time. The reason for this waiting is as follows.
4908 *
4909 * bfqq may have a long think time because of a
4910 * synchronization with some other queue, i.e., because the
4911 * I/O of some other queue may need to be completed for bfqq
4912 * to receive new I/O. This happens, e.g., if bfqq is
4913 * associated with a process that does some sync. A sync
4914 * generates extra blocking I/O, which must be completed
4915 * before the process associated with bfqq can go on with its
4916 * I/O.
4917 *
4918 * If such a synchronization is actually in place, then,
4919 * without injection on bfqq, the blocking I/O cannot happen
4920 * to served while bfqq is in service. As a consequence, if
4921 * bfqq is granted I/O-dispatch-plugging, then bfqq remains
4922 * empty, and no I/O is dispatched, until the idle timeout
4923 * fires. This is likely to result in lower bandwidth and
4924 * higher latencies for bfqq, and in a severe loss of total
4925 * throughput.
4926 *
4927 * On the opposite end, a non-zero inject limit may allow the
4928 * I/O that blocks bfqq to be executed soon, and therefore
4929 * bfqq to receive new I/O soon. But, if this actually
4930 * happens, then the next think-time sample for bfqq may be
4931 * very low. This in turn may cause bfqq's think time to be
4932 * deemed short. Without the 100 ms barrier, this new state
4933 * change would cause the body of the next if to be executed
4934 * immediately. But this would set to 0 the inject
4935 * limit. Without injection, the blocking I/O would cause the
4936 * think time of bfqq to become long again, and therefore the
4937 * inject limit to be raised again, and so on. The only effect
4938 * of such a steady oscillation between the two think-time
4939 * states would be to prevent effective injection on bfqq.
4940 *
4941 * In contrast, if the inject limit is not reset during such a
4942 * long time interval as 100 ms, then the number of short
4943 * think time samples can grow significantly before the reset
4944 * is allowed. As a consequence, the think time state can
4945 * become stable before the reset. There will be no state
4946 * change when the 100 ms elapse, and therefore no reset of
4947 * the inject limit. The inject limit remains steadily equal
4948 * to 1 both during and after the 100 ms. So injection can be
4949 * performed at all times, and throughput gets boosted.
4950 *
4951 * An inject limit equal to 1 is however in conflict, in
4952 * general, with the fact that the think time of bfqq is
4953 * short, because injection may be likely to delay bfqq's I/O
4954 * (as explained in the comments in
4955 * bfq_update_inject_limit()). But this does not happen in
4956 * this special case, because bfqq's low think time is due to
4957 * an effective handling of a synchronization, through
4958 * injection. In this special case, bfqq's I/O does not get
4959 * delayed by injection; on the contrary, bfqq's I/O is
4960 * brought forward, because it is not blocked for
4961 * milliseconds.
4962 *
4963 * In addition, during the 100 ms, the base value for the
4964 * total service time is likely to get finally computed,
4965 * freeing the inject limit from its relation with the think
4966 * time.
4967 */
4968 if (state_changed && bfqq->last_serv_time_ns == 0 &&
4969 (time_is_before_eq_jiffies(bfqq->decrease_time_jif +
4970 msecs_to_jiffies(100)) ||
4971 !has_short_ttime))
4972 bfq_reset_inject_limit(bfqd, bfqq);
4890} 4973}
4891 4974
4892/* 4975/*