author    Shaohua Li <shli@fb.com>    2017-05-17 16:07:27 -0400
committer Jens Axboe <axboe@fb.com>  2017-05-22 16:47:12 -0400
commit    b4f428ef2844e9fa8154f2faaca249aa74e222a7 (patch)
tree      e9e7bc1bc5ca0ec95caac8e6d4bd909afe5f3b1c /block/blk-throttle.c
parent    9bb67aeb96784527dbc784c7a1b234461299363c (diff)
blk-throttle: force user to configure all settings for io.low
The default value of the io.low limit is 0. If the user doesn't configure the limit, the previous patch makes the cgroup be throttled to a very tiny bps/iops, which could stall the system. A cgroup with io.low left at its defaults really means nothing, so we force the user to configure all settings; otherwise the io.low limit doesn't take effect. With this strategy, the default latency/idle settings aren't important, so just set them to very conservative and safe values.

Signed-off-by: Shaohua Li <shli@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
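With this change, io.low only takes effect for a cgroup once every setting is configured; leaving any key at its default silently disables the low limit for that group. As a minimal, hypothetical sketch (not part of this patch: the cgroup path, device numbers, and values are made up for illustration, and the key names are the ones tg_set_limit parses), configuring all keys in one write could look like:

  /* hypothetical example: enable io.low by setting every key at once */
  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
          /* assumed cgroup2 mount and cgroup name; device 8:16 is illustrative */
          int fd = open("/sys/fs/cgroup/test/io.low", O_WRONLY);

          if (fd < 0)
                  return 1;
          /* rbps/wbps in bytes/s, riops/wiops in IOs/s, idle in us; all values illustrative */
          dprintf(fd, "8:16 rbps=2097152 wbps=2097152 riops=1000 wiops=1000 idle=200 latency=10\n");
          close(fd);
          return 0;
  }

Omitting any of rbps/wbps/riops/wiops, or leaving idle/latency unset, now clears the group's low limit instead of throttling it down to the built-in minimum.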
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--  block/blk-throttle.c  80
1 file changed, 37 insertions(+), 43 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index f6a9f42a0ad7..fc13dd0c6e39 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -22,13 +22,11 @@ static int throtl_quantum = 32;
 #define DFL_THROTL_SLICE_HD (HZ / 10)
 #define DFL_THROTL_SLICE_SSD (HZ / 50)
 #define MAX_THROTL_SLICE (HZ)
-#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
-#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
 #define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
-/* default latency target is 0, eg, guarantee IO latency by default */
-#define DFL_LATENCY_TARGET (0)
 #define MIN_THROTL_BPS (320 * 1024)
 #define MIN_THROTL_IOPS (10)
+#define DFL_LATENCY_TARGET (-1L)
+#define DFL_IDLE_THRESHOLD (0)
 
 #define SKIP_LATENCY (((u64)1) << BLK_STAT_RES_SHIFT)
 
@@ -205,8 +203,6 @@ struct throtl_data
 	unsigned int limit_index;
 	bool limit_valid[LIMIT_CNT];
 
-	unsigned long dft_idletime_threshold; /* us */
-
 	unsigned long low_upgrade_time;
 	unsigned long low_downgrade_time;
 
@@ -500,6 +496,8 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
 
 	tg->latency_target = DFL_LATENCY_TARGET;
 	tg->latency_target_conf = DFL_LATENCY_TARGET;
+	tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+	tg->idletime_threshold_conf = DFL_IDLE_THRESHOLD;
 
 	return &tg->pd;
 }
@@ -528,9 +526,6 @@ static void throtl_pd_init(struct blkg_policy_data *pd)
 	if (cgroup_subsys_on_dfl(io_cgrp_subsys) && blkg->parent)
 		sq->parent_sq = &blkg_to_tg(blkg->parent)->service_queue;
 	tg->td = td;
-
-	tg->idletime_threshold = td->dft_idletime_threshold;
-	tg->idletime_threshold_conf = td->dft_idletime_threshold;
 }
 
 /*
@@ -1534,7 +1529,7 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
 	    tg->iops_conf[READ][off] == iops_dft &&
 	    tg->iops_conf[WRITE][off] == iops_dft &&
 	    (off != LIMIT_LOW ||
-	     (tg->idletime_threshold_conf == tg->td->dft_idletime_threshold &&
+	     (tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD &&
 	      tg->latency_target_conf == DFL_LATENCY_TARGET)))
 		return 0;
 
@@ -1660,16 +1655,31 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
 				      tg->iops_conf[READ][LIMIT_MAX]);
 	tg->iops[WRITE][LIMIT_LOW] = min(tg->iops_conf[WRITE][LIMIT_LOW],
 				      tg->iops_conf[WRITE][LIMIT_MAX]);
+	tg->idletime_threshold_conf = idle_time;
+	tg->latency_target_conf = latency_time;
 
-	if (index == LIMIT_LOW) {
-		blk_throtl_update_limit_valid(tg->td);
-		if (tg->td->limit_valid[LIMIT_LOW])
-			tg->td->limit_index = LIMIT_LOW;
-		tg->idletime_threshold_conf = idle_time;
+	/* force user to configure all settings for low limit */
+	if (!(tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW] ||
+	      tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW]) ||
+	    tg->idletime_threshold_conf == DFL_IDLE_THRESHOLD ||
+	    tg->latency_target_conf == DFL_LATENCY_TARGET) {
+		tg->bps[READ][LIMIT_LOW] = 0;
+		tg->bps[WRITE][LIMIT_LOW] = 0;
+		tg->iops[READ][LIMIT_LOW] = 0;
+		tg->iops[WRITE][LIMIT_LOW] = 0;
+		tg->idletime_threshold = DFL_IDLE_THRESHOLD;
+		tg->latency_target = DFL_LATENCY_TARGET;
+	} else if (index == LIMIT_LOW) {
 		tg->idletime_threshold = tg->idletime_threshold_conf;
-		tg->latency_target_conf = latency_time;
 		tg->latency_target = tg->latency_target_conf;
 	}
+
+	blk_throtl_update_limit_valid(tg->td);
+	if (tg->td->limit_valid[LIMIT_LOW]) {
+		if (index == LIMIT_LOW)
+			tg->td->limit_index = LIMIT_LOW;
+	} else
+		tg->td->limit_index = LIMIT_MAX;
 	tg_conf_updated(tg, index == LIMIT_LOW &&
 		tg->td->limit_valid[LIMIT_LOW]);
 	ret = 0;
@@ -1760,17 +1770,19 @@ static bool throtl_tg_is_idle(struct throtl_grp *tg)
 	/*
 	 * cgroup is idle if:
 	 * - single idle is too long, longer than a fixed value (in case user
-	 *   configure a too big threshold) or 4 times of slice
+	 *   configure a too big threshold) or 4 times of idletime threshold
 	 * - average think time is more than threshold
 	 * - IO latency is largely below threshold
 	 */
-	unsigned long time = jiffies_to_usecs(4 * tg->td->throtl_slice);
+	unsigned long time;
 	bool ret;
 
-	time = min_t(unsigned long, MAX_IDLE_TIME, time);
-	ret = (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
-	      tg->avg_idletime > tg->idletime_threshold ||
-	      (tg->latency_target && tg->bio_cnt &&
+	time = min_t(unsigned long, MAX_IDLE_TIME, 4 * tg->idletime_threshold);
+	ret = tg->latency_target == DFL_LATENCY_TARGET ||
+	      tg->idletime_threshold == DFL_IDLE_THRESHOLD ||
+	      (ktime_get_ns() >> 10) - tg->last_finish_time > time ||
+	      tg->avg_idletime > tg->idletime_threshold ||
+	      (tg->latency_target && tg->bio_cnt &&
 		tg->bad_bio_cnt * 5 < tg->bio_cnt);
 	throtl_log(&tg->service_queue,
 		"avg_idle=%ld, idle_threshold=%ld, bad_bio=%d, total_bio=%d, is_idle=%d, scale=%d",
@@ -2405,19 +2417,14 @@ void blk_throtl_exit(struct request_queue *q)
 void blk_throtl_register_queue(struct request_queue *q)
 {
 	struct throtl_data *td;
-	struct cgroup_subsys_state *pos_css;
-	struct blkcg_gq *blkg;
 
 	td = q->td;
 	BUG_ON(!td);
 
-	if (blk_queue_nonrot(q)) {
+	if (blk_queue_nonrot(q))
 		td->throtl_slice = DFL_THROTL_SLICE_SSD;
-		td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_SSD;
-	} else {
+	else
 		td->throtl_slice = DFL_THROTL_SLICE_HD;
-		td->dft_idletime_threshold = DFL_IDLE_THRESHOLD_HD;
-	}
 #ifndef CONFIG_BLK_DEV_THROTTLING_LOW
 	/* if no low limit, use previous default */
 	td->throtl_slice = DFL_THROTL_SLICE_HD;
@@ -2426,19 +2433,6 @@ void blk_throtl_register_queue(struct request_queue *q)
 	td->track_bio_latency = !q->mq_ops && !q->request_fn;
 	if (!td->track_bio_latency)
 		blk_stat_enable_accounting(q);
-
-	/*
-	 * some tg are created before queue is fully initialized, eg, nonrot
-	 * isn't initialized yet
-	 */
-	rcu_read_lock();
-	blkg_for_each_descendant_post(blkg, pos_css, q->root_blkg) {
-		struct throtl_grp *tg = blkg_to_tg(blkg);
-
-		tg->idletime_threshold = td->dft_idletime_threshold;
-		tg->idletime_threshold_conf = td->dft_idletime_threshold;
-	}
-	rcu_read_unlock();
 }
 
 #ifdef CONFIG_BLK_DEV_THROTTLING_LOW