summaryrefslogtreecommitdiffstats
path: root/block/blk-throttle.c
diff options
context:
space:
mode:
authorShaohua Li <shli@fb.com>2017-03-27 13:51:44 -0400
committerJens Axboe <axboe@fb.com>2017-03-28 10:02:20 -0400
commitec80991d6fc2cb17abfc5427ac1512d2ccd40589 (patch)
treece3709e88bfdee940afa7729e2358505f8a6dcae /block/blk-throttle.c
parentfa6fb5aab85f3e2d9cf017ec516a8c234402a7cd (diff)
blk-throttle: add interface for per-cgroup target latency
Here we introduce a per-cgroup latency target. The target determines how much latency increase a cgroup can afford. We will use the target latency to calculate a threshold and use it to schedule IO for cgroups. If a cgroup's bandwidth is below its low limit but its average latency is below the threshold, other cgroups can safely dispatch more IO even if their bandwidth is higher than their low limits. On the other hand, if the first cgroup's latency is higher than the threshold, other cgroups are throttled to their low limits. So the target latency determines how efficiently we utilize free disk resources without sacrificing the workload's IO latency. For example, assume 4k IO average latency is 50us when the disk isn't congested. A cgroup sets the target latency to 30us. Then the cgroup can accept 50+30=80us IO latency. If the cgroup's average IO latency is 90us and its bandwidth is below its low limit, other cgroups are throttled to their low limits. If the cgroup's average IO latency is 60us, other cgroups are allowed to dispatch more IO. When other cgroups dispatch more IO, the first cgroup's IO latency will increase. If it increases to 81us, we then throttle other cgroups. Users configure the interface in this way: echo "8:16 rbps=2097152 wbps=max latency=100 idle=200" > io.low (latency is in microseconds). By default, the latency target is 0, which means IO latency is guaranteed. Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--block/blk-throttle.c28
1 files changed, 24 insertions, 4 deletions
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 0ea8698b2b1f..6e1c29860eec 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -25,6 +25,8 @@ static int throtl_quantum = 32;
25#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */ 25#define DFL_IDLE_THRESHOLD_SSD (1000L) /* 1 ms */
26#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */ 26#define DFL_IDLE_THRESHOLD_HD (100L * 1000) /* 100 ms */
27#define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */ 27#define MAX_IDLE_TIME (5L * 1000 * 1000) /* 5 s */
28/* default latency target is 0, i.e., guarantee IO latency by default */
29#define DFL_LATENCY_TARGET (0)
28 30
29static struct blkcg_policy blkcg_policy_throtl; 31static struct blkcg_policy blkcg_policy_throtl;
30 32
@@ -152,6 +154,7 @@ struct throtl_grp {
152 154
153 unsigned long last_check_time; 155 unsigned long last_check_time;
154 156
157 unsigned long latency_target; /* us */
155 /* When did we start a new slice */ 158 /* When did we start a new slice */
156 unsigned long slice_start[2]; 159 unsigned long slice_start[2];
157 unsigned long slice_end[2]; 160 unsigned long slice_end[2];
@@ -449,6 +452,8 @@ static struct blkg_policy_data *throtl_pd_alloc(gfp_t gfp, int node)
449 tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX; 452 tg->iops_conf[WRITE][LIMIT_MAX] = UINT_MAX;
450 /* LIMIT_LOW will have default value 0 */ 453 /* LIMIT_LOW will have default value 0 */
451 454
455 tg->latency_target = DFL_LATENCY_TARGET;
456
452 return &tg->pd; 457 return &tg->pd;
453} 458}
454 459
@@ -1445,6 +1450,7 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
1445 u64 bps_dft; 1450 u64 bps_dft;
1446 unsigned int iops_dft; 1451 unsigned int iops_dft;
1447 char idle_time[26] = ""; 1452 char idle_time[26] = "";
1453 char latency_time[26] = "";
1448 1454
1449 if (!dname) 1455 if (!dname)
1450 return 0; 1456 return 0;
@@ -1461,8 +1467,9 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
1461 tg->bps_conf[WRITE][off] == bps_dft && 1467 tg->bps_conf[WRITE][off] == bps_dft &&
1462 tg->iops_conf[READ][off] == iops_dft && 1468 tg->iops_conf[READ][off] == iops_dft &&
1463 tg->iops_conf[WRITE][off] == iops_dft && 1469 tg->iops_conf[WRITE][off] == iops_dft &&
1464 (off != LIMIT_LOW || tg->idletime_threshold == 1470 (off != LIMIT_LOW ||
1465 tg->td->dft_idletime_threshold)) 1471 (tg->idletime_threshold == tg->td->dft_idletime_threshold &&
1472 tg->latency_target == DFL_LATENCY_TARGET)))
1466 return 0; 1473 return 0;
1467 1474
1468 if (tg->bps_conf[READ][off] != bps_dft) 1475 if (tg->bps_conf[READ][off] != bps_dft)
@@ -1483,10 +1490,17 @@ static u64 tg_prfill_limit(struct seq_file *sf, struct blkg_policy_data *pd,
1483 else 1490 else
1484 snprintf(idle_time, sizeof(idle_time), " idle=%lu", 1491 snprintf(idle_time, sizeof(idle_time), " idle=%lu",
1485 tg->idletime_threshold); 1492 tg->idletime_threshold);
1493
1494 if (tg->latency_target == ULONG_MAX)
1495 strcpy(latency_time, " latency=max");
1496 else
1497 snprintf(latency_time, sizeof(latency_time),
1498 " latency=%lu", tg->latency_target);
1486 } 1499 }
1487 1500
1488 seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s\n", 1501 seq_printf(sf, "%s rbps=%s wbps=%s riops=%s wiops=%s%s%s\n",
1489 dname, bufs[0], bufs[1], bufs[2], bufs[3], idle_time); 1502 dname, bufs[0], bufs[1], bufs[2], bufs[3], idle_time,
1503 latency_time);
1490 return 0; 1504 return 0;
1491} 1505}
1492 1506
@@ -1505,6 +1519,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
1505 struct throtl_grp *tg; 1519 struct throtl_grp *tg;
1506 u64 v[4]; 1520 u64 v[4];
1507 unsigned long idle_time; 1521 unsigned long idle_time;
1522 unsigned long latency_time;
1508 int ret; 1523 int ret;
1509 int index = of_cft(of)->private; 1524 int index = of_cft(of)->private;
1510 1525
@@ -1520,6 +1535,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
1520 v[3] = tg->iops_conf[WRITE][index]; 1535 v[3] = tg->iops_conf[WRITE][index];
1521 1536
1522 idle_time = tg->idletime_threshold; 1537 idle_time = tg->idletime_threshold;
1538 latency_time = tg->latency_target;
1523 while (true) { 1539 while (true) {
1524 char tok[27]; /* wiops=18446744073709551616 */ 1540 char tok[27]; /* wiops=18446744073709551616 */
1525 char *p; 1541 char *p;
@@ -1553,6 +1569,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
1553 v[3] = min_t(u64, val, UINT_MAX); 1569 v[3] = min_t(u64, val, UINT_MAX);
1554 else if (off == LIMIT_LOW && !strcmp(tok, "idle")) 1570 else if (off == LIMIT_LOW && !strcmp(tok, "idle"))
1555 idle_time = val; 1571 idle_time = val;
1572 else if (off == LIMIT_LOW && !strcmp(tok, "latency"))
1573 latency_time = val;
1556 else 1574 else
1557 goto out_finish; 1575 goto out_finish;
1558 } 1576 }
@@ -1583,6 +1601,8 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of,
1583 tg->td->limit_index = LIMIT_LOW; 1601 tg->td->limit_index = LIMIT_LOW;
1584 tg->idletime_threshold = (idle_time == ULONG_MAX) ? 1602 tg->idletime_threshold = (idle_time == ULONG_MAX) ?
1585 ULONG_MAX : idle_time; 1603 ULONG_MAX : idle_time;
1604 tg->latency_target = (latency_time == ULONG_MAX) ?
1605 ULONG_MAX : latency_time;
1586 } 1606 }
1587 tg_conf_updated(tg); 1607 tg_conf_updated(tg);
1588 ret = 0; 1608 ret = 0;