author    Shaohua Li <shli@fb.com>    2017-03-27 13:51:34 -0400
committer Jens Axboe <axboe@fb.com>  2017-03-28 10:02:20 -0400
commit    c79892c5576163b3c7403b9d75cbe8dcae65e428 (patch)
tree      09dd9d4d38045a8daf7d0a06aa25bce03611973d /block/blk-throttle.c
parent    b22c417c885ea973149ecf56286aabec060153e2 (diff)
blk-throttle: add upgrade logic for LIMIT_LOW state
When the queue is in the LIMIT_LOW state and all cgroups with a low limit cross their bps/iops limits, we upgrade the queue's state to LIMIT_MAX. To determine whether a cgroup exceeds its limit, we check whether the cgroup has pending requests: since the cgroup is throttled according to its limit, a pending request means the cgroup has reached the limit.

If a cgroup has limits set for both read and write, we consider the combination of the two for the upgrade, because read IO and write IO can interfere with each other: if we based the upgrade on IO in one direction only, IO in the other direction could be severely harmed.

For a cgroup hierarchy there are two cases. If the children have a lower low limit than the parent, the parent's low limit is meaningless; once the children's bps/iops cross their low limits, we can upgrade the queue state. In the other case the children have a higher low limit than the parent, so the children's low limits are meaningless; as long as the parent's bps/iops (which is the sum of the children's bps/iops) cross its low limit, we can upgrade the queue state.

Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
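To make the upgrade test concrete, here is a minimal userspace sketch of the decision logic described above. This is an illustration, not the kernel code: struct tg_model, tg_can_upgrade() and hierarchy_can_upgrade() are invented stand-ins for throtl_grp, throtl_tg_can_upgrade() and throtl_hierarchy_can_upgrade() in the patch below, with the bps/iops configuration and the pending-bio state reduced to booleans.

#include <stdbool.h>
#include <stdio.h>

enum { READ, WRITE };

/* Toy model of one cgroup (hypothetical, for illustration): does a
 * direction have a low limit configured, and does it currently have
 * bios queued?  Queued bios mean the cgroup is being throttled,
 * i.e. it has reached its low limit. */
struct tg_model {
        bool low_limit[2];      /* bps or iops low limit configured */
        bool queued[2];         /* pending (throttled) bios */
};

/* Mirrors the shape of throtl_tg_can_upgrade(): a cgroup agrees to
 * the upgrade only when every direction that has a low limit
 * configured has reached it; a cgroup with no low limit at all never
 * holds the queue back. */
static bool tg_can_upgrade(const struct tg_model *tg)
{
        bool r = tg->low_limit[READ], w = tg->low_limit[WRITE];

        if (!r && !w)
                return true;
        if (r && tg->queued[READ] && (!w || tg->queued[WRITE]))
                return true;
        if (w && tg->queued[WRITE] && (!r || tg->queued[READ]))
                return true;
        return false;
}

/* Mirrors throtl_hierarchy_can_upgrade(): walk from a leaf toward the
 * root and let any level approve the upgrade.  This covers both
 * hierarchy cases from the message above: whichever level holds the
 * meaningful low limit is the one that ends up with queued bios. */
static bool hierarchy_can_upgrade(const struct tg_model *path, int depth)
{
        for (int i = 0; i < depth; i++)
                if (tg_can_upgrade(&path[i]))
                        return true;
        return false;
}

int main(void)
{
        /* leaf: read and write low limits both set, but only reads are
         * currently throttled, so the leaf itself does not approve */
        struct tg_model leaf = { .low_limit = { true, true },
                                 .queued    = { true, false } };
        /* its parent carries no low limit of its own and never objects */
        struct tg_model path[] = { leaf, { .low_limit = { false, false } } };

        printf("leaf alone: %s\n", tg_can_upgrade(&leaf) ? "yes" : "no");
        printf("whole path: %s\n",
               hierarchy_can_upgrade(path, 2) ? "yes" : "no");
        return 0;
}

Note how a direction with no low limit configured can never hold the upgrade back; the (!write_limit || ...) and (!read_limit || ...) terms in the real throtl_tg_can_upgrade() encode the same rule.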
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r--  block/blk-throttle.c | 100
1 file changed, 96 insertions(+), 4 deletions(-)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 1fade5078fc1..dd382d849c39 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -457,6 +457,7 @@ static void blk_throtl_update_limit_valid(struct throtl_data *td)
         td->limit_valid[LIMIT_LOW] = low_valid;
 }
 
+static void throtl_upgrade_state(struct throtl_data *td);
 static void throtl_pd_offline(struct blkg_policy_data *pd)
 {
         struct throtl_grp *tg = pd_to_tg(pd);
@@ -468,9 +469,8 @@ static void throtl_pd_offline(struct blkg_policy_data *pd)
 
         blk_throtl_update_limit_valid(tg->td);
 
-        if (tg->td->limit_index == LIMIT_LOW &&
-            !tg->td->limit_valid[LIMIT_LOW])
-                tg->td->limit_index = LIMIT_MAX;
+        if (!tg->td->limit_valid[tg->td->limit_index])
+                throtl_upgrade_state(tg->td);
 }
 
 static void throtl_pd_free(struct blkg_policy_data *pd)
@@ -1081,6 +1081,8 @@ static int throtl_select_dispatch(struct throtl_service_queue *parent_sq)
         return nr_disp;
 }
 
+static bool throtl_can_upgrade(struct throtl_data *td,
+        struct throtl_grp *this_tg);
 /**
  * throtl_pending_timer_fn - timer function for service_queue->pending_timer
  * @arg: the throtl_service_queue being serviced
@@ -1107,6 +1109,9 @@ static void throtl_pending_timer_fn(unsigned long arg)
         int ret;
 
         spin_lock_irq(q->queue_lock);
+        if (throtl_can_upgrade(td, NULL))
+                throtl_upgrade_state(td);
+
 again:
         parent_sq = sq->parent_sq;
         dispatched = false;
@@ -1522,6 +1527,87 @@ static struct blkcg_policy blkcg_policy_throtl = {
         .pd_free_fn = throtl_pd_free,
 };
 
+static bool throtl_tg_can_upgrade(struct throtl_grp *tg)
+{
+        struct throtl_service_queue *sq = &tg->service_queue;
+        bool read_limit, write_limit;
+
+        /*
+         * if cgroup reaches low limit (if low limit is 0, the cgroup always
+         * reaches), it's ok to upgrade to next limit
+         */
+        read_limit = tg->bps[READ][LIMIT_LOW] || tg->iops[READ][LIMIT_LOW];
+        write_limit = tg->bps[WRITE][LIMIT_LOW] || tg->iops[WRITE][LIMIT_LOW];
+        if (!read_limit && !write_limit)
+                return true;
+        if (read_limit && sq->nr_queued[READ] &&
+            (!write_limit || sq->nr_queued[WRITE]))
+                return true;
+        if (write_limit && sq->nr_queued[WRITE] &&
+            (!read_limit || sq->nr_queued[READ]))
+                return true;
+        return false;
+}
+
+static bool throtl_hierarchy_can_upgrade(struct throtl_grp *tg)
+{
+        while (true) {
+                if (throtl_tg_can_upgrade(tg))
+                        return true;
+                tg = sq_to_tg(tg->service_queue.parent_sq);
+                if (!tg || !tg_to_blkg(tg)->parent)
+                        return false;
+        }
+        return false;
+}
+
+static bool throtl_can_upgrade(struct throtl_data *td,
+        struct throtl_grp *this_tg)
+{
+        struct cgroup_subsys_state *pos_css;
+        struct blkcg_gq *blkg;
+
+        if (td->limit_index != LIMIT_LOW)
+                return false;
+
+        rcu_read_lock();
+        blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+                struct throtl_grp *tg = blkg_to_tg(blkg);
+
+                if (tg == this_tg)
+                        continue;
+                if (!list_empty(&tg_to_blkg(tg)->blkcg->css.children))
+                        continue;
+                if (!throtl_hierarchy_can_upgrade(tg)) {
+                        rcu_read_unlock();
+                        return false;
+                }
+        }
+        rcu_read_unlock();
+        return true;
+}
+
+static void throtl_upgrade_state(struct throtl_data *td)
+{
+        struct cgroup_subsys_state *pos_css;
+        struct blkcg_gq *blkg;
+
+        td->limit_index = LIMIT_MAX;
+        rcu_read_lock();
+        blkg_for_each_descendant_post(blkg, pos_css, td->queue->root_blkg) {
+                struct throtl_grp *tg = blkg_to_tg(blkg);
+                struct throtl_service_queue *sq = &tg->service_queue;
+
+                tg->disptime = jiffies - 1;
+                throtl_select_dispatch(sq);
+                throtl_schedule_next_dispatch(sq, false);
+        }
+        rcu_read_unlock();
+        throtl_select_dispatch(&td->service_queue);
+        throtl_schedule_next_dispatch(&td->service_queue, false);
+        queue_work(kthrotld_workqueue, &td->dispatch_work);
+}
+
 bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
                     struct bio *bio)
 {
@@ -1544,14 +1630,20 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 
         sq = &tg->service_queue;
 
+again:
         while (true) {
                 /* throtl is FIFO - if bios are already queued, should queue */
                 if (sq->nr_queued[rw])
                         break;
 
                 /* if above limits, break to queue */
-                if (!tg_may_dispatch(tg, bio, NULL))
+                if (!tg_may_dispatch(tg, bio, NULL)) {
+                        if (throtl_can_upgrade(tg->td, tg)) {
+                                throtl_upgrade_state(tg->td);
+                                goto again;
+                        }
                         break;
+                }
 
                 /* within limits, let's charge and dispatch directly */
                 throtl_charge_bio(tg, bio);