Diffstat (limited to 'block/blk-cgroup.c')
 block/blk-cgroup.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 61 insertions(+), 5 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 53b7bd4c7000..24ed26957367 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -29,6 +29,7 @@
 #include <linux/ctype.h>
 #include <linux/blk-cgroup.h>
 #include <linux/tracehook.h>
+#include <linux/psi.h>
 #include "blk.h"
 
 #define MAX_KEY_LEN 100
@@ -47,12 +48,14 @@ struct blkcg blkcg_root;
 EXPORT_SYMBOL_GPL(blkcg_root);
 
 struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css;
+EXPORT_SYMBOL_GPL(blkcg_root_css);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
 static LIST_HEAD(all_blkcgs);		/* protected by blkcg_pol_mutex */
 
 static bool blkcg_debug_stats = false;
+static struct workqueue_struct *blkcg_punt_bio_wq;
 
 static bool blkcg_policy_enabled(struct request_queue *q,
 				 const struct blkcg_policy *pol)
@@ -87,6 +90,8 @@ static void __blkg_release(struct rcu_head *rcu)
 {
 	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
 
+	WARN_ON(!bio_list_empty(&blkg->async_bios));
+
 	/* release the blkcg and parent blkg refs this blkg has been holding */
 	css_put(&blkg->blkcg->css);
 	if (blkg->parent)
@@ -112,6 +117,23 @@ static void blkg_release(struct percpu_ref *ref)
 	call_rcu(&blkg->rcu_head, __blkg_release);
 }
 
+static void blkg_async_bio_workfn(struct work_struct *work)
+{
+	struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
+					     async_bio_work);
+	struct bio_list bios = BIO_EMPTY_LIST;
+	struct bio *bio;
+
+	/* as long as there are pending bios, @blkg can't go away */
+	spin_lock_bh(&blkg->async_bio_lock);
+	bio_list_merge(&bios, &blkg->async_bios);
+	bio_list_init(&blkg->async_bios);
+	spin_unlock_bh(&blkg->async_bio_lock);
+
+	while ((bio = bio_list_pop(&bios)))
+		submit_bio(bio);
+}
+
 /**
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
@@ -140,6 +162,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 
 	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
+	spin_lock_init(&blkg->async_bio_lock);
+	bio_list_init(&blkg->async_bios);
+	INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
 	blkg->blkcg = blkcg;
 
 	for (i = 0; i < BLKCG_MAX_POLS; i++) {
@@ -1526,6 +1551,25 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
 
+bool __blkcg_punt_bio_submit(struct bio *bio)
+{
+	struct blkcg_gq *blkg = bio->bi_blkg;
+
+	/* consume the flag first */
+	bio->bi_opf &= ~REQ_CGROUP_PUNT;
+
+	/* never bounce for the root cgroup */
+	if (!blkg->parent)
+		return false;
+
+	spin_lock_bh(&blkg->async_bio_lock);
+	bio_list_add(&blkg->async_bios, bio);
+	spin_unlock_bh(&blkg->async_bio_lock);
+
+	queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+	return true;
+}
+
 /*
  * Scale the accumulated delay based on how long it has been since we updated
  * the delay. We only call this when we are adding delay, in case it's been a
@@ -1587,6 +1631,7 @@ static void blkcg_scale_delay(struct blkcg_gq *blkg, u64 now)
  */
 static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
 {
+	unsigned long pflags;
 	u64 now = ktime_to_ns(ktime_get());
 	u64 exp;
 	u64 delay_nsec = 0;
@@ -1613,11 +1658,8 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
 	 */
 	delay_nsec = min_t(u64, delay_nsec, 250 * NSEC_PER_MSEC);
 
-	/*
-	 * TODO: the use_memdelay flag is going to be for the upcoming psi stuff
-	 * that hasn't landed upstream yet.  Once that stuff is in place we need
-	 * to do a psi_memstall_enter/leave if memdelay is set.
-	 */
+	if (use_memdelay)
+		psi_memstall_enter(&pflags);
 
 	exp = ktime_add_ns(now, delay_nsec);
 	tok = io_schedule_prepare();
@@ -1627,6 +1669,9 @@ static void blkcg_maybe_throttle_blkg(struct blkcg_gq *blkg, bool use_memdelay)
 		break;
 	} while (!fatal_signal_pending(current));
 	io_schedule_finish(tok);
+
+	if (use_memdelay)
+		psi_memstall_leave(&pflags);
 }
 
 /**
@@ -1726,5 +1771,16 @@ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
 	atomic64_add(delta, &blkg->delay_nsec);
 }
 
+static int __init blkcg_init(void)
+{
+	blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
+					    WQ_MEM_RECLAIM | WQ_FREEZABLE |
+					    WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!blkcg_punt_bio_wq)
+		return -ENOMEM;
+	return 0;
+}
+subsys_initcall(blkcg_init);
+
 module_param(blkcg_debug_stats, bool, 0644);
 MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
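
Note on usage: this diff only adds the punt machinery to blk-cgroup.c. In the same series, callers opt in by setting REQ_CGROUP_PUNT on a bio before submission, and the submission path offers such bios to __blkcg_punt_bio_submit() instead of issuing them inline. The companion blk-cgroup.h change is roughly the following inline wrapper (a sketch of the header side, not a hunk from this file):

	static inline bool blkcg_punt_bio_submit(struct bio *bio)
	{
		/* cheap flag test keeps the common, unflagged path to one branch */
		if (bio->bi_opf & REQ_CGROUP_PUNT)
			return __blkcg_punt_bio_submit(bio);
		return false;
	}

When this returns true, the bio sits on its blkg's async_bios list and is resubmitted from the blkcg_punt_bio workqueue added above; WQ_MEM_RECLAIM on that workqueue is what guarantees forward progress when punted bios are themselves part of memory-reclaim writeback.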