 block/blk-cgroup.c          | 53 +++++++++++++++++++++++++++++++++++++++++++++++++
 block/blk-core.c            |  3 +++
 include/linux/backing-dev.h |  1 +
 include/linux/blk-cgroup.h  | 16 +++++++++++++++-
 include/linux/blk_types.h   | 10 ++++++++++
 include/linux/writeback.h   | 13 ++++++++++---
 6 files changed, 92 insertions(+), 4 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index ad7a91dec934..24ed26957367 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -55,6 +55,7 @@ static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 static LIST_HEAD(all_blkcgs);		/* protected by blkcg_pol_mutex */
 
 static bool blkcg_debug_stats = false;
+static struct workqueue_struct *blkcg_punt_bio_wq;
 
 static bool blkcg_policy_enabled(struct request_queue *q,
 				 const struct blkcg_policy *pol)
@@ -89,6 +90,8 @@ static void __blkg_release(struct rcu_head *rcu)
 {
 	struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
 
+	WARN_ON(!bio_list_empty(&blkg->async_bios));
+
 	/* release the blkcg and parent blkg refs this blkg has been holding */
 	css_put(&blkg->blkcg->css);
 	if (blkg->parent)
@@ -114,6 +117,23 @@ static void blkg_release(struct percpu_ref *ref)
 	call_rcu(&blkg->rcu_head, __blkg_release);
 }
 
+static void blkg_async_bio_workfn(struct work_struct *work)
+{
+	struct blkcg_gq *blkg = container_of(work, struct blkcg_gq,
+					     async_bio_work);
+	struct bio_list bios = BIO_EMPTY_LIST;
+	struct bio *bio;
+
+	/* as long as there are pending bios, @blkg can't go away */
+	spin_lock_bh(&blkg->async_bio_lock);
+	bio_list_merge(&bios, &blkg->async_bios);
+	bio_list_init(&blkg->async_bios);
+	spin_unlock_bh(&blkg->async_bio_lock);
+
+	while ((bio = bio_list_pop(&bios)))
+		submit_bio(bio);
+}
+
 /**
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
@@ -142,6 +162,9 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 
 	blkg->q = q;
 	INIT_LIST_HEAD(&blkg->q_node);
+	spin_lock_init(&blkg->async_bio_lock);
+	bio_list_init(&blkg->async_bios);
+	INIT_WORK(&blkg->async_bio_work, blkg_async_bio_workfn);
 	blkg->blkcg = blkcg;
 
 	for (i = 0; i < BLKCG_MAX_POLS; i++) {
@@ -1528,6 +1551,25 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(blkcg_policy_unregister);
 
+bool __blkcg_punt_bio_submit(struct bio *bio)
+{
+	struct blkcg_gq *blkg = bio->bi_blkg;
+
+	/* consume the flag first */
+	bio->bi_opf &= ~REQ_CGROUP_PUNT;
+
+	/* never bounce for the root cgroup */
+	if (!blkg->parent)
+		return false;
+
+	spin_lock_bh(&blkg->async_bio_lock);
+	bio_list_add(&blkg->async_bios, bio);
+	spin_unlock_bh(&blkg->async_bio_lock);
+
+	queue_work(blkcg_punt_bio_wq, &blkg->async_bio_work);
+	return true;
+}
+
 /*
  * Scale the accumulated delay based on how long it has been since we updated
  * the delay. We only call this when we are adding delay, in case it's been a
@@ -1729,5 +1771,16 @@ void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta)
 	atomic64_add(delta, &blkg->delay_nsec);
 }
 
+static int __init blkcg_init(void)
+{
+	blkcg_punt_bio_wq = alloc_workqueue("blkcg_punt_bio",
+					    WQ_MEM_RECLAIM | WQ_FREEZABLE |
+					    WQ_UNBOUND | WQ_SYSFS, 0);
+	if (!blkcg_punt_bio_wq)
+		return -ENOMEM;
+	return 0;
+}
+subsys_initcall(blkcg_init);
+
 module_param(blkcg_debug_stats, bool, 0644);
 MODULE_PARM_DESC(blkcg_debug_stats, "True if you want debug stats, false if not");
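
Note how blkg_async_bio_workfn() splices the whole async_bios list while holding async_bio_lock and only issues the bios after dropping it, so the spin_lock_bh() critical section stays short no matter how many bios were punted. Below is a minimal userspace sketch of that same splice-then-drain pattern (plain C, with a pthread mutex and an ad-hoc list standing in for the blkg spinlock and bio_list; all names are illustrative, not kernel API):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {				/* stands in for struct bio */
	int		payload;
	struct node	*next;
};

static struct node *pending_head, *pending_tail;	/* blkg->async_bios */
static pthread_mutex_t pending_lock = PTHREAD_MUTEX_INITIALIZER;

/* producer side, as in __blkcg_punt_bio_submit(): O(1) work under the lock */
static void punt(int payload)
{
	struct node *n = malloc(sizeof(*n));

	n->payload = payload;
	n->next = NULL;
	pthread_mutex_lock(&pending_lock);
	if (pending_tail)
		pending_tail->next = n;
	else
		pending_head = n;
	pending_tail = n;
	pthread_mutex_unlock(&pending_lock);
}

/* worker side, as in blkg_async_bio_workfn() */
static void drain(void)
{
	struct node *batch;

	/* splice the whole list out while holding the lock ... */
	pthread_mutex_lock(&pending_lock);
	batch = pending_head;
	pending_head = pending_tail = NULL;
	pthread_mutex_unlock(&pending_lock);

	/* ... then do the slow per-item work with the lock dropped */
	while (batch) {
		struct node *n = batch;

		batch = n->next;
		printf("submitting %d\n", n->payload);	/* submit_bio() here */
		free(n);
	}
}

int main(void)
{
	for (int i = 0; i < 4; i++)
		punt(i);
	drain();
	return 0;
}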
diff --git a/block/blk-core.c b/block/blk-core.c
index edd009213f5b..260e36a2c343 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1128,6 +1128,9 @@ EXPORT_SYMBOL_GPL(direct_make_request);
  */
 blk_qc_t submit_bio(struct bio *bio)
 {
+	if (blkcg_punt_bio_submit(bio))
+		return BLK_QC_T_NONE;
+
 	/*
 	 * If it's a regular read/write or a barrier with data attached,
 	 * go through the normal accounting stuff before submission.
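
The punt check runs before any of submit_bio()'s accounting, and since __blkcg_punt_bio_submit() clears REQ_CGROUP_PUNT before queueing, the worker's later submit_bio() call falls through this check and the bio takes the normal path: each bio is punted at most once. A hypothetical caller might look like this (a sketch, not part of this patch):

/* Hypothetical helper (illustrative only): issue @bio on behalf of the
 * cgroup it is already associated with, asking submit_bio() to punt the
 * actual submission to that cgroup's dedicated work item. */
static void submit_bio_punted(struct bio *bio)
{
	bio->bi_opf |= REQ_CGROUP_PUNT;
	submit_bio(bio);	/* returns BLK_QC_T_NONE when punted */
}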
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index f9b029180241..35b31d176f74 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -48,6 +48,7 @@ extern spinlock_t bdi_lock;
 extern struct list_head bdi_list;
 
 extern struct workqueue_struct *bdi_wq;
+extern struct workqueue_struct *bdi_async_bio_wq;
 
 static inline bool wb_has_dirty_io(struct bdi_writeback *wb)
 {
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 33f23a858438..689a58231288 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -132,13 +132,17 @@ struct blkcg_gq {
 
 	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];
 
-	struct rcu_head			rcu_head;
+	spinlock_t			async_bio_lock;
+	struct bio_list			async_bios;
+	struct work_struct		async_bio_work;
 
 	atomic_t			use_delay;
 	atomic64_t			delay_nsec;
 	atomic64_t			delay_start;
 	u64				last_delay;
 	int				last_use;
+
+	struct rcu_head			rcu_head;
 };
 
 typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
@@ -701,6 +705,15 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
 					 struct bio *bio) { return false; }
 #endif
 
+bool __blkcg_punt_bio_submit(struct bio *bio);
+
+static inline bool blkcg_punt_bio_submit(struct bio *bio)
+{
+	if (bio->bi_opf & REQ_CGROUP_PUNT)
+		return __blkcg_punt_bio_submit(bio);
+	else
+		return false;
+}
 
 static inline void blkcg_bio_issue_init(struct bio *bio)
 {
@@ -848,6 +861,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; }
 static inline void blkg_get(struct blkcg_gq *blkg) { }
 static inline void blkg_put(struct blkcg_gq *blkg) { }
 
+static inline bool blkcg_punt_bio_submit(struct bio *bio) { return false; }
 static inline void blkcg_bio_issue_init(struct bio *bio) { }
 static inline bool blkcg_bio_issue_check(struct request_queue *q,
 					 struct bio *bio) { return true; }
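
The header follows the usual kernel fast-path convention: the inline blkcg_punt_bio_submit() wrapper costs one flag test per bio and only calls out of line when punting was actually requested, while the CONFIG_BLK_CGROUP=n stub returns a constant false so the branch in submit_bio() compiles away entirely. The same pattern in generic form (a sketch with illustrative names, not kernel API):

#include <stdbool.h>

#define FOO_FROB	(1u << 0)	/* illustrative flag bit */

struct foo {
	unsigned int flags;
};

bool __frob_slow(struct foo *foo);	/* out of line, rarely taken */

static inline bool frob(struct foo *foo)
{
	if (!(foo->flags & FOO_FROB))	/* hot path: a single flag test */
		return false;
	return __frob_slow(foo);	/* cold path */
}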
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 6a53799c3fe2..feff3fe4467e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -311,6 +311,14 @@ enum req_flag_bits {
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
 	__REQ_BACKGROUND,	/* background IO */
 	__REQ_NOWAIT,		/* Don't wait if request will block */
+	/*
+	 * When a shared kthread needs to issue a bio for a cgroup, doing
+	 * so synchronously can lead to priority inversions as the kthread
+	 * can be trapped waiting for that cgroup.  CGROUP_PUNT flag makes
+	 * submit_bio() punt the actual issuing to a dedicated per-blkcg
+	 * work item to avoid such priority inversions.
+	 */
+	__REQ_CGROUP_PUNT,
 
 	/* command specific flags for REQ_OP_WRITE_ZEROES: */
 	__REQ_NOUNMAP,		/* do not free blocks when zeroing */
@@ -337,6 +345,8 @@ enum req_flag_bits {
 #define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
 #define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
 #define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
+#define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)
+
 #define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
 #define REQ_HIPRI		(1ULL << __REQ_HIPRI)
 
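
REQ_CGROUP_PUNT follows blk_types.h's two-step convention: the enum names the bit position, and the #define provides the mask actually OR'd into bio->bi_opf. A self-contained illustration of that convention and of how such a flag is set, tested, and consumed (generic C, illustrative names):

#include <assert.h>

enum my_flag_bits {
	__MY_FLAG_A,			/* each enumerator is a bit position */
	__MY_FLAG_PUNT,
};

#define MY_FLAG_A	(1ULL << __MY_FLAG_A)	/* each define is the mask */
#define MY_FLAG_PUNT	(1ULL << __MY_FLAG_PUNT)

int main(void)
{
	unsigned long long opf = 0;

	opf |= MY_FLAG_PUNT;		/* request punting */
	assert(opf & MY_FLAG_PUNT);	/* fast-path test */
	opf &= ~MY_FLAG_PUNT;		/* consumed exactly once */
	assert(!(opf & MY_FLAG_PUNT));
	return 0;
}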
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index e056a22075cf..8945aac31392 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -78,6 +78,8 @@ struct writeback_control {
 	 */
 	unsigned no_cgroup_owner:1;
 
+	unsigned punt_to_cgroup:1;	/* cgrp punting, see __REQ_CGROUP_PUNT */
+
 #ifdef CONFIG_CGROUP_WRITEBACK
 	struct bdi_writeback *wb;	/* wb this writeback is issued under */
 	struct inode *inode;		/* inode being written out */
@@ -94,12 +96,17 @@ struct writeback_control {
 
 static inline int wbc_to_write_flags(struct writeback_control *wbc)
 {
+	int flags = 0;
+
+	if (wbc->punt_to_cgroup)
+		flags = REQ_CGROUP_PUNT;
+
 	if (wbc->sync_mode == WB_SYNC_ALL)
-		return REQ_SYNC;
+		flags |= REQ_SYNC;
 	else if (wbc->for_kupdate || wbc->for_background)
-		return REQ_BACKGROUND;
+		flags |= REQ_BACKGROUND;
 
-	return 0;
+	return flags;
 }
 
 static inline struct cgroup_subsys_state *
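
The wbc_to_write_flags() rewrite also changes its shape from early returns to flag accumulation, so REQ_CGROUP_PUNT composes with REQ_SYNC or REQ_BACKGROUND instead of being masked by them. A hypothetical caller (a sketch, not part of this patch):

/* Hypothetical writeback path (illustrative only): background writeback
 * that opts into cgroup punting via the new wbc field. */
static int example_write_flags(void)
{
	struct writeback_control wbc = {
		.sync_mode	= WB_SYNC_NONE,
		.for_background	= 1,
		.punt_to_cgroup	= 1,
	};

	return wbc_to_write_flags(&wbc);	/* REQ_CGROUP_PUNT | REQ_BACKGROUND */
}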