about summary refs log tree commit diff stats
path: root/include/linux/blk-cgroup.h
diff options
context:
space:
mode:
authorJosef Bacik <jbacik@fb.com>2018-07-03 11:14:55 -0400
committerJens Axboe <axboe@kernel.dk>2018-07-09 11:07:54 -0400
commitd09d8df3a29403693d9d20cc34ed101f2c558e2b (patch)
treeef13236fd3cab8b7a3d6c27a7484862561afcd32 /include/linux/blk-cgroup.h
parent0d3bd88d54f513723602b361dccfc71639f50779 (diff)
blkcg: add generic throttling mechanism
Since IO can be issued from literally anywhere it's almost impossible to do throttling without having some sort of adverse effect somewhere else in the system because of locking or other dependencies. The best way to solve this is to do the throttling when we know we aren't holding any other kernel resources. Do this by tracking throttling in a per-blkg basis, and if we require throttling flag the task that it needs to check before it returns to user space and possibly sleep there. This is to address the case where a process is doing work that is generating IO that can't be throttled, whether that is directly with a lot of REQ_META IO, or indirectly by allocating so much memory that it is swamping the disk with REQ_SWAP. We can't use task_add_work as we don't want to induce a memory allocation in the IO path, so simply saving the request queue in the task and flagging it to do the notify_resume thing achieves the same result without the overhead of a memory allocation. Signed-off-by: Josef Bacik <jbacik@fb.com> Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'include/linux/blk-cgroup.h')
-rw-r--r--  include/linux/blk-cgroup.h  99
1 file changed, 99 insertions, 0 deletions
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index a8f9ba8f33a4..de57de4831d5 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -136,6 +136,12 @@ struct blkcg_gq {
136 struct blkg_policy_data *pd[BLKCG_MAX_POLS]; 136 struct blkg_policy_data *pd[BLKCG_MAX_POLS];
137 137
138 struct rcu_head rcu_head; 138 struct rcu_head rcu_head;
139
140 atomic_t use_delay;
141 atomic64_t delay_nsec;
142 atomic64_t delay_start;
143 u64 last_delay;
144 int last_use;
139}; 145};
140 146
141typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); 147typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
@@ -241,6 +247,26 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
241 return css_to_blkcg(task_css(current, io_cgrp_id)); 247 return css_to_blkcg(task_css(current, io_cgrp_id));
242} 248}
243 249
/*
 * blk_cgroup_congested - check whether the current task's blkcg, or any
 * ancestor cgroup of it, is currently flagged as congested.
 *
 * Picks the relevant css: kthread_blkcg() when that returns one,
 * otherwise the io controller css of @current.  Walks up the hierarchy
 * via css->parent and returns true as soon as any level has a nonzero
 * congestion_count (bumped by blkcg_use_delay()).  The entire walk is
 * under rcu_read_lock(), so no css references need to be taken.
 */
250static inline bool blk_cgroup_congested(void)
251{
252 struct cgroup_subsys_state *css;
253 bool ret = false;
254
255 rcu_read_lock();
256 css = kthread_blkcg();
257 if (!css)
258 css = task_css(current, io_cgrp_id);
259 while (css) {
260 if (atomic_read(&css->cgroup->congestion_count)) {
261 ret = true;
262 break;
263 }
264 css = css->parent;
265 }
266 rcu_read_unlock();
267 return ret;
268}
269
244/** 270/**
245 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg 271 * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
246 * @return: true if this bio needs to be submitted with the root blkg context. 272 * @return: true if this bio needs to be submitted with the root blkg context.
@@ -374,6 +400,21 @@ static inline void blkg_get(struct blkcg_gq *blkg)
374 atomic_inc(&blkg->refcnt); 400 atomic_inc(&blkg->refcnt);
375} 401}
376 402
403/**
404 * blkg_try_get - try and get a blkg reference
405 * @blkg: blkg to get
406 *
407 * This is for use when doing an RCU lookup of the blkg. We may be in the midst
408 * of freeing this blkg, so we can only use it if the refcnt is not zero.
 *
 * Return: @blkg with its refcnt elevated on success, NULL if the refcnt
 * had already dropped to zero (blkg being freed).  NOTE(review): a
 * successful grab presumably must be balanced with a blkg_put() — confirm
 * against callers.
409 */
410static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
411{
 /* atomic_inc_not_zero() refuses to resurrect a zero refcnt */
412 if (atomic_inc_not_zero(&blkg->refcnt))
413 return blkg;
 /* blkg is on its way to being freed; caller must not touch it */
414 return NULL;
415}
416
417
377void __blkg_release_rcu(struct rcu_head *rcu); 418void __blkg_release_rcu(struct rcu_head *rcu);
378 419
379/** 420/**
@@ -734,6 +775,59 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
734 return !throtl; 775 return !throtl;
735} 776}
736 777
/*
 * blkcg_use_delay - add one delay "use" to @blkg.
 *
 * Increments blkg->use_delay; on the 0 -> 1 transition (the return value
 * of atomic_add_return() is exactly 1) it also bumps the owning cgroup's
 * congestion_count, which is what blk_cgroup_congested() tests.
 */
778static inline void blkcg_use_delay(struct blkcg_gq *blkg)
779{
780 if (atomic_add_return(1, &blkg->use_delay) == 1)
781 atomic_inc(&blkg->blkcg->css.cgroup->congestion_count);
782}
783
/*
 * blkcg_unuse_delay - drop one delay "use" from @blkg.
 *
 * Counterpart of blkcg_use_delay().  Decrements use_delay with a cmpxchg
 * loop (see the comment below for why a plain atomic_dec won't do); when
 * the observed pre-decrement value was 1, i.e. this was the last use, the
 * cgroup's congestion_count is dropped as well.
 *
 * Return: 1 if a use was actually dropped, 0 if use_delay was already 0.
 */
784static inline int blkcg_unuse_delay(struct blkcg_gq *blkg)
785{
786 int old = atomic_read(&blkg->use_delay);
787
 /* fast path: nothing to drop */
788 if (old == 0)
789 return 0;
790
791 /*
792 * We do this song and dance because we can race with somebody else
793 * adding or removing delay. If we just did an atomic_dec we'd end up
794 * negative and we'd already be in trouble. We need to subtract 1 and
795 * then check to see if we were the last delay so we can drop the
796 * congestion count on the cgroup.
797 */
798 while (old) {
799 int cur = atomic_cmpxchg(&blkg->use_delay, old, old - 1);
 /* cmpxchg succeeded: 'old' is the value we decremented from */
800 if (cur == old)
801 break;
 /* lost the race; retry with the value somebody else installed */
802 old = cur;
803 }
804
 /* the count hit 0 under us before we could decrement */
805 if (old == 0)
806 return 0;
 /* we performed the 1 -> 0 transition: clear the congestion flag */
807 if (old == 1)
808 atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
809 return 1;
810}
811
/*
 * blkcg_clear_delay - reset @blkg's use_delay count straight to zero.
 *
 * Unlike blkcg_unuse_delay() this discards all outstanding uses at once.
 * congestion_count is decremented exactly once, matching the single
 * increment done on the 0 -> 1 transition in blkcg_use_delay(); the
 * cmpxchg guarantees only one task observes the nonzero -> 0 swap.
 */
812static inline void blkcg_clear_delay(struct blkcg_gq *blkg)
813{
814 int old = atomic_read(&blkg->use_delay);
815 if (!old)
816 return;
817 /* We only want 1 person clearing the congestion count for this blkg. */
818 while (old) {
819 int cur = atomic_cmpxchg(&blkg->use_delay, old, 0);
820 if (cur == old) {
821 atomic_dec(&blkg->blkcg->css.cgroup->congestion_count);
822 break;
823 }
 /* raced with another update; retry against the fresh value */
824 old = cur;
825 }
826}
827
828void blkcg_add_delay(struct blkcg_gq *blkg, u64 now, u64 delta);
829void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay);
830void blkcg_maybe_throttle_current(void);
737#else /* CONFIG_BLK_CGROUP */ 831#else /* CONFIG_BLK_CGROUP */
738 832
739struct blkcg { 833struct blkcg {
@@ -753,8 +847,13 @@ struct blkcg_policy {
753 847
754#define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) 848#define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL))
755 849
/* !CONFIG_BLK_CGROUP: throttling is compiled out, so these are no-op stubs. */
850static inline void blkcg_maybe_throttle_current(void) { }
851static inline bool blk_cgroup_congested(void) { return false; }
852
756#ifdef CONFIG_BLOCK 853#ifdef CONFIG_BLOCK
757 854
/* Without CONFIG_BLK_CGROUP there is nothing to throttle; silently ignore. */
855static inline void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) { }
856
/* Stubbed queue hooks for !CONFIG_BLK_CGROUP: act as if no blkcg state exists. */
758static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } 857static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; }
759static inline int blkcg_init_queue(struct request_queue *q) { return 0; } 858static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
760static inline void blkcg_drain_queue(struct request_queue *q) { } 859static inline void blkcg_drain_queue(struct request_queue *q) { }