author     Linus Torvalds <torvalds@linux-foundation.org>  2015-06-25 19:00:17 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-06-25 19:00:17 -0400
commit     e4bc13adfd016fc1036838170288b5680d1a98b0 (patch)
tree       8d2cb749397749439732f3a827cb7f2336408337 /block
parent     ad90fb97515b732bc27a0109baa10af636c3c8cd (diff)
parent     3e1534cf4a2a8278e811e7c84a79da1a02347b8b (diff)
Merge branch 'for-4.2/writeback' of git://git.kernel.dk/linux-block
Pull cgroup writeback support from Jens Axboe:
"This is the big pull request for adding cgroup writeback support.
This code has been in development for a long time, and it has been
simmering in for-next for a good chunk of this cycle too. This is one
of those problems that has been talked about for at least half a
decade; finally there's a solution and code to go with it.
Also see last week's writeup on LWN:
http://lwn.net/Articles/648292/"
* 'for-4.2/writeback' of git://git.kernel.dk/linux-block: (85 commits)
writeback, blkio: add documentation for cgroup writeback support
vfs, writeback: replace FS_CGROUP_WRITEBACK with SB_I_CGROUPWB
writeback: do foreign inode detection iff cgroup writeback is enabled
v9fs: fix error handling in v9fs_session_init()
bdi: fix wrong error return value in cgwb_create()
buffer: remove unusued 'ret' variable
writeback: disassociate inodes from dying bdi_writebacks
writeback: implement foreign cgroup inode bdi_writeback switching
writeback: add lockdep annotation to inode_to_wb()
writeback: use unlocked_inode_to_wb transaction in inode_congested()
writeback: implement unlocked_inode_to_wb transaction and use it for stat updates
writeback: implement [locked_]inode_to_wb_and_lock_list()
writeback: implement foreign cgroup inode detection
writeback: make writeback_control track the inode being written back
writeback: relocate wb[_try]_get(), wb_put(), inode_{attach|detach}_wb()
mm: vmscan: disable memcg direct reclaim stalling if cgroup writeback support is in use
writeback: implement memcg writeback domain based throttling
writeback: reset wb_domain->dirty_limit[_tstmp] when memcg domain size changes
writeback: implement memcg wb_domain
writeback: update wb_over_bg_thresh() to use wb_domain aware operations
...
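As a concrete illustration of what the opt-in looks like on the filesystem side, the "vfs, writeback: replace FS_CGROUP_WRITEBACK with SB_I_CGROUPWB" commit listed above has each filesystem flag its superblock as capable of cgroup-aware writeback. The following is a hedged sketch, not code from this merge: everything named "examplefs" is hypothetical; only the SB_I_CGROUPWB flag and the s_iflags superblock field are assumed to be real.

#include <linux/fs.h>

/*
 * Hedged sketch: a filesystem opts into cgroup-aware writeback by setting
 * SB_I_CGROUPWB on its superblock during mount.  "examplefs" is hypothetical.
 */
static int examplefs_fill_super(struct super_block *sb, void *data, int silent)
{
        sb->s_iflags |= SB_I_CGROUPWB;  /* dirty pages written back per cgroup */

        /* ... the rest of the usual fill_super() setup would follow ... */
        return 0;
}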
Diffstat (limited to 'block')
-rw-r--r--  block/bio.c            |  35
-rw-r--r--  block/blk-cgroup.c     | 123
-rw-r--r--  block/blk-cgroup.h     | 627
-rw-r--r--  block/blk-core.c       |  70
-rw-r--r--  block/blk-integrity.c  |   1
-rw-r--r--  block/blk-sysfs.c      |   3
-rw-r--r--  block/blk-throttle.c   |   2
-rw-r--r--  block/bounce.c         |   1
-rw-r--r--  block/cfq-iosched.c    |   2
-rw-r--r--  block/elevator.c       |   2
-rw-r--r--  block/genhd.c          |   1
11 files changed, 139 insertions(+), 728 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 259197d97de1..2a00d349cd68 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1988,6 +1988,28 @@ struct bio_set *bioset_create_nobvec(unsigned int pool_size, unsigned int front_
1988 | EXPORT_SYMBOL(bioset_create_nobvec); | 1988 | EXPORT_SYMBOL(bioset_create_nobvec); |
1989 | 1989 | ||
1990 | #ifdef CONFIG_BLK_CGROUP | 1990 | #ifdef CONFIG_BLK_CGROUP |
1991 | |||
1992 | /** | ||
1993 | * bio_associate_blkcg - associate a bio with the specified blkcg | ||
1994 | * @bio: target bio | ||
1995 | * @blkcg_css: css of the blkcg to associate | ||
1996 | * | ||
1997 | * Associate @bio with the blkcg specified by @blkcg_css. Block layer will | ||
1998 | * treat @bio as if it were issued by a task which belongs to the blkcg. | ||
1999 | * | ||
2000 | * This function takes an extra reference of @blkcg_css which will be put | ||
2001 | * when @bio is released. The caller must own @bio and is responsible for | ||
2002 | * synchronizing calls to this function. | ||
2003 | */ | ||
2004 | int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) | ||
2005 | { | ||
2006 | if (unlikely(bio->bi_css)) | ||
2007 | return -EBUSY; | ||
2008 | css_get(blkcg_css); | ||
2009 | bio->bi_css = blkcg_css; | ||
2010 | return 0; | ||
2011 | } | ||
2012 | |||
1991 | /** | 2013 | /** |
1992 | * bio_associate_current - associate a bio with %current | 2014 | * bio_associate_current - associate a bio with %current |
1993 | * @bio: target bio | 2015 | * @bio: target bio |
@@ -2004,26 +2026,17 @@ EXPORT_SYMBOL(bioset_create_nobvec);
2004 | int bio_associate_current(struct bio *bio) | 2026 | int bio_associate_current(struct bio *bio) |
2005 | { | 2027 | { |
2006 | struct io_context *ioc; | 2028 | struct io_context *ioc; |
2007 | struct cgroup_subsys_state *css; | ||
2008 | 2029 | ||
2009 | if (bio->bi_ioc) | 2030 | if (bio->bi_css) |
2010 | return -EBUSY; | 2031 | return -EBUSY; |
2011 | 2032 | ||
2012 | ioc = current->io_context; | 2033 | ioc = current->io_context; |
2013 | if (!ioc) | 2034 | if (!ioc) |
2014 | return -ENOENT; | 2035 | return -ENOENT; |
2015 | 2036 | ||
2016 | /* acquire active ref on @ioc and associate */ | ||
2017 | get_io_context_active(ioc); | 2037 | get_io_context_active(ioc); |
2018 | bio->bi_ioc = ioc; | 2038 | bio->bi_ioc = ioc; |
2019 | 2039 | bio->bi_css = task_get_css(current, blkio_cgrp_id); | |
2020 | /* associate blkcg if exists */ | ||
2021 | rcu_read_lock(); | ||
2022 | css = task_css(current, blkio_cgrp_id); | ||
2023 | if (css && css_tryget_online(css)) | ||
2024 | bio->bi_css = css; | ||
2025 | rcu_read_unlock(); | ||
2026 | |||
2027 | return 0; | 2040 | return 0; |
2028 | } | 2041 | } |
2029 | 2042 | ||
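The kernel-doc added above for bio_associate_blkcg() spells out the contract: the caller must own the bio, the function takes an extra reference on the css, and that reference is put when the bio is released. Below is a hedged sketch of a caller that charges I/O to an explicit blkcg rather than to %current; example_submit_for_blkcg() is hypothetical, while bio_associate_blkcg() (available only under CONFIG_BLK_CGROUP, per the hunk above) and the long-standing submit_bio() are real interfaces.

#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/cgroup.h>

/*
 * Hedged sketch: tag @bio with an explicit blkcg css before submission so
 * the block layer accounts it to that cgroup instead of %current's.
 * bio_associate_blkcg() pins an extra css reference, dropped when the bio
 * is released; it returns -EBUSY if the bio already carries a css.
 */
static void example_submit_for_blkcg(struct bio *bio,
                                     struct cgroup_subsys_state *blkcg_css)
{
        if (bio_associate_blkcg(bio, blkcg_css))
                pr_warn("bio already associated with a blkcg\n");

        submit_bio(WRITE, bio);
}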
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 6e43fa355e71..9f97da52d006 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -19,11 +19,12 @@
19 | #include <linux/module.h> | 19 | #include <linux/module.h> |
20 | #include <linux/err.h> | 20 | #include <linux/err.h> |
21 | #include <linux/blkdev.h> | 21 | #include <linux/blkdev.h> |
22 | #include <linux/backing-dev.h> | ||
22 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
23 | #include <linux/genhd.h> | 24 | #include <linux/genhd.h> |
24 | #include <linux/delay.h> | 25 | #include <linux/delay.h> |
25 | #include <linux/atomic.h> | 26 | #include <linux/atomic.h> |
26 | #include "blk-cgroup.h" | 27 | #include <linux/blk-cgroup.h> |
27 | #include "blk.h" | 28 | #include "blk.h" |
28 | 29 | ||
29 | #define MAX_KEY_LEN 100 | 30 | #define MAX_KEY_LEN 100 |
@@ -33,6 +34,8 @@ static DEFINE_MUTEX(blkcg_pol_mutex);
33 | struct blkcg blkcg_root; | 34 | struct blkcg blkcg_root; |
34 | EXPORT_SYMBOL_GPL(blkcg_root); | 35 | EXPORT_SYMBOL_GPL(blkcg_root); |
35 | 36 | ||
37 | struct cgroup_subsys_state * const blkcg_root_css = &blkcg_root.css; | ||
38 | |||
36 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; | 39 | static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; |
37 | 40 | ||
38 | static bool blkcg_policy_enabled(struct request_queue *q, | 41 | static bool blkcg_policy_enabled(struct request_queue *q, |
@@ -182,6 +185,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
182 | struct blkcg_gq *new_blkg) | 185 | struct blkcg_gq *new_blkg) |
183 | { | 186 | { |
184 | struct blkcg_gq *blkg; | 187 | struct blkcg_gq *blkg; |
188 | struct bdi_writeback_congested *wb_congested; | ||
185 | int i, ret; | 189 | int i, ret; |
186 | 190 | ||
187 | WARN_ON_ONCE(!rcu_read_lock_held()); | 191 | WARN_ON_ONCE(!rcu_read_lock_held()); |
@@ -193,22 +197,30 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
193 | goto err_free_blkg; | 197 | goto err_free_blkg; |
194 | } | 198 | } |
195 | 199 | ||
200 | wb_congested = wb_congested_get_create(&q->backing_dev_info, | ||
201 | blkcg->css.id, GFP_ATOMIC); | ||
202 | if (!wb_congested) { | ||
203 | ret = -ENOMEM; | ||
204 | goto err_put_css; | ||
205 | } | ||
206 | |||
196 | /* allocate */ | 207 | /* allocate */ |
197 | if (!new_blkg) { | 208 | if (!new_blkg) { |
198 | new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); | 209 | new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); |
199 | if (unlikely(!new_blkg)) { | 210 | if (unlikely(!new_blkg)) { |
200 | ret = -ENOMEM; | 211 | ret = -ENOMEM; |
201 | goto err_put_css; | 212 | goto err_put_congested; |
202 | } | 213 | } |
203 | } | 214 | } |
204 | blkg = new_blkg; | 215 | blkg = new_blkg; |
216 | blkg->wb_congested = wb_congested; | ||
205 | 217 | ||
206 | /* link parent */ | 218 | /* link parent */ |
207 | if (blkcg_parent(blkcg)) { | 219 | if (blkcg_parent(blkcg)) { |
208 | blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); | 220 | blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); |
209 | if (WARN_ON_ONCE(!blkg->parent)) { | 221 | if (WARN_ON_ONCE(!blkg->parent)) { |
210 | ret = -EINVAL; | 222 | ret = -EINVAL; |
211 | goto err_put_css; | 223 | goto err_put_congested; |
212 | } | 224 | } |
213 | blkg_get(blkg->parent); | 225 | blkg_get(blkg->parent); |
214 | } | 226 | } |
@@ -238,18 +250,15 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
238 | blkg->online = true; | 250 | blkg->online = true; |
239 | spin_unlock(&blkcg->lock); | 251 | spin_unlock(&blkcg->lock); |
240 | 252 | ||
241 | if (!ret) { | 253 | if (!ret) |
242 | if (blkcg == &blkcg_root) { | ||
243 | q->root_blkg = blkg; | ||
244 | q->root_rl.blkg = blkg; | ||
245 | } | ||
246 | return blkg; | 254 | return blkg; |
247 | } | ||
248 | 255 | ||
249 | /* @blkg failed fully initialized, use the usual release path */ | 256 | /* @blkg failed fully initialized, use the usual release path */ |
250 | blkg_put(blkg); | 257 | blkg_put(blkg); |
251 | return ERR_PTR(ret); | 258 | return ERR_PTR(ret); |
252 | 259 | ||
260 | err_put_congested: | ||
261 | wb_congested_put(wb_congested); | ||
253 | err_put_css: | 262 | err_put_css: |
254 | css_put(&blkcg->css); | 263 | css_put(&blkcg->css); |
255 | err_free_blkg: | 264 | err_free_blkg: |
@@ -343,15 +352,6 @@ static void blkg_destroy(struct blkcg_gq *blkg)
343 | rcu_assign_pointer(blkcg->blkg_hint, NULL); | 352 | rcu_assign_pointer(blkcg->blkg_hint, NULL); |
344 | 353 | ||
345 | /* | 354 | /* |
346 | * If root blkg is destroyed. Just clear the pointer since root_rl | ||
347 | * does not take reference on root blkg. | ||
348 | */ | ||
349 | if (blkcg == &blkcg_root) { | ||
350 | blkg->q->root_blkg = NULL; | ||
351 | blkg->q->root_rl.blkg = NULL; | ||
352 | } | ||
353 | |||
354 | /* | ||
355 | * Put the reference taken at the time of creation so that when all | 355 | * Put the reference taken at the time of creation so that when all |
356 | * queues are gone, group can be destroyed. | 356 | * queues are gone, group can be destroyed. |
357 | */ | 357 | */ |
@@ -405,6 +405,8 @@ void __blkg_release_rcu(struct rcu_head *rcu_head)
405 | if (blkg->parent) | 405 | if (blkg->parent) |
406 | blkg_put(blkg->parent); | 406 | blkg_put(blkg->parent); |
407 | 407 | ||
408 | wb_congested_put(blkg->wb_congested); | ||
409 | |||
408 | blkg_free(blkg); | 410 | blkg_free(blkg); |
409 | } | 411 | } |
410 | EXPORT_SYMBOL_GPL(__blkg_release_rcu); | 412 | EXPORT_SYMBOL_GPL(__blkg_release_rcu); |
@@ -812,6 +814,8 @@ static void blkcg_css_offline(struct cgroup_subsys_state *css)
812 | } | 814 | } |
813 | 815 | ||
814 | spin_unlock_irq(&blkcg->lock); | 816 | spin_unlock_irq(&blkcg->lock); |
817 | |||
818 | wb_blkcg_offline(blkcg); | ||
815 | } | 819 | } |
816 | 820 | ||
817 | static void blkcg_css_free(struct cgroup_subsys_state *css) | 821 | static void blkcg_css_free(struct cgroup_subsys_state *css) |
@@ -868,7 +872,9 @@ done:
868 | spin_lock_init(&blkcg->lock); | 872 | spin_lock_init(&blkcg->lock); |
869 | INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); | 873 | INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC); |
870 | INIT_HLIST_HEAD(&blkcg->blkg_list); | 874 | INIT_HLIST_HEAD(&blkcg->blkg_list); |
871 | 875 | #ifdef CONFIG_CGROUP_WRITEBACK | |
876 | INIT_LIST_HEAD(&blkcg->cgwb_list); | ||
877 | #endif | ||
872 | return &blkcg->css; | 878 | return &blkcg->css; |
873 | 879 | ||
874 | free_pd_blkcg: | 880 | free_pd_blkcg: |
@@ -892,9 +898,45 @@ free_blkcg:
892 | */ | 898 | */ |
893 | int blkcg_init_queue(struct request_queue *q) | 899 | int blkcg_init_queue(struct request_queue *q) |
894 | { | 900 | { |
895 | might_sleep(); | 901 | struct blkcg_gq *new_blkg, *blkg; |
902 | bool preloaded; | ||
903 | int ret; | ||
904 | |||
905 | new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); | ||
906 | if (!new_blkg) | ||
907 | return -ENOMEM; | ||
908 | |||
909 | preloaded = !radix_tree_preload(GFP_KERNEL); | ||
910 | |||
911 | /* | ||
912 | * Make sure the root blkg exists and count the existing blkgs. As | ||
913 | * @q is bypassing at this point, blkg_lookup_create() can't be | ||
914 | * used. Open code insertion. | ||
915 | */ | ||
916 | rcu_read_lock(); | ||
917 | spin_lock_irq(q->queue_lock); | ||
918 | blkg = blkg_create(&blkcg_root, q, new_blkg); | ||
919 | spin_unlock_irq(q->queue_lock); | ||
920 | rcu_read_unlock(); | ||
921 | |||
922 | if (preloaded) | ||
923 | radix_tree_preload_end(); | ||
924 | |||
925 | if (IS_ERR(blkg)) { | ||
926 | kfree(new_blkg); | ||
927 | return PTR_ERR(blkg); | ||
928 | } | ||
896 | 929 | ||
897 | return blk_throtl_init(q); | 930 | q->root_blkg = blkg; |
931 | q->root_rl.blkg = blkg; | ||
932 | |||
933 | ret = blk_throtl_init(q); | ||
934 | if (ret) { | ||
935 | spin_lock_irq(q->queue_lock); | ||
936 | blkg_destroy_all(q); | ||
937 | spin_unlock_irq(q->queue_lock); | ||
938 | } | ||
939 | return ret; | ||
898 | } | 940 | } |
899 | 941 | ||
900 | /** | 942 | /** |
@@ -996,50 +1038,19 @@ int blkcg_activate_policy(struct request_queue *q,
996 | { | 1038 | { |
997 | LIST_HEAD(pds); | 1039 | LIST_HEAD(pds); |
998 | LIST_HEAD(cpds); | 1040 | LIST_HEAD(cpds); |
999 | struct blkcg_gq *blkg, *new_blkg; | 1041 | struct blkcg_gq *blkg; |
1000 | struct blkg_policy_data *pd, *nd; | 1042 | struct blkg_policy_data *pd, *nd; |
1001 | struct blkcg_policy_data *cpd, *cnd; | 1043 | struct blkcg_policy_data *cpd, *cnd; |
1002 | int cnt = 0, ret; | 1044 | int cnt = 0, ret; |
1003 | bool preloaded; | ||
1004 | 1045 | ||
1005 | if (blkcg_policy_enabled(q, pol)) | 1046 | if (blkcg_policy_enabled(q, pol)) |
1006 | return 0; | 1047 | return 0; |
1007 | 1048 | ||
1008 | /* preallocations for root blkg */ | 1049 | /* count and allocate policy_data for all existing blkgs */ |
1009 | new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); | ||
1010 | if (!new_blkg) | ||
1011 | return -ENOMEM; | ||
1012 | |||
1013 | blk_queue_bypass_start(q); | 1050 | blk_queue_bypass_start(q); |
1014 | |||
1015 | preloaded = !radix_tree_preload(GFP_KERNEL); | ||
1016 | |||
1017 | /* | ||
1018 | * Make sure the root blkg exists and count the existing blkgs. As | ||
1019 | * @q is bypassing at this point, blkg_lookup_create() can't be | ||
1020 | * used. Open code it. | ||
1021 | */ | ||
1022 | spin_lock_irq(q->queue_lock); | 1051 | spin_lock_irq(q->queue_lock); |
1023 | |||
1024 | rcu_read_lock(); | ||
1025 | blkg = __blkg_lookup(&blkcg_root, q, false); | ||
1026 | if (blkg) | ||
1027 | blkg_free(new_blkg); | ||
1028 | else | ||
1029 | blkg = blkg_create(&blkcg_root, q, new_blkg); | ||
1030 | rcu_read_unlock(); | ||
1031 | |||
1032 | if (preloaded) | ||
1033 | radix_tree_preload_end(); | ||
1034 | |||
1035 | if (IS_ERR(blkg)) { | ||
1036 | ret = PTR_ERR(blkg); | ||
1037 | goto out_unlock; | ||
1038 | } | ||
1039 | |||
1040 | list_for_each_entry(blkg, &q->blkg_list, q_node) | 1052 | list_for_each_entry(blkg, &q->blkg_list, q_node) |
1041 | cnt++; | 1053 | cnt++; |
1042 | |||
1043 | spin_unlock_irq(q->queue_lock); | 1054 | spin_unlock_irq(q->queue_lock); |
1044 | 1055 | ||
1045 | /* | 1056 | /* |
@@ -1140,10 +1151,6 @@ void blkcg_deactivate_policy(struct request_queue *q,
1140 | 1151 | ||
1141 | __clear_bit(pol->plid, q->blkcg_pols); | 1152 | __clear_bit(pol->plid, q->blkcg_pols); |
1142 | 1153 | ||
1143 | /* if no policy is left, no need for blkgs - shoot them down */ | ||
1144 | if (bitmap_empty(q->blkcg_pols, BLKCG_MAX_POLS)) | ||
1145 | blkg_destroy_all(q); | ||
1146 | |||
1147 | list_for_each_entry(blkg, &q->blkg_list, q_node) { | 1154 | list_for_each_entry(blkg, &q->blkg_list, q_node) { |
1148 | /* grab blkcg lock too while removing @pd from @blkg */ | 1155 | /* grab blkcg lock too while removing @pd from @blkg */ |
1149 | spin_lock(&blkg->blkcg->lock); | 1156 | spin_lock(&blkg->blkcg->lock); |
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
deleted file mode 100644
index 74296a78bba1..000000000000
--- a/block/blk-cgroup.h
+++ /dev/null
@@ -1,627 +0,0 @@
1 | #ifndef _BLK_CGROUP_H | ||
2 | #define _BLK_CGROUP_H | ||
3 | /* | ||
4 | * Common Block IO controller cgroup interface | ||
5 | * | ||
6 | * Based on ideas and code from CFQ, CFS and BFQ: | ||
7 | * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk> | ||
8 | * | ||
9 | * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it> | ||
10 | * Paolo Valente <paolo.valente@unimore.it> | ||
11 | * | ||
12 | * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com> | ||
13 | * Nauman Rafique <nauman@google.com> | ||
14 | */ | ||
15 | |||
16 | #include <linux/cgroup.h> | ||
17 | #include <linux/u64_stats_sync.h> | ||
18 | #include <linux/seq_file.h> | ||
19 | #include <linux/radix-tree.h> | ||
20 | #include <linux/blkdev.h> | ||
21 | #include <linux/atomic.h> | ||
22 | |||
23 | /* Max limits for throttle policy */ | ||
24 | #define THROTL_IOPS_MAX UINT_MAX | ||
25 | |||
26 | #ifdef CONFIG_BLK_CGROUP | ||
27 | |||
28 | enum blkg_rwstat_type { | ||
29 | BLKG_RWSTAT_READ, | ||
30 | BLKG_RWSTAT_WRITE, | ||
31 | BLKG_RWSTAT_SYNC, | ||
32 | BLKG_RWSTAT_ASYNC, | ||
33 | |||
34 | BLKG_RWSTAT_NR, | ||
35 | BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR, | ||
36 | }; | ||
37 | |||
38 | struct blkcg_gq; | ||
39 | |||
40 | struct blkcg { | ||
41 | struct cgroup_subsys_state css; | ||
42 | spinlock_t lock; | ||
43 | |||
44 | struct radix_tree_root blkg_tree; | ||
45 | struct blkcg_gq *blkg_hint; | ||
46 | struct hlist_head blkg_list; | ||
47 | |||
48 | struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; | ||
49 | }; | ||
50 | |||
51 | struct blkg_stat { | ||
52 | struct u64_stats_sync syncp; | ||
53 | uint64_t cnt; | ||
54 | }; | ||
55 | |||
56 | struct blkg_rwstat { | ||
57 | struct u64_stats_sync syncp; | ||
58 | uint64_t cnt[BLKG_RWSTAT_NR]; | ||
59 | }; | ||
60 | |||
61 | /* | ||
62 | * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a | ||
63 | * request_queue (q). This is used by blkcg policies which need to track | ||
64 | * information per blkcg - q pair. | ||
65 | * | ||
66 | * There can be multiple active blkcg policies and each has its private | ||
67 | * data on each blkg, the size of which is determined by | ||
68 | * blkcg_policy->pd_size. blkcg core allocates and frees such areas | ||
69 | * together with blkg and invokes pd_init/exit_fn() methods. | ||
70 | * | ||
71 | * Such private data must embed struct blkg_policy_data (pd) at the | ||
72 | * beginning and pd_size can't be smaller than pd. | ||
73 | */ | ||
74 | struct blkg_policy_data { | ||
75 | /* the blkg and policy id this per-policy data belongs to */ | ||
76 | struct blkcg_gq *blkg; | ||
77 | int plid; | ||
78 | |||
79 | /* used during policy activation */ | ||
80 | struct list_head alloc_node; | ||
81 | }; | ||
82 | |||
83 | /* | ||
84 | * Policies that need to keep per-blkcg data which is independent | ||
85 | * from any request_queue associated to it must specify its size | ||
86 | * with the cpd_size field of the blkcg_policy structure and | ||
87 | * embed a blkcg_policy_data in it. blkcg core allocates | ||
88 | * policy-specific per-blkcg structures lazily the first time | ||
89 | * they are actually needed, so it handles them together with | ||
90 | * blkgs. cpd_init() is invoked to let each policy handle | ||
91 | * per-blkcg data. | ||
92 | */ | ||
93 | struct blkcg_policy_data { | ||
94 | /* the policy id this per-policy data belongs to */ | ||
95 | int plid; | ||
96 | |||
97 | /* used during policy activation */ | ||
98 | struct list_head alloc_node; | ||
99 | }; | ||
100 | |||
101 | /* association between a blk cgroup and a request queue */ | ||
102 | struct blkcg_gq { | ||
103 | /* Pointer to the associated request_queue */ | ||
104 | struct request_queue *q; | ||
105 | struct list_head q_node; | ||
106 | struct hlist_node blkcg_node; | ||
107 | struct blkcg *blkcg; | ||
108 | |||
109 | /* all non-root blkcg_gq's are guaranteed to have access to parent */ | ||
110 | struct blkcg_gq *parent; | ||
111 | |||
112 | /* request allocation list for this blkcg-q pair */ | ||
113 | struct request_list rl; | ||
114 | |||
115 | /* reference count */ | ||
116 | atomic_t refcnt; | ||
117 | |||
118 | /* is this blkg online? protected by both blkcg and q locks */ | ||
119 | bool online; | ||
120 | |||
121 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; | ||
122 | |||
123 | struct rcu_head rcu_head; | ||
124 | }; | ||
125 | |||
126 | typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg); | ||
127 | typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); | ||
128 | typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); | ||
129 | typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); | ||
130 | typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); | ||
131 | typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); | ||
132 | |||
133 | struct blkcg_policy { | ||
134 | int plid; | ||
135 | /* policy specific private data size */ | ||
136 | size_t pd_size; | ||
137 | /* policy specific per-blkcg data size */ | ||
138 | size_t cpd_size; | ||
139 | /* cgroup files for the policy */ | ||
140 | struct cftype *cftypes; | ||
141 | |||
142 | /* operations */ | ||
143 | blkcg_pol_init_cpd_fn *cpd_init_fn; | ||
144 | blkcg_pol_init_pd_fn *pd_init_fn; | ||
145 | blkcg_pol_online_pd_fn *pd_online_fn; | ||
146 | blkcg_pol_offline_pd_fn *pd_offline_fn; | ||
147 | blkcg_pol_exit_pd_fn *pd_exit_fn; | ||
148 | blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; | ||
149 | }; | ||
150 | |||
151 | extern struct blkcg blkcg_root; | ||
152 | |||
153 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); | ||
154 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | ||
155 | struct request_queue *q); | ||
156 | int blkcg_init_queue(struct request_queue *q); | ||
157 | void blkcg_drain_queue(struct request_queue *q); | ||
158 | void blkcg_exit_queue(struct request_queue *q); | ||
159 | |||
160 | /* Blkio controller policy registration */ | ||
161 | int blkcg_policy_register(struct blkcg_policy *pol); | ||
162 | void blkcg_policy_unregister(struct blkcg_policy *pol); | ||
163 | int blkcg_activate_policy(struct request_queue *q, | ||
164 | const struct blkcg_policy *pol); | ||
165 | void blkcg_deactivate_policy(struct request_queue *q, | ||
166 | const struct blkcg_policy *pol); | ||
167 | |||
168 | void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, | ||
169 | u64 (*prfill)(struct seq_file *, | ||
170 | struct blkg_policy_data *, int), | ||
171 | const struct blkcg_policy *pol, int data, | ||
172 | bool show_total); | ||
173 | u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); | ||
174 | u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | ||
175 | const struct blkg_rwstat *rwstat); | ||
176 | u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); | ||
177 | u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | ||
178 | int off); | ||
179 | |||
180 | u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); | ||
181 | struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, | ||
182 | int off); | ||
183 | |||
184 | struct blkg_conf_ctx { | ||
185 | struct gendisk *disk; | ||
186 | struct blkcg_gq *blkg; | ||
187 | u64 v; | ||
188 | }; | ||
189 | |||
190 | int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, | ||
191 | const char *input, struct blkg_conf_ctx *ctx); | ||
192 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); | ||
193 | |||
194 | |||
195 | static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) | ||
196 | { | ||
197 | return css ? container_of(css, struct blkcg, css) : NULL; | ||
198 | } | ||
199 | |||
200 | static inline struct blkcg *task_blkcg(struct task_struct *tsk) | ||
201 | { | ||
202 | return css_to_blkcg(task_css(tsk, blkio_cgrp_id)); | ||
203 | } | ||
204 | |||
205 | static inline struct blkcg *bio_blkcg(struct bio *bio) | ||
206 | { | ||
207 | if (bio && bio->bi_css) | ||
208 | return css_to_blkcg(bio->bi_css); | ||
209 | return task_blkcg(current); | ||
210 | } | ||
211 | |||
212 | /** | ||
213 | * blkcg_parent - get the parent of a blkcg | ||
214 | * @blkcg: blkcg of interest | ||
215 | * | ||
216 | * Return the parent blkcg of @blkcg. Can be called anytime. | ||
217 | */ | ||
218 | static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) | ||
219 | { | ||
220 | return css_to_blkcg(blkcg->css.parent); | ||
221 | } | ||
222 | |||
223 | /** | ||
224 | * blkg_to_pdata - get policy private data | ||
225 | * @blkg: blkg of interest | ||
226 | * @pol: policy of interest | ||
227 | * | ||
228 | * Return pointer to private data associated with the @blkg-@pol pair. | ||
229 | */ | ||
230 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | ||
231 | struct blkcg_policy *pol) | ||
232 | { | ||
233 | return blkg ? blkg->pd[pol->plid] : NULL; | ||
234 | } | ||
235 | |||
236 | static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, | ||
237 | struct blkcg_policy *pol) | ||
238 | { | ||
239 | return blkcg ? blkcg->pd[pol->plid] : NULL; | ||
240 | } | ||
241 | |||
242 | /** | ||
243 | * pdata_to_blkg - get blkg associated with policy private data | ||
244 | * @pd: policy private data of interest | ||
245 | * | ||
246 | * @pd is policy private data. Determine the blkg it's associated with. | ||
247 | */ | ||
248 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) | ||
249 | { | ||
250 | return pd ? pd->blkg : NULL; | ||
251 | } | ||
252 | |||
253 | /** | ||
254 | * blkg_path - format cgroup path of blkg | ||
255 | * @blkg: blkg of interest | ||
256 | * @buf: target buffer | ||
257 | * @buflen: target buffer length | ||
258 | * | ||
259 | * Format the path of the cgroup of @blkg into @buf. | ||
260 | */ | ||
261 | static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) | ||
262 | { | ||
263 | char *p; | ||
264 | |||
265 | p = cgroup_path(blkg->blkcg->css.cgroup, buf, buflen); | ||
266 | if (!p) { | ||
267 | strncpy(buf, "<unavailable>", buflen); | ||
268 | return -ENAMETOOLONG; | ||
269 | } | ||
270 | |||
271 | memmove(buf, p, buf + buflen - p); | ||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | /** | ||
276 | * blkg_get - get a blkg reference | ||
277 | * @blkg: blkg to get | ||
278 | * | ||
279 | * The caller should be holding an existing reference. | ||
280 | */ | ||
281 | static inline void blkg_get(struct blkcg_gq *blkg) | ||
282 | { | ||
283 | WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); | ||
284 | atomic_inc(&blkg->refcnt); | ||
285 | } | ||
286 | |||
287 | void __blkg_release_rcu(struct rcu_head *rcu); | ||
288 | |||
289 | /** | ||
290 | * blkg_put - put a blkg reference | ||
291 | * @blkg: blkg to put | ||
292 | */ | ||
293 | static inline void blkg_put(struct blkcg_gq *blkg) | ||
294 | { | ||
295 | WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); | ||
296 | if (atomic_dec_and_test(&blkg->refcnt)) | ||
297 | call_rcu(&blkg->rcu_head, __blkg_release_rcu); | ||
298 | } | ||
299 | |||
300 | struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, | ||
301 | bool update_hint); | ||
302 | |||
303 | /** | ||
304 | * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants | ||
305 | * @d_blkg: loop cursor pointing to the current descendant | ||
306 | * @pos_css: used for iteration | ||
307 | * @p_blkg: target blkg to walk descendants of | ||
308 | * | ||
309 | * Walk @c_blkg through the descendants of @p_blkg. Must be used with RCU | ||
310 | * read locked. If called under either blkcg or queue lock, the iteration | ||
311 | * is guaranteed to include all and only online blkgs. The caller may | ||
312 | * update @pos_css by calling css_rightmost_descendant() to skip subtree. | ||
313 | * @p_blkg is included in the iteration and the first node to be visited. | ||
314 | */ | ||
315 | #define blkg_for_each_descendant_pre(d_blkg, pos_css, p_blkg) \ | ||
316 | css_for_each_descendant_pre((pos_css), &(p_blkg)->blkcg->css) \ | ||
317 | if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ | ||
318 | (p_blkg)->q, false))) | ||
319 | |||
320 | /** | ||
321 | * blkg_for_each_descendant_post - post-order walk of a blkg's descendants | ||
322 | * @d_blkg: loop cursor pointing to the current descendant | ||
323 | * @pos_css: used for iteration | ||
324 | * @p_blkg: target blkg to walk descendants of | ||
325 | * | ||
326 | * Similar to blkg_for_each_descendant_pre() but performs post-order | ||
327 | * traversal instead. Synchronization rules are the same. @p_blkg is | ||
328 | * included in the iteration and the last node to be visited. | ||
329 | */ | ||
330 | #define blkg_for_each_descendant_post(d_blkg, pos_css, p_blkg) \ | ||
331 | css_for_each_descendant_post((pos_css), &(p_blkg)->blkcg->css) \ | ||
332 | if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ | ||
333 | (p_blkg)->q, false))) | ||
334 | |||
335 | /** | ||
336 | * blk_get_rl - get request_list to use | ||
337 | * @q: request_queue of interest | ||
338 | * @bio: bio which will be attached to the allocated request (may be %NULL) | ||
339 | * | ||
340 | * The caller wants to allocate a request from @q to use for @bio. Find | ||
341 | * the request_list to use and obtain a reference on it. Should be called | ||
342 | * under queue_lock. This function is guaranteed to return non-%NULL | ||
343 | * request_list. | ||
344 | */ | ||
345 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
346 | struct bio *bio) | ||
347 | { | ||
348 | struct blkcg *blkcg; | ||
349 | struct blkcg_gq *blkg; | ||
350 | |||
351 | rcu_read_lock(); | ||
352 | |||
353 | blkcg = bio_blkcg(bio); | ||
354 | |||
355 | /* bypass blkg lookup and use @q->root_rl directly for root */ | ||
356 | if (blkcg == &blkcg_root) | ||
357 | goto root_rl; | ||
358 | |||
359 | /* | ||
360 | * Try to use blkg->rl. blkg lookup may fail under memory pressure | ||
361 | * or if either the blkcg or queue is going away. Fall back to | ||
362 | * root_rl in such cases. | ||
363 | */ | ||
364 | blkg = blkg_lookup_create(blkcg, q); | ||
365 | if (unlikely(IS_ERR(blkg))) | ||
366 | goto root_rl; | ||
367 | |||
368 | blkg_get(blkg); | ||
369 | rcu_read_unlock(); | ||
370 | return &blkg->rl; | ||
371 | root_rl: | ||
372 | rcu_read_unlock(); | ||
373 | return &q->root_rl; | ||
374 | } | ||
375 | |||
376 | /** | ||
377 | * blk_put_rl - put request_list | ||
378 | * @rl: request_list to put | ||
379 | * | ||
380 | * Put the reference acquired by blk_get_rl(). Should be called under | ||
381 | * queue_lock. | ||
382 | */ | ||
383 | static inline void blk_put_rl(struct request_list *rl) | ||
384 | { | ||
385 | /* root_rl may not have blkg set */ | ||
386 | if (rl->blkg && rl->blkg->blkcg != &blkcg_root) | ||
387 | blkg_put(rl->blkg); | ||
388 | } | ||
389 | |||
390 | /** | ||
391 | * blk_rq_set_rl - associate a request with a request_list | ||
392 | * @rq: request of interest | ||
393 | * @rl: target request_list | ||
394 | * | ||
395 | * Associate @rq with @rl so that accounting and freeing can know the | ||
396 | * request_list @rq came from. | ||
397 | */ | ||
398 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) | ||
399 | { | ||
400 | rq->rl = rl; | ||
401 | } | ||
402 | |||
403 | /** | ||
404 | * blk_rq_rl - return the request_list a request came from | ||
405 | * @rq: request of interest | ||
406 | * | ||
407 | * Return the request_list @rq is allocated from. | ||
408 | */ | ||
409 | static inline struct request_list *blk_rq_rl(struct request *rq) | ||
410 | { | ||
411 | return rq->rl; | ||
412 | } | ||
413 | |||
414 | struct request_list *__blk_queue_next_rl(struct request_list *rl, | ||
415 | struct request_queue *q); | ||
416 | /** | ||
417 | * blk_queue_for_each_rl - iterate through all request_lists of a request_queue | ||
418 | * | ||
419 | * Should be used under queue_lock. | ||
420 | */ | ||
421 | #define blk_queue_for_each_rl(rl, q) \ | ||
422 | for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) | ||
423 | |||
424 | static inline void blkg_stat_init(struct blkg_stat *stat) | ||
425 | { | ||
426 | u64_stats_init(&stat->syncp); | ||
427 | } | ||
428 | |||
429 | /** | ||
430 | * blkg_stat_add - add a value to a blkg_stat | ||
431 | * @stat: target blkg_stat | ||
432 | * @val: value to add | ||
433 | * | ||
434 | * Add @val to @stat. The caller is responsible for synchronizing calls to | ||
435 | * this function. | ||
436 | */ | ||
437 | static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) | ||
438 | { | ||
439 | u64_stats_update_begin(&stat->syncp); | ||
440 | stat->cnt += val; | ||
441 | u64_stats_update_end(&stat->syncp); | ||
442 | } | ||
443 | |||
444 | /** | ||
445 | * blkg_stat_read - read the current value of a blkg_stat | ||
446 | * @stat: blkg_stat to read | ||
447 | * | ||
448 | * Read the current value of @stat. This function can be called without | ||
449 | * synchroniztion and takes care of u64 atomicity. | ||
450 | */ | ||
451 | static inline uint64_t blkg_stat_read(struct blkg_stat *stat) | ||
452 | { | ||
453 | unsigned int start; | ||
454 | uint64_t v; | ||
455 | |||
456 | do { | ||
457 | start = u64_stats_fetch_begin_irq(&stat->syncp); | ||
458 | v = stat->cnt; | ||
459 | } while (u64_stats_fetch_retry_irq(&stat->syncp, start)); | ||
460 | |||
461 | return v; | ||
462 | } | ||
463 | |||
464 | /** | ||
465 | * blkg_stat_reset - reset a blkg_stat | ||
466 | * @stat: blkg_stat to reset | ||
467 | */ | ||
468 | static inline void blkg_stat_reset(struct blkg_stat *stat) | ||
469 | { | ||
470 | stat->cnt = 0; | ||
471 | } | ||
472 | |||
473 | /** | ||
474 | * blkg_stat_merge - merge a blkg_stat into another | ||
475 | * @to: the destination blkg_stat | ||
476 | * @from: the source | ||
477 | * | ||
478 | * Add @from's count to @to. | ||
479 | */ | ||
480 | static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) | ||
481 | { | ||
482 | blkg_stat_add(to, blkg_stat_read(from)); | ||
483 | } | ||
484 | |||
485 | static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) | ||
486 | { | ||
487 | u64_stats_init(&rwstat->syncp); | ||
488 | } | ||
489 | |||
490 | /** | ||
491 | * blkg_rwstat_add - add a value to a blkg_rwstat | ||
492 | * @rwstat: target blkg_rwstat | ||
493 | * @rw: mask of REQ_{WRITE|SYNC} | ||
494 | * @val: value to add | ||
495 | * | ||
496 | * Add @val to @rwstat. The counters are chosen according to @rw. The | ||
497 | * caller is responsible for synchronizing calls to this function. | ||
498 | */ | ||
499 | static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, | ||
500 | int rw, uint64_t val) | ||
501 | { | ||
502 | u64_stats_update_begin(&rwstat->syncp); | ||
503 | |||
504 | if (rw & REQ_WRITE) | ||
505 | rwstat->cnt[BLKG_RWSTAT_WRITE] += val; | ||
506 | else | ||
507 | rwstat->cnt[BLKG_RWSTAT_READ] += val; | ||
508 | if (rw & REQ_SYNC) | ||
509 | rwstat->cnt[BLKG_RWSTAT_SYNC] += val; | ||
510 | else | ||
511 | rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; | ||
512 | |||
513 | u64_stats_update_end(&rwstat->syncp); | ||
514 | } | ||
515 | |||
516 | /** | ||
517 | * blkg_rwstat_read - read the current values of a blkg_rwstat | ||
518 | * @rwstat: blkg_rwstat to read | ||
519 | * | ||
520 | * Read the current snapshot of @rwstat and return it as the return value. | ||
521 | * This function can be called without synchronization and takes care of | ||
522 | * u64 atomicity. | ||
523 | */ | ||
524 | static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) | ||
525 | { | ||
526 | unsigned int start; | ||
527 | struct blkg_rwstat tmp; | ||
528 | |||
529 | do { | ||
530 | start = u64_stats_fetch_begin_irq(&rwstat->syncp); | ||
531 | tmp = *rwstat; | ||
532 | } while (u64_stats_fetch_retry_irq(&rwstat->syncp, start)); | ||
533 | |||
534 | return tmp; | ||
535 | } | ||
536 | |||
537 | /** | ||
538 | * blkg_rwstat_total - read the total count of a blkg_rwstat | ||
539 | * @rwstat: blkg_rwstat to read | ||
540 | * | ||
541 | * Return the total count of @rwstat regardless of the IO direction. This | ||
542 | * function can be called without synchronization and takes care of u64 | ||
543 | * atomicity. | ||
544 | */ | ||
545 | static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) | ||
546 | { | ||
547 | struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); | ||
548 | |||
549 | return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; | ||
550 | } | ||
551 | |||
552 | /** | ||
553 | * blkg_rwstat_reset - reset a blkg_rwstat | ||
554 | * @rwstat: blkg_rwstat to reset | ||
555 | */ | ||
556 | static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) | ||
557 | { | ||
558 | memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); | ||
559 | } | ||
560 | |||
561 | /** | ||
562 | * blkg_rwstat_merge - merge a blkg_rwstat into another | ||
563 | * @to: the destination blkg_rwstat | ||
564 | * @from: the source | ||
565 | * | ||
566 | * Add @from's counts to @to. | ||
567 | */ | ||
568 | static inline void blkg_rwstat_merge(struct blkg_rwstat *to, | ||
569 | struct blkg_rwstat *from) | ||
570 | { | ||
571 | struct blkg_rwstat v = blkg_rwstat_read(from); | ||
572 | int i; | ||
573 | |||
574 | u64_stats_update_begin(&to->syncp); | ||
575 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | ||
576 | to->cnt[i] += v.cnt[i]; | ||
577 | u64_stats_update_end(&to->syncp); | ||
578 | } | ||
579 | |||
580 | #else /* CONFIG_BLK_CGROUP */ | ||
581 | |||
582 | struct cgroup; | ||
583 | struct blkcg; | ||
584 | |||
585 | struct blkg_policy_data { | ||
586 | }; | ||
587 | |||
588 | struct blkcg_policy_data { | ||
589 | }; | ||
590 | |||
591 | struct blkcg_gq { | ||
592 | }; | ||
593 | |||
594 | struct blkcg_policy { | ||
595 | }; | ||
596 | |||
597 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } | ||
598 | static inline int blkcg_init_queue(struct request_queue *q) { return 0; } | ||
599 | static inline void blkcg_drain_queue(struct request_queue *q) { } | ||
600 | static inline void blkcg_exit_queue(struct request_queue *q) { } | ||
601 | static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } | ||
602 | static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } | ||
603 | static inline int blkcg_activate_policy(struct request_queue *q, | ||
604 | const struct blkcg_policy *pol) { return 0; } | ||
605 | static inline void blkcg_deactivate_policy(struct request_queue *q, | ||
606 | const struct blkcg_policy *pol) { } | ||
607 | |||
608 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } | ||
609 | |||
610 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | ||
611 | struct blkcg_policy *pol) { return NULL; } | ||
612 | static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; } | ||
613 | static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } | ||
614 | static inline void blkg_get(struct blkcg_gq *blkg) { } | ||
615 | static inline void blkg_put(struct blkcg_gq *blkg) { } | ||
616 | |||
617 | static inline struct request_list *blk_get_rl(struct request_queue *q, | ||
618 | struct bio *bio) { return &q->root_rl; } | ||
619 | static inline void blk_put_rl(struct request_list *rl) { } | ||
620 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } | ||
621 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } | ||
622 | |||
623 | #define blk_queue_for_each_rl(rl, q) \ | ||
624 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) | ||
625 | |||
626 | #endif /* CONFIG_BLK_CGROUP */ | ||
627 | #endif /* _BLK_CGROUP_H */ | ||
diff --git a/block/blk-core.c b/block/blk-core.c
index f6ab750060fe..688ae9482cb8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -32,12 +32,12 @@
32 | #include <linux/delay.h> | 32 | #include <linux/delay.h> |
33 | #include <linux/ratelimit.h> | 33 | #include <linux/ratelimit.h> |
34 | #include <linux/pm_runtime.h> | 34 | #include <linux/pm_runtime.h> |
35 | #include <linux/blk-cgroup.h> | ||
35 | 36 | ||
36 | #define CREATE_TRACE_POINTS | 37 | #define CREATE_TRACE_POINTS |
37 | #include <trace/events/block.h> | 38 | #include <trace/events/block.h> |
38 | 39 | ||
39 | #include "blk.h" | 40 | #include "blk.h" |
40 | #include "blk-cgroup.h" | ||
41 | #include "blk-mq.h" | 41 | #include "blk-mq.h" |
42 | 42 | ||
43 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); | 43 | EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); |
@@ -63,6 +63,31 @@ struct kmem_cache *blk_requestq_cachep;
63 | */ | 63 | */ |
64 | static struct workqueue_struct *kblockd_workqueue; | 64 | static struct workqueue_struct *kblockd_workqueue; |
65 | 65 | ||
66 | static void blk_clear_congested(struct request_list *rl, int sync) | ||
67 | { | ||
68 | #ifdef CONFIG_CGROUP_WRITEBACK | ||
69 | clear_wb_congested(rl->blkg->wb_congested, sync); | ||
70 | #else | ||
71 | /* | ||
72 | * If !CGROUP_WRITEBACK, all blkg's map to bdi->wb and we shouldn't | ||
73 | * flip its congestion state for events on other blkcgs. | ||
74 | */ | ||
75 | if (rl == &rl->q->root_rl) | ||
76 | clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync); | ||
77 | #endif | ||
78 | } | ||
79 | |||
80 | static void blk_set_congested(struct request_list *rl, int sync) | ||
81 | { | ||
82 | #ifdef CONFIG_CGROUP_WRITEBACK | ||
83 | set_wb_congested(rl->blkg->wb_congested, sync); | ||
84 | #else | ||
85 | /* see blk_clear_congested() */ | ||
86 | if (rl == &rl->q->root_rl) | ||
87 | set_wb_congested(rl->q->backing_dev_info.wb.congested, sync); | ||
88 | #endif | ||
89 | } | ||
90 | |||
66 | void blk_queue_congestion_threshold(struct request_queue *q) | 91 | void blk_queue_congestion_threshold(struct request_queue *q) |
67 | { | 92 | { |
68 | int nr; | 93 | int nr; |
@@ -623,8 +648,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
623 | 648 | ||
624 | q->backing_dev_info.ra_pages = | 649 | q->backing_dev_info.ra_pages = |
625 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 650 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
626 | q->backing_dev_info.state = 0; | 651 | q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK; |
627 | q->backing_dev_info.capabilities = 0; | ||
628 | q->backing_dev_info.name = "block"; | 652 | q->backing_dev_info.name = "block"; |
629 | q->node = node_id; | 653 | q->node = node_id; |
630 | 654 | ||
@@ -847,13 +871,8 @@ static void __freed_request(struct request_list *rl, int sync)
847 | { | 871 | { |
848 | struct request_queue *q = rl->q; | 872 | struct request_queue *q = rl->q; |
849 | 873 | ||
850 | /* | 874 | if (rl->count[sync] < queue_congestion_off_threshold(q)) |
851 | * bdi isn't aware of blkcg yet. As all async IOs end up root | 875 | blk_clear_congested(rl, sync); |
852 | * blkcg anyway, just use root blkcg state. | ||
853 | */ | ||
854 | if (rl == &q->root_rl && | ||
855 | rl->count[sync] < queue_congestion_off_threshold(q)) | ||
856 | blk_clear_queue_congested(q, sync); | ||
857 | 876 | ||
858 | if (rl->count[sync] + 1 <= q->nr_requests) { | 877 | if (rl->count[sync] + 1 <= q->nr_requests) { |
859 | if (waitqueue_active(&rl->wait[sync])) | 878 | if (waitqueue_active(&rl->wait[sync])) |
@@ -886,25 +905,25 @@ static void freed_request(struct request_list *rl, unsigned int flags)
886 | int blk_update_nr_requests(struct request_queue *q, unsigned int nr) | 905 | int blk_update_nr_requests(struct request_queue *q, unsigned int nr) |
887 | { | 906 | { |
888 | struct request_list *rl; | 907 | struct request_list *rl; |
908 | int on_thresh, off_thresh; | ||
889 | 909 | ||
890 | spin_lock_irq(q->queue_lock); | 910 | spin_lock_irq(q->queue_lock); |
891 | q->nr_requests = nr; | 911 | q->nr_requests = nr; |
892 | blk_queue_congestion_threshold(q); | 912 | blk_queue_congestion_threshold(q); |
913 | on_thresh = queue_congestion_on_threshold(q); | ||
914 | off_thresh = queue_congestion_off_threshold(q); | ||
893 | 915 | ||
894 | /* congestion isn't cgroup aware and follows root blkcg for now */ | 916 | blk_queue_for_each_rl(rl, q) { |
895 | rl = &q->root_rl; | 917 | if (rl->count[BLK_RW_SYNC] >= on_thresh) |
896 | 918 | blk_set_congested(rl, BLK_RW_SYNC); | |
897 | if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) | 919 | else if (rl->count[BLK_RW_SYNC] < off_thresh) |
898 | blk_set_queue_congested(q, BLK_RW_SYNC); | 920 | blk_clear_congested(rl, BLK_RW_SYNC); |
899 | else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) | ||
900 | blk_clear_queue_congested(q, BLK_RW_SYNC); | ||
901 | 921 | ||
902 | if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) | 922 | if (rl->count[BLK_RW_ASYNC] >= on_thresh) |
903 | blk_set_queue_congested(q, BLK_RW_ASYNC); | 923 | blk_set_congested(rl, BLK_RW_ASYNC); |
904 | else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) | 924 | else if (rl->count[BLK_RW_ASYNC] < off_thresh) |
905 | blk_clear_queue_congested(q, BLK_RW_ASYNC); | 925 | blk_clear_congested(rl, BLK_RW_ASYNC); |
906 | 926 | ||
907 | blk_queue_for_each_rl(rl, q) { | ||
908 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { | 927 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { |
909 | blk_set_rl_full(rl, BLK_RW_SYNC); | 928 | blk_set_rl_full(rl, BLK_RW_SYNC); |
910 | } else { | 929 | } else { |
@@ -1014,12 +1033,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
1014 | } | 1033 | } |
1015 | } | 1034 | } |
1016 | } | 1035 | } |
1017 | /* | 1036 | blk_set_congested(rl, is_sync); |
1018 | * bdi isn't aware of blkcg yet. As all async IOs end up | ||
1019 | * root blkcg anyway, just use root blkcg state. | ||
1020 | */ | ||
1021 | if (rl == &q->root_rl) | ||
1022 | blk_set_queue_congested(q, is_sync); | ||
1023 | } | 1037 | } |
1024 | 1038 | ||
1025 | /* | 1039 | /* |
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 79ffb4855af0..f548b64be092 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -21,6 +21,7 @@
21 | */ | 21 | */ |
22 | 22 | ||
23 | #include <linux/blkdev.h> | 23 | #include <linux/blkdev.h> |
24 | #include <linux/backing-dev.h> | ||
24 | #include <linux/mempool.h> | 25 | #include <linux/mempool.h> |
25 | #include <linux/bio.h> | 26 | #include <linux/bio.h> |
26 | #include <linux/scatterlist.h> | 27 | #include <linux/scatterlist.h> |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 2b8fd302f677..6264b382d4d1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -6,11 +6,12 @@
6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
7 | #include <linux/bio.h> | 7 | #include <linux/bio.h> |
8 | #include <linux/blkdev.h> | 8 | #include <linux/blkdev.h> |
9 | #include <linux/backing-dev.h> | ||
9 | #include <linux/blktrace_api.h> | 10 | #include <linux/blktrace_api.h> |
10 | #include <linux/blk-mq.h> | 11 | #include <linux/blk-mq.h> |
12 | #include <linux/blk-cgroup.h> | ||
11 | 13 | ||
12 | #include "blk.h" | 14 | #include "blk.h" |
13 | #include "blk-cgroup.h" | ||
14 | #include "blk-mq.h" | 15 | #include "blk-mq.h" |
15 | 16 | ||
16 | struct queue_sysfs_entry { | 17 | struct queue_sysfs_entry { |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 5b9c6d5c3636..b23193518ac7 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -9,7 +9,7 @@
9 | #include <linux/blkdev.h> | 9 | #include <linux/blkdev.h> |
10 | #include <linux/bio.h> | 10 | #include <linux/bio.h> |
11 | #include <linux/blktrace_api.h> | 11 | #include <linux/blktrace_api.h> |
12 | #include "blk-cgroup.h" | 12 | #include <linux/blk-cgroup.h> |
13 | #include "blk.h" | 13 | #include "blk.h" |
14 | 14 | ||
15 | /* Max dispatch from a group in 1 round */ | 15 | /* Max dispatch from a group in 1 round */ |
diff --git a/block/bounce.c b/block/bounce.c
index 3ab0bce1c947..b17311227c12 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -13,6 +13,7 @@
13 | #include <linux/pagemap.h> | 13 | #include <linux/pagemap.h> |
14 | #include <linux/mempool.h> | 14 | #include <linux/mempool.h> |
15 | #include <linux/blkdev.h> | 15 | #include <linux/blkdev.h> |
16 | #include <linux/backing-dev.h> | ||
16 | #include <linux/init.h> | 17 | #include <linux/init.h> |
17 | #include <linux/hash.h> | 18 | #include <linux/hash.h> |
18 | #include <linux/highmem.h> | 19 | #include <linux/highmem.h> |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index d8ad45ccd8fa..c62bb2e650b8 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -14,8 +14,8 @@
14 | #include <linux/rbtree.h> | 14 | #include <linux/rbtree.h> |
15 | #include <linux/ioprio.h> | 15 | #include <linux/ioprio.h> |
16 | #include <linux/blktrace_api.h> | 16 | #include <linux/blktrace_api.h> |
17 | #include <linux/blk-cgroup.h> | ||
17 | #include "blk.h" | 18 | #include "blk.h" |
18 | #include "blk-cgroup.h" | ||
19 | 19 | ||
20 | /* | 20 | /* |
21 | * tunables | 21 | * tunables |
diff --git a/block/elevator.c b/block/elevator.c
index 942579d04128..84d63943f2de 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -35,11 +35,11 @@
35 | #include <linux/hash.h> | 35 | #include <linux/hash.h> |
36 | #include <linux/uaccess.h> | 36 | #include <linux/uaccess.h> |
37 | #include <linux/pm_runtime.h> | 37 | #include <linux/pm_runtime.h> |
38 | #include <linux/blk-cgroup.h> | ||
38 | 39 | ||
39 | #include <trace/events/block.h> | 40 | #include <trace/events/block.h> |
40 | 41 | ||
41 | #include "blk.h" | 42 | #include "blk.h" |
42 | #include "blk-cgroup.h" | ||
43 | 43 | ||
44 | static DEFINE_SPINLOCK(elv_list_lock); | 44 | static DEFINE_SPINLOCK(elv_list_lock); |
45 | static LIST_HEAD(elv_list); | 45 | static LIST_HEAD(elv_list); |
diff --git a/block/genhd.c b/block/genhd.c
index ea982eadaf63..59a1395eedac 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -8,6 +8,7 @@
8 | #include <linux/kdev_t.h> | 8 | #include <linux/kdev_t.h> |
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/blkdev.h> | 10 | #include <linux/blkdev.h> |
11 | #include <linux/backing-dev.h> | ||
11 | #include <linux/init.h> | 12 | #include <linux/init.h> |
12 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
13 | #include <linux/proc_fs.h> | 14 | #include <linux/proc_fs.h> |