diff options
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/backing-dev.h | 26 | ||||
| -rw-r--r-- | include/linux/blk-cgroup.h | 340 | ||||
| -rw-r--r-- | include/linux/cgroup_subsys.h | 2 | ||||
| -rw-r--r-- | include/linux/kernfs.h | 4 |
4 files changed, 256 insertions, 116 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0fe9df983ab7..5a5d79ee256f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h | |||
| @@ -286,7 +286,7 @@ static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi | |||
| 286 | * %current's blkcg equals the effective blkcg of its memcg. No | 286 | * %current's blkcg equals the effective blkcg of its memcg. No |
| 287 | * need to use the relatively expensive cgroup_get_e_css(). | 287 | * need to use the relatively expensive cgroup_get_e_css(). |
| 288 | */ | 288 | */ |
| 289 | if (likely(wb && wb->blkcg_css == task_css(current, blkio_cgrp_id))) | 289 | if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id))) |
| 290 | return wb; | 290 | return wb; |
| 291 | return NULL; | 291 | return NULL; |
| 292 | } | 292 | } |
| @@ -402,7 +402,7 @@ static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) | |||
| 402 | } | 402 | } |
| 403 | 403 | ||
| 404 | struct wb_iter { | 404 | struct wb_iter { |
| 405 | int start_blkcg_id; | 405 | int start_memcg_id; |
| 406 | struct radix_tree_iter tree_iter; | 406 | struct radix_tree_iter tree_iter; |
| 407 | void **slot; | 407 | void **slot; |
| 408 | }; | 408 | }; |
| @@ -414,9 +414,9 @@ static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter, | |||
| 414 | 414 | ||
| 415 | WARN_ON_ONCE(!rcu_read_lock_held()); | 415 | WARN_ON_ONCE(!rcu_read_lock_held()); |
| 416 | 416 | ||
| 417 | if (iter->start_blkcg_id >= 0) { | 417 | if (iter->start_memcg_id >= 0) { |
| 418 | iter->slot = radix_tree_iter_init(titer, iter->start_blkcg_id); | 418 | iter->slot = radix_tree_iter_init(titer, iter->start_memcg_id); |
| 419 | iter->start_blkcg_id = -1; | 419 | iter->start_memcg_id = -1; |
| 420 | } else { | 420 | } else { |
| 421 | iter->slot = radix_tree_next_slot(iter->slot, titer, 0); | 421 | iter->slot = radix_tree_next_slot(iter->slot, titer, 0); |
| 422 | } | 422 | } |
| @@ -430,30 +430,30 @@ static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter, | |||
| 430 | 430 | ||
| 431 | static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter, | 431 | static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter, |
| 432 | struct backing_dev_info *bdi, | 432 | struct backing_dev_info *bdi, |
| 433 | int start_blkcg_id) | 433 | int start_memcg_id) |
| 434 | { | 434 | { |
| 435 | iter->start_blkcg_id = start_blkcg_id; | 435 | iter->start_memcg_id = start_memcg_id; |
| 436 | 436 | ||
| 437 | if (start_blkcg_id) | 437 | if (start_memcg_id) |
| 438 | return __wb_iter_next(iter, bdi); | 438 | return __wb_iter_next(iter, bdi); |
| 439 | else | 439 | else |
| 440 | return &bdi->wb; | 440 | return &bdi->wb; |
| 441 | } | 441 | } |
| 442 | 442 | ||
| 443 | /** | 443 | /** |
| 444 | * bdi_for_each_wb - walk all wb's of a bdi in ascending blkcg ID order | 444 | * bdi_for_each_wb - walk all wb's of a bdi in ascending memcg ID order |
| 445 | * @wb_cur: cursor struct bdi_writeback pointer | 445 | * @wb_cur: cursor struct bdi_writeback pointer |
| 446 | * @bdi: bdi to walk wb's of | 446 | * @bdi: bdi to walk wb's of |
| 447 | * @iter: pointer to struct wb_iter to be used as iteration buffer | 447 | * @iter: pointer to struct wb_iter to be used as iteration buffer |
| 448 | * @start_blkcg_id: blkcg ID to start iteration from | 448 | * @start_memcg_id: memcg ID to start iteration from |
| 449 | * | 449 | * |
| 450 | * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending | 450 | * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending |
| 451 | * blkcg ID order starting from @start_blkcg_id. @iter is struct wb_iter | 451 | * memcg ID order starting from @start_memcg_id. @iter is struct wb_iter |
| 452 | * to be used as temp storage during iteration. rcu_read_lock() must be | 452 | * to be used as temp storage during iteration. rcu_read_lock() must be |
| 453 | * held throughout iteration. | 453 | * held throughout iteration. |
| 454 | */ | 454 | */ |
| 455 | #define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \ | 455 | #define bdi_for_each_wb(wb_cur, bdi, iter, start_memcg_id) \ |
| 456 | for ((wb_cur) = __wb_iter_init(iter, bdi, start_blkcg_id); \ | 456 | for ((wb_cur) = __wb_iter_init(iter, bdi, start_memcg_id); \ |
| 457 | (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi)) | 457 | (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi)) |
| 458 | 458 | ||
| 459 | #else /* CONFIG_CGROUP_WRITEBACK */ | 459 | #else /* CONFIG_CGROUP_WRITEBACK */ |
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index a4cd1641e9e2..0a5cc7a1109b 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h | |||
| @@ -14,12 +14,15 @@ | |||
| 14 | */ | 14 | */ |
| 15 | 15 | ||
| 16 | #include <linux/cgroup.h> | 16 | #include <linux/cgroup.h> |
| 17 | #include <linux/u64_stats_sync.h> | 17 | #include <linux/percpu_counter.h> |
| 18 | #include <linux/seq_file.h> | 18 | #include <linux/seq_file.h> |
| 19 | #include <linux/radix-tree.h> | 19 | #include <linux/radix-tree.h> |
| 20 | #include <linux/blkdev.h> | 20 | #include <linux/blkdev.h> |
| 21 | #include <linux/atomic.h> | 21 | #include <linux/atomic.h> |
| 22 | 22 | ||
| 23 | /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ | ||
| 24 | #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) | ||
| 25 | |||
| 23 | /* Max limits for throttle policy */ | 26 | /* Max limits for throttle policy */ |
| 24 | #define THROTL_IOPS_MAX UINT_MAX | 27 | #define THROTL_IOPS_MAX UINT_MAX |
| 25 | 28 | ||
| @@ -45,7 +48,7 @@ struct blkcg { | |||
| 45 | struct blkcg_gq *blkg_hint; | 48 | struct blkcg_gq *blkg_hint; |
| 46 | struct hlist_head blkg_list; | 49 | struct hlist_head blkg_list; |
| 47 | 50 | ||
| 48 | struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; | 51 | struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; |
| 49 | 52 | ||
| 50 | struct list_head all_blkcgs_node; | 53 | struct list_head all_blkcgs_node; |
| 51 | #ifdef CONFIG_CGROUP_WRITEBACK | 54 | #ifdef CONFIG_CGROUP_WRITEBACK |
| @@ -53,14 +56,19 @@ struct blkcg { | |||
| 53 | #endif | 56 | #endif |
| 54 | }; | 57 | }; |
| 55 | 58 | ||
| 59 | /* | ||
| 60 | * blkg_[rw]stat->aux_cnt is excluded for local stats but included for | ||
| 61 | * recursive. Used to carry stats of dead children, and, for blkg_rwstat, | ||
| 62 | * to carry result values from read and sum operations. | ||
| 63 | */ | ||
| 56 | struct blkg_stat { | 64 | struct blkg_stat { |
| 57 | struct u64_stats_sync syncp; | 65 | struct percpu_counter cpu_cnt; |
| 58 | uint64_t cnt; | 66 | atomic64_t aux_cnt; |
| 59 | }; | 67 | }; |
| 60 | 68 | ||
| 61 | struct blkg_rwstat { | 69 | struct blkg_rwstat { |
| 62 | struct u64_stats_sync syncp; | 70 | struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR]; |
| 63 | uint64_t cnt[BLKG_RWSTAT_NR]; | 71 | atomic64_t aux_cnt[BLKG_RWSTAT_NR]; |
| 64 | }; | 72 | }; |
| 65 | 73 | ||
| 66 | /* | 74 | /* |
| @@ -68,32 +76,28 @@ struct blkg_rwstat { | |||
| 68 | * request_queue (q). This is used by blkcg policies which need to track | 76 | * request_queue (q). This is used by blkcg policies which need to track |
| 69 | * information per blkcg - q pair. | 77 | * information per blkcg - q pair. |
| 70 | * | 78 | * |
| 71 | * There can be multiple active blkcg policies and each has its private | 79 | * There can be multiple active blkcg policies and each blkg:policy pair is |
| 72 | * data on each blkg, the size of which is determined by | 80 | * represented by a blkg_policy_data which is allocated and freed by each |
| 73 | * blkcg_policy->pd_size. blkcg core allocates and frees such areas | 81 | * policy's pd_alloc/free_fn() methods. A policy can allocate private data |
| 74 | * together with blkg and invokes pd_init/exit_fn() methods. | 82 | * area by allocating larger data structure which embeds blkg_policy_data |
| 75 | * | 83 | * at the beginning. |
| 76 | * Such private data must embed struct blkg_policy_data (pd) at the | ||
| 77 | * beginning and pd_size can't be smaller than pd. | ||
| 78 | */ | 84 | */ |
| 79 | struct blkg_policy_data { | 85 | struct blkg_policy_data { |
| 80 | /* the blkg and policy id this per-policy data belongs to */ | 86 | /* the blkg and policy id this per-policy data belongs to */ |
| 81 | struct blkcg_gq *blkg; | 87 | struct blkcg_gq *blkg; |
| 82 | int plid; | 88 | int plid; |
| 83 | |||
| 84 | /* used during policy activation */ | ||
| 85 | struct list_head alloc_node; | ||
| 86 | }; | 89 | }; |
| 87 | 90 | ||
| 88 | /* | 91 | /* |
| 89 | * Policies that need to keep per-blkcg data which is independent | 92 | * Policies that need to keep per-blkcg data which is independent from any |
| 90 | * from any request_queue associated to it must specify its size | 93 | * request_queue associated to it should implement cpd_alloc/free_fn() |
| 91 | * with the cpd_size field of the blkcg_policy structure and | 94 | * methods. A policy can allocate private data area by allocating larger |
| 92 | * embed a blkcg_policy_data in it. cpd_init() is invoked to let | 95 | * data structure which embeds blkcg_policy_data at the beginning. |
| 93 | * each policy handle per-blkcg data. | 96 | * cpd_init() is invoked to let each policy handle per-blkcg data. |
| 94 | */ | 97 | */ |
| 95 | struct blkcg_policy_data { | 98 | struct blkcg_policy_data { |
| 96 | /* the policy id this per-policy data belongs to */ | 99 | /* the blkcg and policy id this per-policy data belongs to */ |
| 100 | struct blkcg *blkcg; | ||
| 97 | int plid; | 101 | int plid; |
| 98 | }; | 102 | }; |
| 99 | 103 | ||
| @@ -123,40 +127,50 @@ struct blkcg_gq { | |||
| 123 | /* is this blkg online? protected by both blkcg and q locks */ | 127 | /* is this blkg online? protected by both blkcg and q locks */ |
| 124 | bool online; | 128 | bool online; |
| 125 | 129 | ||
| 130 | struct blkg_rwstat stat_bytes; | ||
| 131 | struct blkg_rwstat stat_ios; | ||
| 132 | |||
| 126 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; | 133 | struct blkg_policy_data *pd[BLKCG_MAX_POLS]; |
| 127 | 134 | ||
| 128 | struct rcu_head rcu_head; | 135 | struct rcu_head rcu_head; |
| 129 | }; | 136 | }; |
| 130 | 137 | ||
| 131 | typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg); | 138 | typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp); |
| 132 | typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); | 139 | typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd); |
| 133 | typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); | 140 | typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd); |
| 134 | typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); | 141 | typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd); |
| 135 | typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); | 142 | typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node); |
| 136 | typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); | 143 | typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd); |
| 144 | typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); | ||
| 145 | typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); | ||
| 146 | typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); | ||
| 147 | typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); | ||
| 137 | 148 | ||
| 138 | struct blkcg_policy { | 149 | struct blkcg_policy { |
| 139 | int plid; | 150 | int plid; |
| 140 | /* policy specific private data size */ | ||
| 141 | size_t pd_size; | ||
| 142 | /* policy specific per-blkcg data size */ | ||
| 143 | size_t cpd_size; | ||
| 144 | /* cgroup files for the policy */ | 151 | /* cgroup files for the policy */ |
| 145 | struct cftype *cftypes; | 152 | struct cftype *dfl_cftypes; |
| 153 | struct cftype *legacy_cftypes; | ||
| 146 | 154 | ||
| 147 | /* operations */ | 155 | /* operations */ |
| 156 | blkcg_pol_alloc_cpd_fn *cpd_alloc_fn; | ||
| 148 | blkcg_pol_init_cpd_fn *cpd_init_fn; | 157 | blkcg_pol_init_cpd_fn *cpd_init_fn; |
| 158 | blkcg_pol_free_cpd_fn *cpd_free_fn; | ||
| 159 | blkcg_pol_bind_cpd_fn *cpd_bind_fn; | ||
| 160 | |||
| 161 | blkcg_pol_alloc_pd_fn *pd_alloc_fn; | ||
| 149 | blkcg_pol_init_pd_fn *pd_init_fn; | 162 | blkcg_pol_init_pd_fn *pd_init_fn; |
| 150 | blkcg_pol_online_pd_fn *pd_online_fn; | 163 | blkcg_pol_online_pd_fn *pd_online_fn; |
| 151 | blkcg_pol_offline_pd_fn *pd_offline_fn; | 164 | blkcg_pol_offline_pd_fn *pd_offline_fn; |
| 152 | blkcg_pol_exit_pd_fn *pd_exit_fn; | 165 | blkcg_pol_free_pd_fn *pd_free_fn; |
| 153 | blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; | 166 | blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; |
| 154 | }; | 167 | }; |
| 155 | 168 | ||
| 156 | extern struct blkcg blkcg_root; | 169 | extern struct blkcg blkcg_root; |
| 157 | extern struct cgroup_subsys_state * const blkcg_root_css; | 170 | extern struct cgroup_subsys_state * const blkcg_root_css; |
| 158 | 171 | ||
| 159 | struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); | 172 | struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, |
| 173 | struct request_queue *q, bool update_hint); | ||
| 160 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | 174 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, |
| 161 | struct request_queue *q); | 175 | struct request_queue *q); |
| 162 | int blkcg_init_queue(struct request_queue *q); | 176 | int blkcg_init_queue(struct request_queue *q); |
| @@ -171,6 +185,7 @@ int blkcg_activate_policy(struct request_queue *q, | |||
| 171 | void blkcg_deactivate_policy(struct request_queue *q, | 185 | void blkcg_deactivate_policy(struct request_queue *q, |
| 172 | const struct blkcg_policy *pol); | 186 | const struct blkcg_policy *pol); |
| 173 | 187 | ||
| 188 | const char *blkg_dev_name(struct blkcg_gq *blkg); | ||
| 174 | void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, | 189 | void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, |
| 175 | u64 (*prfill)(struct seq_file *, | 190 | u64 (*prfill)(struct seq_file *, |
| 176 | struct blkg_policy_data *, int), | 191 | struct blkg_policy_data *, int), |
| @@ -182,19 +197,24 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | |||
| 182 | u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); | 197 | u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); |
| 183 | u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, | 198 | u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, |
| 184 | int off); | 199 | int off); |
| 200 | int blkg_print_stat_bytes(struct seq_file *sf, void *v); | ||
| 201 | int blkg_print_stat_ios(struct seq_file *sf, void *v); | ||
| 202 | int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v); | ||
| 203 | int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); | ||
| 185 | 204 | ||
| 186 | u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); | 205 | u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, |
| 187 | struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, | 206 | struct blkcg_policy *pol, int off); |
| 188 | int off); | 207 | struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, |
| 208 | struct blkcg_policy *pol, int off); | ||
| 189 | 209 | ||
| 190 | struct blkg_conf_ctx { | 210 | struct blkg_conf_ctx { |
| 191 | struct gendisk *disk; | 211 | struct gendisk *disk; |
| 192 | struct blkcg_gq *blkg; | 212 | struct blkcg_gq *blkg; |
| 193 | u64 v; | 213 | char *body; |
| 194 | }; | 214 | }; |
| 195 | 215 | ||
| 196 | int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, | 216 | int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, |
| 197 | const char *input, struct blkg_conf_ctx *ctx); | 217 | char *input, struct blkg_conf_ctx *ctx); |
| 198 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); | 218 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); |
| 199 | 219 | ||
| 200 | 220 | ||
| @@ -205,7 +225,7 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) | |||
| 205 | 225 | ||
| 206 | static inline struct blkcg *task_blkcg(struct task_struct *tsk) | 226 | static inline struct blkcg *task_blkcg(struct task_struct *tsk) |
| 207 | { | 227 | { |
| 208 | return css_to_blkcg(task_css(tsk, blkio_cgrp_id)); | 228 | return css_to_blkcg(task_css(tsk, io_cgrp_id)); |
| 209 | } | 229 | } |
| 210 | 230 | ||
| 211 | static inline struct blkcg *bio_blkcg(struct bio *bio) | 231 | static inline struct blkcg *bio_blkcg(struct bio *bio) |
| @@ -218,7 +238,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) | |||
| 218 | static inline struct cgroup_subsys_state * | 238 | static inline struct cgroup_subsys_state * |
| 219 | task_get_blkcg_css(struct task_struct *task) | 239 | task_get_blkcg_css(struct task_struct *task) |
| 220 | { | 240 | { |
| 221 | return task_get_css(task, blkio_cgrp_id); | 241 | return task_get_css(task, io_cgrp_id); |
| 222 | } | 242 | } |
| 223 | 243 | ||
| 224 | /** | 244 | /** |
| @@ -233,6 +253,52 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg) | |||
| 233 | } | 253 | } |
| 234 | 254 | ||
| 235 | /** | 255 | /** |
| 256 | * __blkg_lookup - internal version of blkg_lookup() | ||
| 257 | * @blkcg: blkcg of interest | ||
| 258 | * @q: request_queue of interest | ||
| 259 | * @update_hint: whether to update lookup hint with the result or not | ||
| 260 | * | ||
| 261 | * This is the internal version and shouldn't be used by policy | ||
| 262 | * implementations. Looks up blkgs for the @blkcg - @q pair regardless of | ||
| 263 | * @q's bypass state. If @update_hint is %true, the caller should be | ||
| 264 | * holding @q->queue_lock and lookup hint is updated on success. | ||
| 265 | */ | ||
| 266 | static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, | ||
| 267 | struct request_queue *q, | ||
| 268 | bool update_hint) | ||
| 269 | { | ||
| 270 | struct blkcg_gq *blkg; | ||
| 271 | |||
| 272 | if (blkcg == &blkcg_root) | ||
| 273 | return q->root_blkg; | ||
| 274 | |||
| 275 | blkg = rcu_dereference(blkcg->blkg_hint); | ||
| 276 | if (blkg && blkg->q == q) | ||
| 277 | return blkg; | ||
| 278 | |||
| 279 | return blkg_lookup_slowpath(blkcg, q, update_hint); | ||
| 280 | } | ||
| 281 | |||
| 282 | /** | ||
| 283 | * blkg_lookup - lookup blkg for the specified blkcg - q pair | ||
| 284 | * @blkcg: blkcg of interest | ||
| 285 | * @q: request_queue of interest | ||
| 286 | * | ||
| 287 | * Lookup blkg for the @blkcg - @q pair. This function should be called | ||
| 288 | * under RCU read lock and is guaranteed to return %NULL if @q is bypassing | ||
| 289 | * - see blk_queue_bypass_start() for details. | ||
| 290 | */ | ||
| 291 | static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, | ||
| 292 | struct request_queue *q) | ||
| 293 | { | ||
| 294 | WARN_ON_ONCE(!rcu_read_lock_held()); | ||
| 295 | |||
| 296 | if (unlikely(blk_queue_bypass(q))) | ||
| 297 | return NULL; | ||
| 298 | return __blkg_lookup(blkcg, q, false); | ||
| 299 | } | ||
| 300 | |||
| 301 | /** | ||
| 236 | * blkg_to_pdata - get policy private data | 302 | * blkg_to_pdata - get policy private data |
| 237 | * @blkg: blkg of interest | 303 | * @blkg: blkg of interest |
| 238 | * @pol: policy of interest | 304 | * @pol: policy of interest |
| @@ -248,7 +314,7 @@ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | |||
| 248 | static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, | 314 | static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, |
| 249 | struct blkcg_policy *pol) | 315 | struct blkcg_policy *pol) |
| 250 | { | 316 | { |
| 251 | return blkcg ? blkcg->pd[pol->plid] : NULL; | 317 | return blkcg ? blkcg->cpd[pol->plid] : NULL; |
| 252 | } | 318 | } |
| 253 | 319 | ||
| 254 | /** | 320 | /** |
| @@ -262,6 +328,11 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) | |||
| 262 | return pd ? pd->blkg : NULL; | 328 | return pd ? pd->blkg : NULL; |
| 263 | } | 329 | } |
| 264 | 330 | ||
| 331 | static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd) | ||
| 332 | { | ||
| 333 | return cpd ? cpd->blkcg : NULL; | ||
| 334 | } | ||
| 335 | |||
| 265 | /** | 336 | /** |
| 266 | * blkg_path - format cgroup path of blkg | 337 | * blkg_path - format cgroup path of blkg |
| 267 | * @blkg: blkg of interest | 338 | * @blkg: blkg of interest |
| @@ -309,9 +380,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) | |||
| 309 | call_rcu(&blkg->rcu_head, __blkg_release_rcu); | 380 | call_rcu(&blkg->rcu_head, __blkg_release_rcu); |
| 310 | } | 381 | } |
| 311 | 382 | ||
| 312 | struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, | ||
| 313 | bool update_hint); | ||
| 314 | |||
| 315 | /** | 383 | /** |
| 316 | * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants | 384 | * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants |
| 317 | * @d_blkg: loop cursor pointing to the current descendant | 385 | * @d_blkg: loop cursor pointing to the current descendant |
| @@ -373,8 +441,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, | |||
| 373 | * or if either the blkcg or queue is going away. Fall back to | 441 | * or if either the blkcg or queue is going away. Fall back to |
| 374 | * root_rl in such cases. | 442 | * root_rl in such cases. |
| 375 | */ | 443 | */ |
| 376 | blkg = blkg_lookup_create(blkcg, q); | 444 | blkg = blkg_lookup(blkcg, q); |
| 377 | if (IS_ERR(blkg)) | 445 | if (unlikely(!blkg)) |
| 378 | goto root_rl; | 446 | goto root_rl; |
| 379 | 447 | ||
| 380 | blkg_get(blkg); | 448 | blkg_get(blkg); |
| @@ -394,8 +462,7 @@ root_rl: | |||
| 394 | */ | 462 | */ |
| 395 | static inline void blk_put_rl(struct request_list *rl) | 463 | static inline void blk_put_rl(struct request_list *rl) |
| 396 | { | 464 | { |
| 397 | /* root_rl may not have blkg set */ | 465 | if (rl->blkg->blkcg != &blkcg_root) |
| 398 | if (rl->blkg && rl->blkg->blkcg != &blkcg_root) | ||
| 399 | blkg_put(rl->blkg); | 466 | blkg_put(rl->blkg); |
| 400 | } | 467 | } |
| 401 | 468 | ||
| @@ -433,9 +500,21 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl, | |||
| 433 | #define blk_queue_for_each_rl(rl, q) \ | 500 | #define blk_queue_for_each_rl(rl, q) \ |
| 434 | for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) | 501 | for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) |
| 435 | 502 | ||
| 436 | static inline void blkg_stat_init(struct blkg_stat *stat) | 503 | static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp) |
| 437 | { | 504 | { |
| 438 | u64_stats_init(&stat->syncp); | 505 | int ret; |
| 506 | |||
| 507 | ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); | ||
| 508 | if (ret) | ||
| 509 | return ret; | ||
| 510 | |||
| 511 | atomic64_set(&stat->aux_cnt, 0); | ||
| 512 | return 0; | ||
| 513 | } | ||
| 514 | |||
| 515 | static inline void blkg_stat_exit(struct blkg_stat *stat) | ||
| 516 | { | ||
| 517 | percpu_counter_destroy(&stat->cpu_cnt); | ||
| 439 | } | 518 | } |
| 440 | 519 | ||
| 441 | /** | 520 | /** |
| @@ -443,34 +522,21 @@ static inline void blkg_stat_init(struct blkg_stat *stat) | |||
| 443 | * @stat: target blkg_stat | 522 | * @stat: target blkg_stat |
| 444 | * @val: value to add | 523 | * @val: value to add |
| 445 | * | 524 | * |
| 446 | * Add @val to @stat. The caller is responsible for synchronizing calls to | 525 | * Add @val to @stat. The caller must ensure that IRQs on the same CPU |
| 447 | * this function. | 526 | * don't re-enter this function for the same counter. |
| 448 | */ | 527 | */ |
| 449 | static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) | 528 | static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) |
| 450 | { | 529 | { |
| 451 | u64_stats_update_begin(&stat->syncp); | 530 | __percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); |
| 452 | stat->cnt += val; | ||
| 453 | u64_stats_update_end(&stat->syncp); | ||
| 454 | } | 531 | } |
| 455 | 532 | ||
| 456 | /** | 533 | /** |
| 457 | * blkg_stat_read - read the current value of a blkg_stat | 534 | * blkg_stat_read - read the current value of a blkg_stat |
| 458 | * @stat: blkg_stat to read | 535 | * @stat: blkg_stat to read |
| 459 | * | ||
| 460 | * Read the current value of @stat. This function can be called without | ||
| 461 | * synchronization and takes care of u64 atomicity. | ||
| 462 | */ | 536 | */ |
| 463 | static inline uint64_t blkg_stat_read(struct blkg_stat *stat) | 537 | static inline uint64_t blkg_stat_read(struct blkg_stat *stat) |
| 464 | { | 538 | { |
| 465 | unsigned int start; | 539 | return percpu_counter_sum_positive(&stat->cpu_cnt); |
| 466 | uint64_t v; | ||
| 467 | |||
| 468 | do { | ||
| 469 | start = u64_stats_fetch_begin_irq(&stat->syncp); | ||
| 470 | v = stat->cnt; | ||
| 471 | } while (u64_stats_fetch_retry_irq(&stat->syncp, start)); | ||
| 472 | |||
| 473 | return v; | ||
| 474 | } | 540 | } |
| 475 | 541 | ||
| 476 | /** | 542 | /** |
| @@ -479,24 +545,46 @@ static inline uint64_t blkg_stat_read(struct blkg_stat *stat) | |||
| 479 | */ | 545 | */ |
| 480 | static inline void blkg_stat_reset(struct blkg_stat *stat) | 546 | static inline void blkg_stat_reset(struct blkg_stat *stat) |
| 481 | { | 547 | { |
| 482 | stat->cnt = 0; | 548 | percpu_counter_set(&stat->cpu_cnt, 0); |
| 549 | atomic64_set(&stat->aux_cnt, 0); | ||
| 483 | } | 550 | } |
| 484 | 551 | ||
| 485 | /** | 552 | /** |
| 486 | * blkg_stat_merge - merge a blkg_stat into another | 553 | * blkg_stat_add_aux - add a blkg_stat into another's aux count |
| 487 | * @to: the destination blkg_stat | 554 | * @to: the destination blkg_stat |
| 488 | * @from: the source | 555 | * @from: the source |
| 489 | * | 556 | * |
| 490 | * Add @from's count to @to. | 557 | * Add @from's count including the aux one to @to's aux count. |
| 491 | */ | 558 | */ |
| 492 | static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) | 559 | static inline void blkg_stat_add_aux(struct blkg_stat *to, |
| 560 | struct blkg_stat *from) | ||
| 493 | { | 561 | { |
| 494 | blkg_stat_add(to, blkg_stat_read(from)); | 562 | atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt), |
| 563 | &to->aux_cnt); | ||
| 495 | } | 564 | } |
| 496 | 565 | ||
| 497 | static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) | 566 | static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp) |
| 498 | { | 567 | { |
| 499 | u64_stats_init(&rwstat->syncp); | 568 | int i, ret; |
| 569 | |||
| 570 | for (i = 0; i < BLKG_RWSTAT_NR; i++) { | ||
| 571 | ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp); | ||
| 572 | if (ret) { | ||
| 573 | while (--i >= 0) | ||
| 574 | percpu_counter_destroy(&rwstat->cpu_cnt[i]); | ||
| 575 | return ret; | ||
| 576 | } | ||
| 577 | atomic64_set(&rwstat->aux_cnt[i], 0); | ||
| 578 | } | ||
| 579 | return 0; | ||
| 580 | } | ||
| 581 | |||
| 582 | static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat) | ||
| 583 | { | ||
| 584 | int i; | ||
| 585 | |||
| 586 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | ||
| 587 | percpu_counter_destroy(&rwstat->cpu_cnt[i]); | ||
| 500 | } | 588 | } |
| 501 | 589 | ||
| 502 | /** | 590 | /** |
| @@ -511,39 +599,38 @@ static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) | |||
| 511 | static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, | 599 | static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, |
| 512 | int rw, uint64_t val) | 600 | int rw, uint64_t val) |
| 513 | { | 601 | { |
| 514 | u64_stats_update_begin(&rwstat->syncp); | 602 | struct percpu_counter *cnt; |
| 515 | 603 | ||
| 516 | if (rw & REQ_WRITE) | 604 | if (rw & REQ_WRITE) |
| 517 | rwstat->cnt[BLKG_RWSTAT_WRITE] += val; | 605 | cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; |
| 518 | else | 606 | else |
| 519 | rwstat->cnt[BLKG_RWSTAT_READ] += val; | 607 | cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; |
| 608 | |||
| 609 | __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); | ||
| 610 | |||
| 520 | if (rw & REQ_SYNC) | 611 | if (rw & REQ_SYNC) |
| 521 | rwstat->cnt[BLKG_RWSTAT_SYNC] += val; | 612 | cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; |
| 522 | else | 613 | else |
| 523 | rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; | 614 | cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; |
| 524 | 615 | ||
| 525 | u64_stats_update_end(&rwstat->syncp); | 616 | __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); |
| 526 | } | 617 | } |
| 527 | 618 | ||
| 528 | /** | 619 | /** |
| 529 | * blkg_rwstat_read - read the current values of a blkg_rwstat | 620 | * blkg_rwstat_read - read the current values of a blkg_rwstat |
| 530 | * @rwstat: blkg_rwstat to read | 621 | * @rwstat: blkg_rwstat to read |
| 531 | * | 622 | * |
| 532 | * Read the current snapshot of @rwstat and return it as the return value. | 623 | * Read the current snapshot of @rwstat and return it in the aux counts. |
| 533 | * This function can be called without synchronization and takes care of | ||
| 534 | * u64 atomicity. | ||
| 535 | */ | 624 | */ |
| 536 | static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) | 625 | static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) |
| 537 | { | 626 | { |
| 538 | unsigned int start; | 627 | struct blkg_rwstat result; |
| 539 | struct blkg_rwstat tmp; | 628 | int i; |
| 540 | |||
| 541 | do { | ||
| 542 | start = u64_stats_fetch_begin_irq(&rwstat->syncp); | ||
| 543 | tmp = *rwstat; | ||
| 544 | } while (u64_stats_fetch_retry_irq(&rwstat->syncp, start)); | ||
| 545 | 629 | ||
| 546 | return tmp; | 630 | for (i = 0; i < BLKG_RWSTAT_NR; i++) |
| 631 | atomic64_set(&result.aux_cnt[i], | ||
| 632 | percpu_counter_sum_positive(&rwstat->cpu_cnt[i])); | ||
| 633 | return result; | ||
| 547 | } | 634 | } |
| 548 | 635 | ||
| 549 | /** | 636 | /** |
| @@ -558,7 +645,8 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) | |||
| 558 | { | 645 | { |
| 559 | struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); | 646 | struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); |
| 560 | 647 | ||
| 561 | return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; | 648 | return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + |
| 649 | atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); | ||
| 562 | } | 650 | } |
| 563 | 651 | ||
| 564 | /** | 652 | /** |
| @@ -567,26 +655,71 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) | |||
| 567 | */ | 655 | */ |
| 568 | static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) | 656 | static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) |
| 569 | { | 657 | { |
| 570 | memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); | 658 | int i; |
| 659 | |||
| 660 | for (i = 0; i < BLKG_RWSTAT_NR; i++) { | ||
| 661 | percpu_counter_set(&rwstat->cpu_cnt[i], 0); | ||
| 662 | atomic64_set(&rwstat->aux_cnt[i], 0); | ||
| 663 | } | ||
| 571 | } | 664 | } |
| 572 | 665 | ||
| 573 | /** | 666 | /** |
| 574 | * blkg_rwstat_merge - merge a blkg_rwstat into another | 667 | * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count |
| 575 | * @to: the destination blkg_rwstat | 668 | * @to: the destination blkg_rwstat |
| 576 | * @from: the source | 669 | * @from: the source |
| 577 | * | 670 | * |
| 578 | * Add @from's counts to @to. | 671 | * Add @from's count including the aux one to @to's aux count. |
| 579 | */ | 672 | */ |
| 580 | static inline void blkg_rwstat_merge(struct blkg_rwstat *to, | 673 | static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to, |
| 581 | struct blkg_rwstat *from) | 674 | struct blkg_rwstat *from) |
| 582 | { | 675 | { |
| 583 | struct blkg_rwstat v = blkg_rwstat_read(from); | 676 | struct blkg_rwstat v = blkg_rwstat_read(from); |
| 584 | int i; | 677 | int i; |
| 585 | 678 | ||
| 586 | u64_stats_update_begin(&to->syncp); | ||
| 587 | for (i = 0; i < BLKG_RWSTAT_NR; i++) | 679 | for (i = 0; i < BLKG_RWSTAT_NR; i++) |
| 588 | to->cnt[i] += v.cnt[i]; | 680 | atomic64_add(atomic64_read(&v.aux_cnt[i]) + |
| 589 | u64_stats_update_end(&to->syncp); | 681 | atomic64_read(&from->aux_cnt[i]), |
| 682 | &to->aux_cnt[i]); | ||
| 683 | } | ||
| 684 | |||
| 685 | #ifdef CONFIG_BLK_DEV_THROTTLING | ||
| 686 | extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, | ||
| 687 | struct bio *bio); | ||
| 688 | #else | ||
| 689 | static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, | ||
| 690 | struct bio *bio) { return false; } | ||
| 691 | #endif | ||
| 692 | |||
| 693 | static inline bool blkcg_bio_issue_check(struct request_queue *q, | ||
| 694 | struct bio *bio) | ||
| 695 | { | ||
| 696 | struct blkcg *blkcg; | ||
| 697 | struct blkcg_gq *blkg; | ||
| 698 | bool throtl = false; | ||
| 699 | |||
| 700 | rcu_read_lock(); | ||
| 701 | blkcg = bio_blkcg(bio); | ||
| 702 | |||
| 703 | blkg = blkg_lookup(blkcg, q); | ||
| 704 | if (unlikely(!blkg)) { | ||
| 705 | spin_lock_irq(q->queue_lock); | ||
| 706 | blkg = blkg_lookup_create(blkcg, q); | ||
| 707 | if (IS_ERR(blkg)) | ||
| 708 | blkg = NULL; | ||
| 709 | spin_unlock_irq(q->queue_lock); | ||
| 710 | } | ||
| 711 | |||
| 712 | throtl = blk_throtl_bio(q, blkg, bio); | ||
| 713 | |||
| 714 | if (!throtl) { | ||
| 715 | blkg = blkg ?: q->root_blkg; | ||
| 716 | blkg_rwstat_add(&blkg->stat_bytes, bio->bi_flags, | ||
| 717 | bio->bi_iter.bi_size); | ||
| 718 | blkg_rwstat_add(&blkg->stat_ios, bio->bi_flags, 1); | ||
| 719 | } | ||
| 720 | |||
| 721 | rcu_read_unlock(); | ||
| 722 | return !throtl; | ||
| 590 | } | 723 | } |
| 591 | 724 | ||
| 592 | #else /* CONFIG_BLK_CGROUP */ | 725 | #else /* CONFIG_BLK_CGROUP */ |
| @@ -642,6 +775,9 @@ static inline void blk_put_rl(struct request_list *rl) { } | |||
| 642 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } | 775 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } |
| 643 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } | 776 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } |
| 644 | 777 | ||
| 778 | static inline bool blkcg_bio_issue_check(struct request_queue *q, | ||
| 779 | struct bio *bio) { return true; } | ||
| 780 | |||
| 645 | #define blk_queue_for_each_rl(rl, q) \ | 781 | #define blk_queue_for_each_rl(rl, q) \ |
| 646 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) | 782 | for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) |
| 647 | 783 | ||
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 1f36945fd23d..1a96fdaa33d5 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h | |||
| @@ -27,7 +27,7 @@ SUBSYS(cpuacct) | |||
| 27 | #endif | 27 | #endif |
| 28 | 28 | ||
| 29 | #if IS_ENABLED(CONFIG_BLK_CGROUP) | 29 | #if IS_ENABLED(CONFIG_BLK_CGROUP) |
| 30 | SUBSYS(blkio) | 30 | SUBSYS(io) |
| 31 | #endif | 31 | #endif |
| 32 | 32 | ||
| 33 | #if IS_ENABLED(CONFIG_MEMCG) | 33 | #if IS_ENABLED(CONFIG_MEMCG) |
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 123be25ea15a..5d4e9c4b821d 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h | |||
| @@ -266,6 +266,7 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) | |||
| 266 | } | 266 | } |
| 267 | 267 | ||
| 268 | int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); | 268 | int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); |
| 269 | size_t kernfs_path_len(struct kernfs_node *kn); | ||
| 269 | char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, | 270 | char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, |
| 270 | size_t buflen); | 271 | size_t buflen); |
| 271 | void pr_cont_kernfs_name(struct kernfs_node *kn); | 272 | void pr_cont_kernfs_name(struct kernfs_node *kn); |
| @@ -332,6 +333,9 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn) | |||
| 332 | static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) | 333 | static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) |
| 333 | { return -ENOSYS; } | 334 | { return -ENOSYS; } |
| 334 | 335 | ||
| 336 | static inline size_t kernfs_path_len(struct kernfs_node *kn) | ||
| 337 | { return 0; } | ||
| 338 | |||
| 335 | static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, | 339 | static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, |
| 336 | size_t buflen) | 340 | size_t buflen) |
| 337 | { return NULL; } | 341 | { return NULL; } |
