Diffstat (limited to 'include/linux')
-rw-r--r--  include/linux/backing-dev.h     26
-rw-r--r--  include/linux/blk-cgroup.h     340
-rw-r--r--  include/linux/cgroup_subsys.h    2
-rw-r--r--  include/linux/kernfs.h           4
4 files changed, 256 insertions, 116 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0fe9df983ab7..5a5d79ee256f 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -286,7 +286,7 @@ static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi
 	 * %current's blkcg equals the effective blkcg of its memcg. No
 	 * need to use the relatively expensive cgroup_get_e_css().
 	 */
-	if (likely(wb && wb->blkcg_css == task_css(current, blkio_cgrp_id)))
+	if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
 		return wb;
 	return NULL;
 }
@@ -402,7 +402,7 @@ static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
 }
 
 struct wb_iter {
-	int start_blkcg_id;
+	int start_memcg_id;
 	struct radix_tree_iter tree_iter;
 	void **slot;
 };
@@ -414,9 +414,9 @@ static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter,
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
-	if (iter->start_blkcg_id >= 0) {
-		iter->slot = radix_tree_iter_init(titer, iter->start_blkcg_id);
-		iter->start_blkcg_id = -1;
+	if (iter->start_memcg_id >= 0) {
+		iter->slot = radix_tree_iter_init(titer, iter->start_memcg_id);
+		iter->start_memcg_id = -1;
 	} else {
 		iter->slot = radix_tree_next_slot(iter->slot, titer, 0);
 	}
@@ -430,30 +430,30 @@ static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter,
 
 static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter,
 						   struct backing_dev_info *bdi,
-						   int start_blkcg_id)
+						   int start_memcg_id)
 {
-	iter->start_blkcg_id = start_blkcg_id;
+	iter->start_memcg_id = start_memcg_id;
 
-	if (start_blkcg_id)
+	if (start_memcg_id)
 		return __wb_iter_next(iter, bdi);
 	else
 		return &bdi->wb;
 }
 
 /**
- * bdi_for_each_wb - walk all wb's of a bdi in ascending blkcg ID order
+ * bdi_for_each_wb - walk all wb's of a bdi in ascending memcg ID order
  * @wb_cur: cursor struct bdi_writeback pointer
  * @bdi: bdi to walk wb's of
  * @iter: pointer to struct wb_iter to be used as iteration buffer
- * @start_blkcg_id: blkcg ID to start iteration from
+ * @start_memcg_id: memcg ID to start iteration from
  *
  * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending
- * blkcg ID order starting from @start_blkcg_id. @iter is struct wb_iter
+ * memcg ID order starting from @start_memcg_id. @iter is struct wb_iter
  * to be used as temp storage during iteration. rcu_read_lock() must be
  * held throughout iteration.
  */
-#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id)		\
-	for ((wb_cur) = __wb_iter_init(iter, bdi, start_blkcg_id);	\
+#define bdi_for_each_wb(wb_cur, bdi, iter, start_memcg_id)		\
+	for ((wb_cur) = __wb_iter_init(iter, bdi, start_memcg_id);	\
 	     (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi))
 
 #else	/* CONFIG_CGROUP_WRITEBACK */
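The bdi_for_each_wb() iterator above walks every bdi_writeback of a bdi in ascending memcg ID order. A minimal caller sketch of that contract follows; the function name and the wb_stat(WB_RECLAIMABLE) summing are illustrative assumptions, not part of this patch. Only what the kernel-doc states is relied on: rcu_read_lock() held across the walk, and a start ID of 0 so the root bdi->wb is included.

/* Illustrative only -- not part of this patch. */
static unsigned long example_bdi_sum_reclaimable(struct backing_dev_info *bdi)
{
	struct bdi_writeback *wb;
	struct wb_iter iter;
	unsigned long nr = 0;

	rcu_read_lock();			/* required for the whole walk */
	bdi_for_each_wb(wb, bdi, &iter, 0)	/* 0 => start from bdi->wb itself */
		nr += wb_stat(wb, WB_RECLAIMABLE);
	rcu_read_unlock();

	return nr;
}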
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index a4cd1641e9e2..0a5cc7a1109b 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -14,12 +14,15 @@
  */
 
 #include <linux/cgroup.h>
-#include <linux/u64_stats_sync.h>
+#include <linux/percpu_counter.h>
 #include <linux/seq_file.h>
 #include <linux/radix-tree.h>
 #include <linux/blkdev.h>
 #include <linux/atomic.h>
 
+/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
+#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
+
 /* Max limits for throttle policy */
 #define THROTL_IOPS_MAX UINT_MAX
 
@@ -45,7 +48,7 @@ struct blkcg {
 	struct blkcg_gq *blkg_hint;
 	struct hlist_head blkg_list;
 
-	struct blkcg_policy_data *pd[BLKCG_MAX_POLS];
+	struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
 
 	struct list_head all_blkcgs_node;
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -53,14 +56,19 @@ struct blkcg {
 #endif
 };
 
+/*
+ * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
+ * recursive. Used to carry stats of dead children, and, for blkg_rwstat,
+ * to carry result values from read and sum operations.
+ */
 struct blkg_stat {
-	struct u64_stats_sync syncp;
-	uint64_t cnt;
+	struct percpu_counter cpu_cnt;
+	atomic64_t aux_cnt;
 };
 
 struct blkg_rwstat {
-	struct u64_stats_sync syncp;
-	uint64_t cnt[BLKG_RWSTAT_NR];
+	struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
+	atomic64_t aux_cnt[BLKG_RWSTAT_NR];
 };
 
 /*
@@ -68,32 +76,28 @@ struct blkg_rwstat {
  * request_queue (q). This is used by blkcg policies which need to track
  * information per blkcg - q pair.
  *
- * There can be multiple active blkcg policies and each has its private
- * data on each blkg, the size of which is determined by
- * blkcg_policy->pd_size. blkcg core allocates and frees such areas
- * together with blkg and invokes pd_init/exit_fn() methods.
- *
- * Such private data must embed struct blkg_policy_data (pd) at the
- * beginning and pd_size can't be smaller than pd.
+ * There can be multiple active blkcg policies and each blkg:policy pair is
+ * represented by a blkg_policy_data which is allocated and freed by each
+ * policy's pd_alloc/free_fn() methods. A policy can allocate private data
+ * area by allocating larger data structure which embeds blkg_policy_data
+ * at the beginning.
  */
 struct blkg_policy_data {
 	/* the blkg and policy id this per-policy data belongs to */
 	struct blkcg_gq *blkg;
 	int plid;
-
-	/* used during policy activation */
-	struct list_head alloc_node;
 };
 
 /*
- * Policies that need to keep per-blkcg data which is independent
- * from any request_queue associated to it must specify its size
- * with the cpd_size field of the blkcg_policy structure and
- * embed a blkcg_policy_data in it. cpd_init() is invoked to let
- * each policy handle per-blkcg data.
+ * Policies that need to keep per-blkcg data which is independent from any
+ * request_queue associated to it should implement cpd_alloc/free_fn()
+ * methods. A policy can allocate private data area by allocating larger
+ * data structure which embeds blkcg_policy_data at the beginning.
+ * cpd_init() is invoked to let each policy handle per-blkcg data.
  */
 struct blkcg_policy_data {
-	/* the policy id this per-policy data belongs to */
+	/* the blkcg and policy id this per-policy data belongs to */
+	struct blkcg *blkcg;
 	int plid;
 };
 
@@ -123,40 +127,50 @@ struct blkcg_gq {
 	/* is this blkg online? protected by both blkcg and q locks */
 	bool online;
 
+	struct blkg_rwstat stat_bytes;
+	struct blkg_rwstat stat_ios;
+
 	struct blkg_policy_data *pd[BLKCG_MAX_POLS];
 
 	struct rcu_head rcu_head;
 };
 
-typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg);
-typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg);
+typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
+typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
+typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
 
 struct blkcg_policy {
 	int plid;
-	/* policy specific private data size */
-	size_t pd_size;
-	/* policy specific per-blkcg data size */
-	size_t cpd_size;
 	/* cgroup files for the policy */
-	struct cftype *cftypes;
+	struct cftype *dfl_cftypes;
+	struct cftype *legacy_cftypes;
 
 	/* operations */
+	blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
 	blkcg_pol_init_cpd_fn *cpd_init_fn;
+	blkcg_pol_free_cpd_fn *cpd_free_fn;
+	blkcg_pol_bind_cpd_fn *cpd_bind_fn;
+
+	blkcg_pol_alloc_pd_fn *pd_alloc_fn;
 	blkcg_pol_init_pd_fn *pd_init_fn;
 	blkcg_pol_online_pd_fn *pd_online_fn;
 	blkcg_pol_offline_pd_fn *pd_offline_fn;
-	blkcg_pol_exit_pd_fn *pd_exit_fn;
+	blkcg_pol_free_pd_fn *pd_free_fn;
 	blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
 };
 
 extern struct blkcg blkcg_root;
 extern struct cgroup_subsys_state * const blkcg_root_css;
 
-struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
+struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
+				      struct request_queue *q, bool update_hint);
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 				    struct request_queue *q);
 int blkcg_init_queue(struct request_queue *q);
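As the comments in the hunk above describe, a policy now allocates its own blkg_policy_data by embedding it at the start of a larger structure and wiring pd_alloc_fn()/pd_free_fn() (plus the optional cpd_* hooks) into struct blkcg_policy, instead of declaring pd_size/cpd_size. A hedged sketch of what that looks like; every example_* name and the serviced field are hypothetical, not taken from this patch:

struct example_blkg_data {
	struct blkg_policy_data pd;	/* must come first; blkg_to_pd() hands this back */
	struct blkg_rwstat serviced;	/* hypothetical per-blkg counter */
};

static struct blkg_policy_data *example_pd_alloc(gfp_t gfp, int node)
{
	struct example_blkg_data *d = kzalloc_node(sizeof(*d), gfp, node);

	if (!d || blkg_rwstat_init(&d->serviced, gfp)) {
		kfree(d);			/* kfree(NULL) is fine */
		return NULL;
	}
	return &d->pd;
}

static void example_pd_free(struct blkg_policy_data *pd)
{
	struct example_blkg_data *d = container_of(pd, struct example_blkg_data, pd);

	blkg_rwstat_exit(&d->serviced);
	kfree(d);
}

static struct blkcg_policy example_policy = {
	.pd_alloc_fn	= example_pd_alloc,
	.pd_free_fn	= example_pd_free,
	/* .dfl_cftypes/.legacy_cftypes and cpd_* hooks as needed */
};

The policy would still be registered through blkcg_policy_register() as before; only the allocation responsibility moves into the policy itself.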
@@ -171,6 +185,7 @@ int blkcg_activate_policy(struct request_queue *q,
 void blkcg_deactivate_policy(struct request_queue *q,
 			     const struct blkcg_policy *pol);
 
+const char *blkg_dev_name(struct blkcg_gq *blkg);
 void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
 		       u64 (*prfill)(struct seq_file *,
 				     struct blkg_policy_data *, int),
@@ -182,19 +197,24 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 		       int off);
+int blkg_print_stat_bytes(struct seq_file *sf, void *v);
+int blkg_print_stat_ios(struct seq_file *sf, void *v);
+int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
+int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);
 
-u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off);
-struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
-					     int off);
+u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
+			    struct blkcg_policy *pol, int off);
+struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
+					     struct blkcg_policy *pol, int off);
 
 struct blkg_conf_ctx {
 	struct gendisk *disk;
 	struct blkcg_gq *blkg;
-	u64 v;
+	char *body;
 };
 
 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
-		   const char *input, struct blkg_conf_ctx *ctx);
+		   char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
 
@@ -205,7 +225,7 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 
 static inline struct blkcg *task_blkcg(struct task_struct *tsk)
 {
-	return css_to_blkcg(task_css(tsk, blkio_cgrp_id));
+	return css_to_blkcg(task_css(tsk, io_cgrp_id));
 }
 
 static inline struct blkcg *bio_blkcg(struct bio *bio)
@@ -218,7 +238,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
 static inline struct cgroup_subsys_state *
 task_get_blkcg_css(struct task_struct *task)
 {
-	return task_get_css(task, blkio_cgrp_id);
+	return task_get_css(task, io_cgrp_id);
 }
 
 /**
@@ -233,6 +253,52 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
 }
 
 /**
+ * __blkg_lookup - internal version of blkg_lookup()
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ * @update_hint: whether to update lookup hint with the result or not
+ *
+ * This is internal version and shouldn't be used by policy
+ * implementations. Looks up blkgs for the @blkcg - @q pair regardless of
+ * @q's bypass state. If @update_hint is %true, the caller should be
+ * holding @q->queue_lock and lookup hint is updated on success.
+ */
+static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
+					     struct request_queue *q,
+					     bool update_hint)
+{
+	struct blkcg_gq *blkg;
+
+	if (blkcg == &blkcg_root)
+		return q->root_blkg;
+
+	blkg = rcu_dereference(blkcg->blkg_hint);
+	if (blkg && blkg->q == q)
+		return blkg;
+
+	return blkg_lookup_slowpath(blkcg, q, update_hint);
+}
+
+/**
+ * blkg_lookup - lookup blkg for the specified blkcg - q pair
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair. This function should be called
+ * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
+ * - see blk_queue_bypass_start() for details.
+ */
+static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
+					   struct request_queue *q)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (unlikely(blk_queue_bypass(q)))
+		return NULL;
+	return __blkg_lookup(blkcg, q, false);
+}
+
+/**
  * blkg_to_pdata - get policy private data
  * @blkg: blkg of interest
  * @pol: policy of interest
@@ -248,7 +314,7 @@ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
 static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
 						     struct blkcg_policy *pol)
 {
-	return blkcg ? blkcg->pd[pol->plid] : NULL;
+	return blkcg ? blkcg->cpd[pol->plid] : NULL;
 }
 
 /**
@@ -262,6 +328,11 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
 	return pd ? pd->blkg : NULL;
 }
 
+static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
+{
+	return cpd ? cpd->blkcg : NULL;
+}
+
 /**
  * blkg_path - format cgroup path of blkg
  * @blkg: blkg of interest
@@ -309,9 +380,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
 		call_rcu(&blkg->rcu_head, __blkg_release_rcu);
 }
 
-struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
-			       bool update_hint);
-
 /**
  * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
  * @d_blkg: loop cursor pointing to the current descendant
@@ -373,8 +441,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 	 * or if either the blkcg or queue is going away. Fall back to
 	 * root_rl in such cases.
 	 */
-	blkg = blkg_lookup_create(blkcg, q);
-	if (IS_ERR(blkg))
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg))
 		goto root_rl;
 
 	blkg_get(blkg);
@@ -394,8 +462,7 @@ root_rl:
  */
 static inline void blk_put_rl(struct request_list *rl)
 {
-	/* root_rl may not have blkg set */
-	if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
+	if (rl->blkg->blkcg != &blkcg_root)
 		blkg_put(rl->blkg);
 }
 
@@ -433,9 +500,21 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl,
 #define blk_queue_for_each_rl(rl, q)	\
 	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
 
-static inline void blkg_stat_init(struct blkg_stat *stat)
+static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
 {
-	u64_stats_init(&stat->syncp);
+	int ret;
+
+	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
+	if (ret)
+		return ret;
+
+	atomic64_set(&stat->aux_cnt, 0);
+	return 0;
+}
+
+static inline void blkg_stat_exit(struct blkg_stat *stat)
+{
+	percpu_counter_destroy(&stat->cpu_cnt);
 }
 
 /**
@@ -443,34 +522,21 @@ static inline void blkg_stat_init(struct blkg_stat *stat)
  * @stat: target blkg_stat
  * @val: value to add
  *
- * Add @val to @stat. The caller is responsible for synchronizing calls to
- * this function.
+ * Add @val to @stat. The caller must ensure that IRQ on the same CPU
+ * don't re-enter this function for the same counter.
 */
 static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
 {
-	u64_stats_update_begin(&stat->syncp);
-	stat->cnt += val;
-	u64_stats_update_end(&stat->syncp);
+	__percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
 }
 
 /**
 * blkg_stat_read - read the current value of a blkg_stat
 * @stat: blkg_stat to read
- *
- * Read the current value of @stat. This function can be called without
- * synchroniztion and takes care of u64 atomicity.
 */
 static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
 {
-	unsigned int start;
-	uint64_t v;
-
-	do {
-		start = u64_stats_fetch_begin_irq(&stat->syncp);
-		v = stat->cnt;
-	} while (u64_stats_fetch_retry_irq(&stat->syncp, start));
-
-	return v;
+	return percpu_counter_sum_positive(&stat->cpu_cnt);
 }
 
 /**
@@ -479,24 +545,46 @@ static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
 */
 static inline void blkg_stat_reset(struct blkg_stat *stat)
 {
-	stat->cnt = 0;
+	percpu_counter_set(&stat->cpu_cnt, 0);
+	atomic64_set(&stat->aux_cnt, 0);
 }
 
 /**
- * blkg_stat_merge - merge a blkg_stat into another
+ * blkg_stat_add_aux - add a blkg_stat into another's aux count
 * @to: the destination blkg_stat
 * @from: the source
 *
- * Add @from's count to @to.
+ * Add @from's count including the aux one to @to's aux count.
 */
-static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from)
+static inline void blkg_stat_add_aux(struct blkg_stat *to,
+				     struct blkg_stat *from)
 {
-	blkg_stat_add(to, blkg_stat_read(from));
+	atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
+		     &to->aux_cnt);
 }
 
-static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat)
+static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
 {
-	u64_stats_init(&rwstat->syncp);
+	int i, ret;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
+		ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
+		if (ret) {
+			while (--i >= 0)
+				percpu_counter_destroy(&rwstat->cpu_cnt[i]);
+			return ret;
+		}
+		atomic64_set(&rwstat->aux_cnt[i], 0);
+	}
+	return 0;
+}
+
+static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
+{
+	int i;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		percpu_counter_destroy(&rwstat->cpu_cnt[i]);
 }
 
 /**
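The two hunks above give blkg_stat (and blkg_rwstat) an explicit init/exit lifecycle: initialisation now performs a percpu allocation and can fail, updates become batched percpu adds, reads sum across CPUs, and a dead child's numbers are preserved by folding them into another counter's aux count. A hedged sketch of that sequence follows; the example_* helper names are hypothetical, not from this patch.

/* Illustrative lifecycle only -- helper names are not from this patch. */
static int example_stat_setup(struct blkg_stat *stat, gfp_t gfp)
{
	return blkg_stat_init(stat, gfp);	/* percpu allocation, may fail */
}

static void example_stat_account(struct blkg_stat *stat, uint64_t bytes)
{
	blkg_stat_add(stat, bytes);		/* hot path: batched percpu add */
}

static uint64_t example_stat_retire(struct blkg_stat *child, struct blkg_stat *parent)
{
	uint64_t final = blkg_stat_read(child);	/* slow path: sums all CPUs */

	blkg_stat_add_aux(parent, child);	/* keep the child's total visible recursively */
	blkg_stat_exit(child);			/* frees the percpu counter */
	return final;
}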
@@ -511,39 +599,38 @@ static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat)
 static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 				   int rw, uint64_t val)
 {
-	u64_stats_update_begin(&rwstat->syncp);
+	struct percpu_counter *cnt;
 
 	if (rw & REQ_WRITE)
-		rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
 	else
-		rwstat->cnt[BLKG_RWSTAT_READ] += val;
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
+
+	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
+
 	if (rw & REQ_SYNC)
-		rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
 	else
-		rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
 
-	u64_stats_update_end(&rwstat->syncp);
+	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
 }
 
 /**
 * blkg_rwstat_read - read the current values of a blkg_rwstat
 * @rwstat: blkg_rwstat to read
 *
- * Read the current snapshot of @rwstat and return it as the return value.
- * This function can be called without synchronization and takes care of
- * u64 atomicity.
+ * Read the current snapshot of @rwstat and return it in the aux counts.
 */
 static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
 {
-	unsigned int start;
-	struct blkg_rwstat tmp;
-
-	do {
-		start = u64_stats_fetch_begin_irq(&rwstat->syncp);
-		tmp = *rwstat;
-	} while (u64_stats_fetch_retry_irq(&rwstat->syncp, start));
+	struct blkg_rwstat result;
+	int i;
 
-	return tmp;
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		atomic64_set(&result.aux_cnt[i],
+			     percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
+	return result;
 }
 
 /**
@@ -558,7 +645,8 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
 {
 	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
 
-	return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
+	return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
+		atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
 }
 
 /**
@@ -567,26 +655,71 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
 */
 static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
 {
-	memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
+	int i;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
+		percpu_counter_set(&rwstat->cpu_cnt[i], 0);
+		atomic64_set(&rwstat->aux_cnt[i], 0);
+	}
 }
 
 /**
- * blkg_rwstat_merge - merge a blkg_rwstat into another
+ * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
 * @to: the destination blkg_rwstat
 * @from: the source
 *
- * Add @from's counts to @to.
+ * Add @from's count including the aux one to @to's aux count.
 */
-static inline void blkg_rwstat_merge(struct blkg_rwstat *to,
-				     struct blkg_rwstat *from)
+static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
+				       struct blkg_rwstat *from)
 {
 	struct blkg_rwstat v = blkg_rwstat_read(from);
 	int i;
 
-	u64_stats_update_begin(&to->syncp);
 	for (i = 0; i < BLKG_RWSTAT_NR; i++)
-		to->cnt[i] += v.cnt[i];
-	u64_stats_update_end(&to->syncp);
+		atomic64_add(atomic64_read(&v.aux_cnt[i]) +
+			     atomic64_read(&from->aux_cnt[i]),
+			     &to->aux_cnt[i]);
+}
+
+#ifdef CONFIG_BLK_DEV_THROTTLING
+extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
+			   struct bio *bio);
+#else
+static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
+				  struct bio *bio) { return false; }
+#endif
+
+static inline bool blkcg_bio_issue_check(struct request_queue *q,
+					 struct bio *bio)
+{
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+	bool throtl = false;
+
+	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
+
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg)) {
+		spin_lock_irq(q->queue_lock);
+		blkg = blkg_lookup_create(blkcg, q);
+		if (IS_ERR(blkg))
+			blkg = NULL;
+		spin_unlock_irq(q->queue_lock);
+	}
+
+	throtl = blk_throtl_bio(q, blkg, bio);
+
+	if (!throtl) {
+		blkg = blkg ?: q->root_blkg;
+		blkg_rwstat_add(&blkg->stat_bytes, bio->bi_flags,
+				bio->bi_iter.bi_size);
+		blkg_rwstat_add(&blkg->stat_ios, bio->bi_flags, 1);
+	}
+
+	rcu_read_unlock();
+	return !throtl;
 }
 
 #else	/* CONFIG_BLK_CGROUP */
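blkcg_bio_issue_check(), added in the hunk above, bundles the blkg lookup, the blk-throttle decision and the new stat_bytes/stat_ios accounting into one helper for the bio submission path. A hedged sketch of the caller-side contract only; the surrounding function is hypothetical and the real call site lives in the block core submission path, not in this header.

/* Hypothetical caller -- illustrates only the return-value contract. */
static void example_submit(struct request_queue *q, struct bio *bio)
{
	/*
	 * false: blk-throttle has taken the bio and will dispatch it later,
	 * so the caller must not issue it now.  true: the bio was accounted
	 * in blkg->stat_bytes/stat_ios and may be sent on.
	 */
	if (!blkcg_bio_issue_check(q, bio))
		return;

	/* ... hand the bio to the normal issue path ... */
}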
@@ -642,6 +775,9 @@ static inline void blk_put_rl(struct request_list *rl) { }
 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
 
+static inline bool blkcg_bio_issue_check(struct request_queue *q,
+					 struct bio *bio) { return true; }
+
 #define blk_queue_for_each_rl(rl, q)	\
 	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
 
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 1f36945fd23d..1a96fdaa33d5 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -27,7 +27,7 @@ SUBSYS(cpuacct)
 #endif
 
 #if IS_ENABLED(CONFIG_BLK_CGROUP)
-SUBSYS(blkio)
+SUBSYS(io)
 #endif
 
 #if IS_ENABLED(CONFIG_MEMCG)
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 123be25ea15a..5d4e9c4b821d 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -266,6 +266,7 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
 }
 
 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
+size_t kernfs_path_len(struct kernfs_node *kn);
 char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
 				size_t buflen);
 void pr_cont_kernfs_name(struct kernfs_node *kn);
@@ -332,6 +333,9 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
 static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
 { return -ENOSYS; }
 
+static inline size_t kernfs_path_len(struct kernfs_node *kn)
+{ return 0; }
+
 static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
 					       size_t buflen)
 { return NULL; }
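kernfs_path_len(), declared above with a stub that returns 0 when kernfs is disabled, lets callers size a buffer before formatting a node's path with kernfs_path(). A hedged pairing sketch; the helper name and allocation strategy are illustrative, and the +1 assumes the returned length excludes the terminating NUL.

/* Illustrative only: size, allocate, then format a kernfs node's path. */
static char *example_kernfs_path_alloc(struct kernfs_node *kn, gfp_t gfp)
{
	size_t len = kernfs_path_len(kn) + 1;	/* assumed: +1 for the trailing NUL */
	char *buf = kmalloc(len, gfp);

	if (buf && !kernfs_path(kn, buf, len)) {
		kfree(buf);			/* path didn't fit (e.g. concurrent rename) */
		return NULL;
	}
	return buf;
}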