aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 21:56:14 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 21:56:14 -0400
commitb0a1ea51bda4c2bcdde460221e1772f3a4f8c44f (patch)
tree9684c11b72718cd7e96e5eb93298690269ecf447 /include/linux
parent33e247c7e58d335d70ecb84fd869091e2e4b8dcb (diff)
parent69d7fde5909b614114343974cfc52cb8ff30b544 (diff)
Merge branch 'for-4.3/blkcg' of git://git.kernel.dk/linux-block
Pull blk-cg updates from Jens Axboe: "A bit later in the cycle, but this has been in the block tree for a while. This is basically four patchsets from Tejun, that improve our buffered cgroup writeback. It was dependent on the other cgroup changes, but they went in earlier in this cycle. Series 1 is a set of 5 patches that has cgroup writeback updates: - bdi_writeback iteration fix which could lead to some wb's being skipped or repeated during e.g. sync under memory pressure. - Simplification of wb work wait mechanism. - Writeback tracepoints updated to report cgroup. Series 2 is a set of updates for the CFQ cgroup writeback handling: cfq has always charged all async IOs to the root cgroup. It didn't have much choice as writeback didn't know about cgroups and there was no way to tell who to blame for a given writeback IO. writeback finally grew support for cgroups and now tags each writeback IO with the appropriate cgroup to charge it against. This patchset updates cfq so that it follows the blkcg each bio is tagged with. Async cfq_queues are now shared across cfq_group, which is per-cgroup, instead of per-request_queue cfq_data. This makes all IOs follow the weight based IO resource distribution implemented by cfq. - Switched from GFP_ATOMIC to GFP_NOWAIT as suggested by Jeff. - Other misc review points addressed, acks added and rebased. Series 3 is the blkcg policy cleanup patches: This patchset contains assorted cleanups for blkcg_policy methods and blk[c]g_policy_data handling. - alloc/free added for blkg_policy_data. exit dropped. - alloc/free added for blkcg_policy_data. - blk-throttle's async percpu allocation is replaced with direct allocation. - all methods now take blk[c]g_policy_data instead of blkcg_gq or blkcg. And finally, series 4 is a set of patches cleaning up the blkcg stats handling: blkcg's stats have always been somewhat of a mess. This patchset tries to improve the situation a bit. 
- The following patches added to consolidate blkcg entry point and blkg creation. This in itself is an improvement and helps collecting common stats on bio issue. - per-blkg stats now accounted on bio issue rather than request completion so that bio based and request based drivers can behave the same way. The issue was spotted by Vivek. - cfq-iosched implements custom recursive stats and blk-throttle implements custom per-cpu stats. This patchset makes blkcg core support both by default. - cfq-iosched and blk-throttle keep track of the same stats multiple times. Unify them" * 'for-4.3/blkcg' of git://git.kernel.dk/linux-block: (45 commits) blkcg: use CGROUP_WEIGHT_* scale for io.weight on the unified hierarchy blkcg: s/CFQ_WEIGHT_*/CFQ_WEIGHT_LEGACY_*/ blkcg: implement interface for the unified hierarchy blkcg: misc preparations for unified hierarchy interface blkcg: separate out tg_conf_updated() from tg_set_conf() blkcg: move body parsing from blkg_conf_prep() to its callers blkcg: mark existing cftypes as legacy blkcg: rename subsystem name from blkio to io blkcg: refine error codes returned during blkcg configuration blkcg: remove unnecessary NULL checks from __cfqg_set_weight_device() blkcg: reduce stack usage of blkg_rwstat_recursive_sum() blkcg: remove cfqg_stats->sectors blkcg: move io_service_bytes and io_serviced stats into blkcg_gq blkcg: make blkg_[rw]stat_recursive_sum() to be able to index into blkcg_gq blkcg: make blkcg_[rw]stat per-cpu blkcg: add blkg_[rw]stat->aux_cnt and replace cfq_group->dead_stats with it blkcg: consolidate blkg creation in blkcg_bio_issue_check() blk-throttle: improve queue bypass handling blkcg: move root blkg lookup optimization from throtl_lookup_tg() to __blkg_lookup() blkcg: inline [__]blkg_lookup() ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/backing-dev.h26
-rw-r--r--include/linux/blk-cgroup.h340
-rw-r--r--include/linux/cgroup_subsys.h2
-rw-r--r--include/linux/kernfs.h4
4 files changed, 256 insertions, 116 deletions
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0fe9df983ab7..5a5d79ee256f 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -286,7 +286,7 @@ static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi
286 * %current's blkcg equals the effective blkcg of its memcg. No 286 * %current's blkcg equals the effective blkcg of its memcg. No
287 * need to use the relatively expensive cgroup_get_e_css(). 287 * need to use the relatively expensive cgroup_get_e_css().
288 */ 288 */
289 if (likely(wb && wb->blkcg_css == task_css(current, blkio_cgrp_id))) 289 if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
290 return wb; 290 return wb;
291 return NULL; 291 return NULL;
292} 292}
@@ -402,7 +402,7 @@ static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
402} 402}
403 403
404struct wb_iter { 404struct wb_iter {
405 int start_blkcg_id; 405 int start_memcg_id;
406 struct radix_tree_iter tree_iter; 406 struct radix_tree_iter tree_iter;
407 void **slot; 407 void **slot;
408}; 408};
@@ -414,9 +414,9 @@ static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter,
414 414
415 WARN_ON_ONCE(!rcu_read_lock_held()); 415 WARN_ON_ONCE(!rcu_read_lock_held());
416 416
417 if (iter->start_blkcg_id >= 0) { 417 if (iter->start_memcg_id >= 0) {
418 iter->slot = radix_tree_iter_init(titer, iter->start_blkcg_id); 418 iter->slot = radix_tree_iter_init(titer, iter->start_memcg_id);
419 iter->start_blkcg_id = -1; 419 iter->start_memcg_id = -1;
420 } else { 420 } else {
421 iter->slot = radix_tree_next_slot(iter->slot, titer, 0); 421 iter->slot = radix_tree_next_slot(iter->slot, titer, 0);
422 } 422 }
@@ -430,30 +430,30 @@ static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter,
430 430
431static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter, 431static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter,
432 struct backing_dev_info *bdi, 432 struct backing_dev_info *bdi,
433 int start_blkcg_id) 433 int start_memcg_id)
434{ 434{
435 iter->start_blkcg_id = start_blkcg_id; 435 iter->start_memcg_id = start_memcg_id;
436 436
437 if (start_blkcg_id) 437 if (start_memcg_id)
438 return __wb_iter_next(iter, bdi); 438 return __wb_iter_next(iter, bdi);
439 else 439 else
440 return &bdi->wb; 440 return &bdi->wb;
441} 441}
442 442
443/** 443/**
444 * bdi_for_each_wb - walk all wb's of a bdi in ascending blkcg ID order 444 * bdi_for_each_wb - walk all wb's of a bdi in ascending memcg ID order
445 * @wb_cur: cursor struct bdi_writeback pointer 445 * @wb_cur: cursor struct bdi_writeback pointer
446 * @bdi: bdi to walk wb's of 446 * @bdi: bdi to walk wb's of
447 * @iter: pointer to struct wb_iter to be used as iteration buffer 447 * @iter: pointer to struct wb_iter to be used as iteration buffer
448 * @start_blkcg_id: blkcg ID to start iteration from 448 * @start_memcg_id: memcg ID to start iteration from
449 * 449 *
450 * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending 450 * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending
451 * blkcg ID order starting from @start_blkcg_id. @iter is struct wb_iter 451 * memcg ID order starting from @start_memcg_id. @iter is struct wb_iter
452 * to be used as temp storage during iteration. rcu_read_lock() must be 452 * to be used as temp storage during iteration. rcu_read_lock() must be
453 * held throughout iteration. 453 * held throughout iteration.
454 */ 454 */
455#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \ 455#define bdi_for_each_wb(wb_cur, bdi, iter, start_memcg_id) \
456 for ((wb_cur) = __wb_iter_init(iter, bdi, start_blkcg_id); \ 456 for ((wb_cur) = __wb_iter_init(iter, bdi, start_memcg_id); \
457 (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi)) 457 (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi))
458 458
459#else /* CONFIG_CGROUP_WRITEBACK */ 459#else /* CONFIG_CGROUP_WRITEBACK */
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index a4cd1641e9e2..0a5cc7a1109b 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -14,12 +14,15 @@
14 */ 14 */
15 15
16#include <linux/cgroup.h> 16#include <linux/cgroup.h>
17#include <linux/u64_stats_sync.h> 17#include <linux/percpu_counter.h>
18#include <linux/seq_file.h> 18#include <linux/seq_file.h>
19#include <linux/radix-tree.h> 19#include <linux/radix-tree.h>
20#include <linux/blkdev.h> 20#include <linux/blkdev.h>
21#include <linux/atomic.h> 21#include <linux/atomic.h>
22 22
23/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
24#define BLKG_STAT_CPU_BATCH (INT_MAX / 2)
25
23/* Max limits for throttle policy */ 26/* Max limits for throttle policy */
24#define THROTL_IOPS_MAX UINT_MAX 27#define THROTL_IOPS_MAX UINT_MAX
25 28
@@ -45,7 +48,7 @@ struct blkcg {
45 struct blkcg_gq *blkg_hint; 48 struct blkcg_gq *blkg_hint;
46 struct hlist_head blkg_list; 49 struct hlist_head blkg_list;
47 50
48 struct blkcg_policy_data *pd[BLKCG_MAX_POLS]; 51 struct blkcg_policy_data *cpd[BLKCG_MAX_POLS];
49 52
50 struct list_head all_blkcgs_node; 53 struct list_head all_blkcgs_node;
51#ifdef CONFIG_CGROUP_WRITEBACK 54#ifdef CONFIG_CGROUP_WRITEBACK
@@ -53,14 +56,19 @@ struct blkcg {
53#endif 56#endif
54}; 57};
55 58
59/*
60 * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
61 * recursive. Used to carry stats of dead children, and, for blkg_rwstat,
62 * to carry result values from read and sum operations.
63 */
56struct blkg_stat { 64struct blkg_stat {
57 struct u64_stats_sync syncp; 65 struct percpu_counter cpu_cnt;
58 uint64_t cnt; 66 atomic64_t aux_cnt;
59}; 67};
60 68
61struct blkg_rwstat { 69struct blkg_rwstat {
62 struct u64_stats_sync syncp; 70 struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR];
63 uint64_t cnt[BLKG_RWSTAT_NR]; 71 atomic64_t aux_cnt[BLKG_RWSTAT_NR];
64}; 72};
65 73
66/* 74/*
@@ -68,32 +76,28 @@ struct blkg_rwstat {
68 * request_queue (q). This is used by blkcg policies which need to track 76 * request_queue (q). This is used by blkcg policies which need to track
69 * information per blkcg - q pair. 77 * information per blkcg - q pair.
70 * 78 *
71 * There can be multiple active blkcg policies and each has its private 79 * There can be multiple active blkcg policies and each blkg:policy pair is
72 * data on each blkg, the size of which is determined by 80 * represented by a blkg_policy_data which is allocated and freed by each
73 * blkcg_policy->pd_size. blkcg core allocates and frees such areas 81 * policy's pd_alloc/free_fn() methods. A policy can allocate private data
74 * together with blkg and invokes pd_init/exit_fn() methods. 82 * area by allocating larger data structure which embeds blkg_policy_data
75 * 83 * at the beginning.
76 * Such private data must embed struct blkg_policy_data (pd) at the
77 * beginning and pd_size can't be smaller than pd.
78 */ 84 */
79struct blkg_policy_data { 85struct blkg_policy_data {
80 /* the blkg and policy id this per-policy data belongs to */ 86 /* the blkg and policy id this per-policy data belongs to */
81 struct blkcg_gq *blkg; 87 struct blkcg_gq *blkg;
82 int plid; 88 int plid;
83
84 /* used during policy activation */
85 struct list_head alloc_node;
86}; 89};
87 90
88/* 91/*
89 * Policies that need to keep per-blkcg data which is independent 92 * Policies that need to keep per-blkcg data which is independent from any
90 * from any request_queue associated to it must specify its size 93 * request_queue associated to it should implement cpd_alloc/free_fn()
91 * with the cpd_size field of the blkcg_policy structure and 94 * methods. A policy can allocate private data area by allocating larger
92 * embed a blkcg_policy_data in it. cpd_init() is invoked to let 95 * data structure which embeds blkcg_policy_data at the beginning.
93 * each policy handle per-blkcg data. 96 * cpd_init() is invoked to let each policy handle per-blkcg data.
94 */ 97 */
95struct blkcg_policy_data { 98struct blkcg_policy_data {
96 /* the policy id this per-policy data belongs to */ 99 /* the blkcg and policy id this per-policy data belongs to */
100 struct blkcg *blkcg;
97 int plid; 101 int plid;
98}; 102};
99 103
@@ -123,40 +127,50 @@ struct blkcg_gq {
123 /* is this blkg online? protected by both blkcg and q locks */ 127 /* is this blkg online? protected by both blkcg and q locks */
124 bool online; 128 bool online;
125 129
130 struct blkg_rwstat stat_bytes;
131 struct blkg_rwstat stat_ios;
132
126 struct blkg_policy_data *pd[BLKCG_MAX_POLS]; 133 struct blkg_policy_data *pd[BLKCG_MAX_POLS];
127 134
128 struct rcu_head rcu_head; 135 struct rcu_head rcu_head;
129}; 136};
130 137
131typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg); 138typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
132typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg); 139typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
133typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg); 140typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
134typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg); 141typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
135typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg); 142typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
136typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg); 143typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
144typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
145typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
146typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
147typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
137 148
138struct blkcg_policy { 149struct blkcg_policy {
139 int plid; 150 int plid;
140 /* policy specific private data size */
141 size_t pd_size;
142 /* policy specific per-blkcg data size */
143 size_t cpd_size;
144 /* cgroup files for the policy */ 151 /* cgroup files for the policy */
145 struct cftype *cftypes; 152 struct cftype *dfl_cftypes;
153 struct cftype *legacy_cftypes;
146 154
147 /* operations */ 155 /* operations */
156 blkcg_pol_alloc_cpd_fn *cpd_alloc_fn;
148 blkcg_pol_init_cpd_fn *cpd_init_fn; 157 blkcg_pol_init_cpd_fn *cpd_init_fn;
158 blkcg_pol_free_cpd_fn *cpd_free_fn;
159 blkcg_pol_bind_cpd_fn *cpd_bind_fn;
160
161 blkcg_pol_alloc_pd_fn *pd_alloc_fn;
149 blkcg_pol_init_pd_fn *pd_init_fn; 162 blkcg_pol_init_pd_fn *pd_init_fn;
150 blkcg_pol_online_pd_fn *pd_online_fn; 163 blkcg_pol_online_pd_fn *pd_online_fn;
151 blkcg_pol_offline_pd_fn *pd_offline_fn; 164 blkcg_pol_offline_pd_fn *pd_offline_fn;
152 blkcg_pol_exit_pd_fn *pd_exit_fn; 165 blkcg_pol_free_pd_fn *pd_free_fn;
153 blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn; 166 blkcg_pol_reset_pd_stats_fn *pd_reset_stats_fn;
154}; 167};
155 168
156extern struct blkcg blkcg_root; 169extern struct blkcg blkcg_root;
157extern struct cgroup_subsys_state * const blkcg_root_css; 170extern struct cgroup_subsys_state * const blkcg_root_css;
158 171
159struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q); 172struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
173 struct request_queue *q, bool update_hint);
160struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, 174struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
161 struct request_queue *q); 175 struct request_queue *q);
162int blkcg_init_queue(struct request_queue *q); 176int blkcg_init_queue(struct request_queue *q);
@@ -171,6 +185,7 @@ int blkcg_activate_policy(struct request_queue *q,
171void blkcg_deactivate_policy(struct request_queue *q, 185void blkcg_deactivate_policy(struct request_queue *q,
172 const struct blkcg_policy *pol); 186 const struct blkcg_policy *pol);
173 187
188const char *blkg_dev_name(struct blkcg_gq *blkg);
174void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, 189void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
175 u64 (*prfill)(struct seq_file *, 190 u64 (*prfill)(struct seq_file *,
176 struct blkg_policy_data *, int), 191 struct blkg_policy_data *, int),
@@ -182,19 +197,24 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
182u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); 197u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
183u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, 198u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
184 int off); 199 int off);
200int blkg_print_stat_bytes(struct seq_file *sf, void *v);
201int blkg_print_stat_ios(struct seq_file *sf, void *v);
202int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
203int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);
185 204
186u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off); 205u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
187struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, 206 struct blkcg_policy *pol, int off);
188 int off); 207struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
208 struct blkcg_policy *pol, int off);
189 209
190struct blkg_conf_ctx { 210struct blkg_conf_ctx {
191 struct gendisk *disk; 211 struct gendisk *disk;
192 struct blkcg_gq *blkg; 212 struct blkcg_gq *blkg;
193 u64 v; 213 char *body;
194}; 214};
195 215
196int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, 216int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
197 const char *input, struct blkg_conf_ctx *ctx); 217 char *input, struct blkg_conf_ctx *ctx);
198void blkg_conf_finish(struct blkg_conf_ctx *ctx); 218void blkg_conf_finish(struct blkg_conf_ctx *ctx);
199 219
200 220
@@ -205,7 +225,7 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
205 225
206static inline struct blkcg *task_blkcg(struct task_struct *tsk) 226static inline struct blkcg *task_blkcg(struct task_struct *tsk)
207{ 227{
208 return css_to_blkcg(task_css(tsk, blkio_cgrp_id)); 228 return css_to_blkcg(task_css(tsk, io_cgrp_id));
209} 229}
210 230
211static inline struct blkcg *bio_blkcg(struct bio *bio) 231static inline struct blkcg *bio_blkcg(struct bio *bio)
@@ -218,7 +238,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
218static inline struct cgroup_subsys_state * 238static inline struct cgroup_subsys_state *
219task_get_blkcg_css(struct task_struct *task) 239task_get_blkcg_css(struct task_struct *task)
220{ 240{
221 return task_get_css(task, blkio_cgrp_id); 241 return task_get_css(task, io_cgrp_id);
222} 242}
223 243
224/** 244/**
@@ -233,6 +253,52 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
233} 253}
234 254
235/** 255/**
256 * __blkg_lookup - internal version of blkg_lookup()
257 * @blkcg: blkcg of interest
258 * @q: request_queue of interest
259 * @update_hint: whether to update lookup hint with the result or not
260 *
261 * This is internal version and shouldn't be used by policy
262 * implementations. Looks up blkgs for the @blkcg - @q pair regardless of
263 * @q's bypass state. If @update_hint is %true, the caller should be
264 * holding @q->queue_lock and lookup hint is updated on success.
265 */
266static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
267 struct request_queue *q,
268 bool update_hint)
269{
270 struct blkcg_gq *blkg;
271
272 if (blkcg == &blkcg_root)
273 return q->root_blkg;
274
275 blkg = rcu_dereference(blkcg->blkg_hint);
276 if (blkg && blkg->q == q)
277 return blkg;
278
279 return blkg_lookup_slowpath(blkcg, q, update_hint);
280}
281
282/**
283 * blkg_lookup - lookup blkg for the specified blkcg - q pair
284 * @blkcg: blkcg of interest
285 * @q: request_queue of interest
286 *
287 * Lookup blkg for the @blkcg - @q pair. This function should be called
288 * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
289 * - see blk_queue_bypass_start() for details.
290 */
291static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
292 struct request_queue *q)
293{
294 WARN_ON_ONCE(!rcu_read_lock_held());
295
296 if (unlikely(blk_queue_bypass(q)))
297 return NULL;
298 return __blkg_lookup(blkcg, q, false);
299}
300
301/**
236 * blkg_to_pdata - get policy private data 302 * blkg_to_pdata - get policy private data
237 * @blkg: blkg of interest 303 * @blkg: blkg of interest
238 * @pol: policy of interest 304 * @pol: policy of interest
@@ -248,7 +314,7 @@ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
248static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg, 314static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
249 struct blkcg_policy *pol) 315 struct blkcg_policy *pol)
250{ 316{
251 return blkcg ? blkcg->pd[pol->plid] : NULL; 317 return blkcg ? blkcg->cpd[pol->plid] : NULL;
252} 318}
253 319
254/** 320/**
@@ -262,6 +328,11 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
262 return pd ? pd->blkg : NULL; 328 return pd ? pd->blkg : NULL;
263} 329}
264 330
331static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
332{
333 return cpd ? cpd->blkcg : NULL;
334}
335
265/** 336/**
266 * blkg_path - format cgroup path of blkg 337 * blkg_path - format cgroup path of blkg
267 * @blkg: blkg of interest 338 * @blkg: blkg of interest
@@ -309,9 +380,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
309 call_rcu(&blkg->rcu_head, __blkg_release_rcu); 380 call_rcu(&blkg->rcu_head, __blkg_release_rcu);
310} 381}
311 382
312struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
313 bool update_hint);
314
315/** 383/**
316 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants 384 * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
317 * @d_blkg: loop cursor pointing to the current descendant 385 * @d_blkg: loop cursor pointing to the current descendant
@@ -373,8 +441,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
373 * or if either the blkcg or queue is going away. Fall back to 441 * or if either the blkcg or queue is going away. Fall back to
374 * root_rl in such cases. 442 * root_rl in such cases.
375 */ 443 */
376 blkg = blkg_lookup_create(blkcg, q); 444 blkg = blkg_lookup(blkcg, q);
377 if (IS_ERR(blkg)) 445 if (unlikely(!blkg))
378 goto root_rl; 446 goto root_rl;
379 447
380 blkg_get(blkg); 448 blkg_get(blkg);
@@ -394,8 +462,7 @@ root_rl:
394 */ 462 */
395static inline void blk_put_rl(struct request_list *rl) 463static inline void blk_put_rl(struct request_list *rl)
396{ 464{
397 /* root_rl may not have blkg set */ 465 if (rl->blkg->blkcg != &blkcg_root)
398 if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
399 blkg_put(rl->blkg); 466 blkg_put(rl->blkg);
400} 467}
401 468
@@ -433,9 +500,21 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl,
433#define blk_queue_for_each_rl(rl, q) \ 500#define blk_queue_for_each_rl(rl, q) \
434 for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) 501 for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
435 502
436static inline void blkg_stat_init(struct blkg_stat *stat) 503static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
437{ 504{
438 u64_stats_init(&stat->syncp); 505 int ret;
506
507 ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
508 if (ret)
509 return ret;
510
511 atomic64_set(&stat->aux_cnt, 0);
512 return 0;
513}
514
515static inline void blkg_stat_exit(struct blkg_stat *stat)
516{
517 percpu_counter_destroy(&stat->cpu_cnt);
439} 518}
440 519
441/** 520/**
@@ -443,34 +522,21 @@ static inline void blkg_stat_init(struct blkg_stat *stat)
443 * @stat: target blkg_stat 522 * @stat: target blkg_stat
444 * @val: value to add 523 * @val: value to add
445 * 524 *
446 * Add @val to @stat. The caller is responsible for synchronizing calls to 525 * Add @val to @stat. The caller must ensure that IRQ on the same CPU
447 * this function. 526 * don't re-enter this function for the same counter.
448 */ 527 */
449static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) 528static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
450{ 529{
451 u64_stats_update_begin(&stat->syncp); 530 __percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
452 stat->cnt += val;
453 u64_stats_update_end(&stat->syncp);
454} 531}
455 532
456/** 533/**
457 * blkg_stat_read - read the current value of a blkg_stat 534 * blkg_stat_read - read the current value of a blkg_stat
458 * @stat: blkg_stat to read 535 * @stat: blkg_stat to read
459 *
460 * Read the current value of @stat. This function can be called without
461 * synchroniztion and takes care of u64 atomicity.
462 */ 536 */
463static inline uint64_t blkg_stat_read(struct blkg_stat *stat) 537static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
464{ 538{
465 unsigned int start; 539 return percpu_counter_sum_positive(&stat->cpu_cnt);
466 uint64_t v;
467
468 do {
469 start = u64_stats_fetch_begin_irq(&stat->syncp);
470 v = stat->cnt;
471 } while (u64_stats_fetch_retry_irq(&stat->syncp, start));
472
473 return v;
474} 540}
475 541
476/** 542/**
@@ -479,24 +545,46 @@ static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
479 */ 545 */
480static inline void blkg_stat_reset(struct blkg_stat *stat) 546static inline void blkg_stat_reset(struct blkg_stat *stat)
481{ 547{
482 stat->cnt = 0; 548 percpu_counter_set(&stat->cpu_cnt, 0);
549 atomic64_set(&stat->aux_cnt, 0);
483} 550}
484 551
485/** 552/**
486 * blkg_stat_merge - merge a blkg_stat into another 553 * blkg_stat_add_aux - add a blkg_stat into another's aux count
487 * @to: the destination blkg_stat 554 * @to: the destination blkg_stat
488 * @from: the source 555 * @from: the source
489 * 556 *
490 * Add @from's count to @to. 557 * Add @from's count including the aux one to @to's aux count.
491 */ 558 */
492static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from) 559static inline void blkg_stat_add_aux(struct blkg_stat *to,
560 struct blkg_stat *from)
493{ 561{
494 blkg_stat_add(to, blkg_stat_read(from)); 562 atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
563 &to->aux_cnt);
495} 564}
496 565
497static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat) 566static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
498{ 567{
499 u64_stats_init(&rwstat->syncp); 568 int i, ret;
569
570 for (i = 0; i < BLKG_RWSTAT_NR; i++) {
571 ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
572 if (ret) {
573 while (--i >= 0)
574 percpu_counter_destroy(&rwstat->cpu_cnt[i]);
575 return ret;
576 }
577 atomic64_set(&rwstat->aux_cnt[i], 0);
578 }
579 return 0;
580}
581
582static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
583{
584 int i;
585
586 for (i = 0; i < BLKG_RWSTAT_NR; i++)
587 percpu_counter_destroy(&rwstat->cpu_cnt[i]);
500} 588}
501 589
502/** 590/**
@@ -511,39 +599,38 @@ static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat)
511static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, 599static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
512 int rw, uint64_t val) 600 int rw, uint64_t val)
513{ 601{
514 u64_stats_update_begin(&rwstat->syncp); 602 struct percpu_counter *cnt;
515 603
516 if (rw & REQ_WRITE) 604 if (rw & REQ_WRITE)
517 rwstat->cnt[BLKG_RWSTAT_WRITE] += val; 605 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
518 else 606 else
519 rwstat->cnt[BLKG_RWSTAT_READ] += val; 607 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
608
609 __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
610
520 if (rw & REQ_SYNC) 611 if (rw & REQ_SYNC)
521 rwstat->cnt[BLKG_RWSTAT_SYNC] += val; 612 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
522 else 613 else
523 rwstat->cnt[BLKG_RWSTAT_ASYNC] += val; 614 cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
524 615
525 u64_stats_update_end(&rwstat->syncp); 616 __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
526} 617}
527 618
528/** 619/**
529 * blkg_rwstat_read - read the current values of a blkg_rwstat 620 * blkg_rwstat_read - read the current values of a blkg_rwstat
530 * @rwstat: blkg_rwstat to read 621 * @rwstat: blkg_rwstat to read
531 * 622 *
532 * Read the current snapshot of @rwstat and return it as the return value. 623 * Read the current snapshot of @rwstat and return it in the aux counts.
533 * This function can be called without synchronization and takes care of
534 * u64 atomicity.
535 */ 624 */
536static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) 625static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
537{ 626{
538 unsigned int start; 627 struct blkg_rwstat result;
539 struct blkg_rwstat tmp; 628 int i;
540
541 do {
542 start = u64_stats_fetch_begin_irq(&rwstat->syncp);
543 tmp = *rwstat;
544 } while (u64_stats_fetch_retry_irq(&rwstat->syncp, start));
545 629
546 return tmp; 630 for (i = 0; i < BLKG_RWSTAT_NR; i++)
631 atomic64_set(&result.aux_cnt[i],
632 percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
633 return result;
547} 634}
548 635
549/** 636/**
@@ -558,7 +645,8 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
558{ 645{
559 struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); 646 struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
560 647
561 return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; 648 return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
649 atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
562} 650}
563 651
564/** 652/**
@@ -567,26 +655,71 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
567 */ 655 */
568static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat) 656static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
569{ 657{
570 memset(rwstat->cnt, 0, sizeof(rwstat->cnt)); 658 int i;
659
660 for (i = 0; i < BLKG_RWSTAT_NR; i++) {
661 percpu_counter_set(&rwstat->cpu_cnt[i], 0);
662 atomic64_set(&rwstat->aux_cnt[i], 0);
663 }
571} 664}
572 665
573/** 666/**
574 * blkg_rwstat_merge - merge a blkg_rwstat into another 667 * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
575 * @to: the destination blkg_rwstat 668 * @to: the destination blkg_rwstat
576 * @from: the source 669 * @from: the source
577 * 670 *
578 * Add @from's counts to @to. 671 * Add @from's count including the aux one to @to's aux count.
579 */ 672 */
580static inline void blkg_rwstat_merge(struct blkg_rwstat *to, 673static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
581 struct blkg_rwstat *from) 674 struct blkg_rwstat *from)
582{ 675{
583 struct blkg_rwstat v = blkg_rwstat_read(from); 676 struct blkg_rwstat v = blkg_rwstat_read(from);
584 int i; 677 int i;
585 678
586 u64_stats_update_begin(&to->syncp);
587 for (i = 0; i < BLKG_RWSTAT_NR; i++) 679 for (i = 0; i < BLKG_RWSTAT_NR; i++)
588 to->cnt[i] += v.cnt[i]; 680 atomic64_add(atomic64_read(&v.aux_cnt[i]) +
589 u64_stats_update_end(&to->syncp); 681 atomic64_read(&from->aux_cnt[i]),
682 &to->aux_cnt[i]);
683}
684
685#ifdef CONFIG_BLK_DEV_THROTTLING
686extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
687 struct bio *bio);
688#else
689static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
690 struct bio *bio) { return false; }
691#endif
692
693static inline bool blkcg_bio_issue_check(struct request_queue *q,
694 struct bio *bio)
695{
696 struct blkcg *blkcg;
697 struct blkcg_gq *blkg;
698 bool throtl = false;
699
700 rcu_read_lock();
701 blkcg = bio_blkcg(bio);
702
703 blkg = blkg_lookup(blkcg, q);
704 if (unlikely(!blkg)) {
705 spin_lock_irq(q->queue_lock);
706 blkg = blkg_lookup_create(blkcg, q);
707 if (IS_ERR(blkg))
708 blkg = NULL;
709 spin_unlock_irq(q->queue_lock);
710 }
711
712 throtl = blk_throtl_bio(q, blkg, bio);
713
714 if (!throtl) {
715 blkg = blkg ?: q->root_blkg;
716 blkg_rwstat_add(&blkg->stat_bytes, bio->bi_flags,
717 bio->bi_iter.bi_size);
718 blkg_rwstat_add(&blkg->stat_ios, bio->bi_flags, 1);
719 }
720
721 rcu_read_unlock();
722 return !throtl;
590} 723}
591 724
592#else /* CONFIG_BLK_CGROUP */ 725#else /* CONFIG_BLK_CGROUP */
@@ -642,6 +775,9 @@ static inline void blk_put_rl(struct request_list *rl) { }
642static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } 775static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
643static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } 776static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
644 777
778static inline bool blkcg_bio_issue_check(struct request_queue *q,
779 struct bio *bio) { return true; }
780
645#define blk_queue_for_each_rl(rl, q) \ 781#define blk_queue_for_each_rl(rl, q) \
646 for ((rl) = &(q)->root_rl; (rl); (rl) = NULL) 782 for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
647 783
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 1f36945fd23d..1a96fdaa33d5 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -27,7 +27,7 @@ SUBSYS(cpuacct)
27#endif 27#endif
28 28
29#if IS_ENABLED(CONFIG_BLK_CGROUP) 29#if IS_ENABLED(CONFIG_BLK_CGROUP)
30SUBSYS(blkio) 30SUBSYS(io)
31#endif 31#endif
32 32
33#if IS_ENABLED(CONFIG_MEMCG) 33#if IS_ENABLED(CONFIG_MEMCG)
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 123be25ea15a..5d4e9c4b821d 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -266,6 +266,7 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
266} 266}
267 267
268int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen); 268int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
269size_t kernfs_path_len(struct kernfs_node *kn);
269char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, 270char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
270 size_t buflen); 271 size_t buflen);
271void pr_cont_kernfs_name(struct kernfs_node *kn); 272void pr_cont_kernfs_name(struct kernfs_node *kn);
@@ -332,6 +333,9 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
332static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) 333static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
333{ return -ENOSYS; } 334{ return -ENOSYS; }
334 335
336static inline size_t kernfs_path_len(struct kernfs_node *kn)
337{ return 0; }
338
335static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf, 339static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
336 size_t buflen) 340 size_t buflen)
337{ return NULL; } 341{ return NULL; }