Diffstat (limited to 'include')
-rw-r--r--	include/linux/backing-dev.h		|  26
-rw-r--r--	include/linux/blk-cgroup.h		| 340
-rw-r--r--	include/linux/cgroup_subsys.h		|   2
-rw-r--r--	include/linux/kernfs.h			|   4
-rw-r--r--	include/trace/events/writeback.h	| 180
5 files changed, 397 insertions(+), 155 deletions(-)
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 0fe9df983ab7..5a5d79ee256f 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -286,7 +286,7 @@ static inline struct bdi_writeback *wb_find_current(struct backing_dev_info *bdi
	 * %current's blkcg equals the effective blkcg of its memcg.  No
	 * need to use the relatively expensive cgroup_get_e_css().
	 */
-	if (likely(wb && wb->blkcg_css == task_css(current, blkio_cgrp_id)))
+	if (likely(wb && wb->blkcg_css == task_css(current, io_cgrp_id)))
 		return wb;
 	return NULL;
 }
@@ -402,7 +402,7 @@ static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked)
 }
 
 struct wb_iter {
-	int			start_blkcg_id;
+	int			start_memcg_id;
 	struct radix_tree_iter	tree_iter;
 	void			**slot;
 };
@@ -414,9 +414,9 @@ static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter,
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 
-	if (iter->start_blkcg_id >= 0) {
-		iter->slot = radix_tree_iter_init(titer, iter->start_blkcg_id);
-		iter->start_blkcg_id = -1;
+	if (iter->start_memcg_id >= 0) {
+		iter->slot = radix_tree_iter_init(titer, iter->start_memcg_id);
+		iter->start_memcg_id = -1;
 	} else {
 		iter->slot = radix_tree_next_slot(iter->slot, titer, 0);
 	}
@@ -430,30 +430,30 @@ static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter,
 
 static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter,
 						   struct backing_dev_info *bdi,
-						   int start_blkcg_id)
+						   int start_memcg_id)
 {
-	iter->start_blkcg_id = start_blkcg_id;
+	iter->start_memcg_id = start_memcg_id;
 
-	if (start_blkcg_id)
+	if (start_memcg_id)
 		return __wb_iter_next(iter, bdi);
 	else
 		return &bdi->wb;
 }
 
 /**
- * bdi_for_each_wb - walk all wb's of a bdi in ascending blkcg ID order
+ * bdi_for_each_wb - walk all wb's of a bdi in ascending memcg ID order
  * @wb_cur: cursor struct bdi_writeback pointer
  * @bdi: bdi to walk wb's of
  * @iter: pointer to struct wb_iter to be used as iteration buffer
- * @start_blkcg_id: blkcg ID to start iteration from
+ * @start_memcg_id: memcg ID to start iteration from
  *
  * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending
- * blkcg ID order starting from @start_blkcg_id.  @iter is struct wb_iter
+ * memcg ID order starting from @start_memcg_id.  @iter is struct wb_iter
  * to be used as temp storage during iteration.  rcu_read_lock() must be
  * held throughout iteration.
  */
-#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id)		\
-	for ((wb_cur) = __wb_iter_init(iter, bdi, start_blkcg_id);	\
+#define bdi_for_each_wb(wb_cur, bdi, iter, start_memcg_id)		\
+	for ((wb_cur) = __wb_iter_init(iter, bdi, start_memcg_id);	\
 	     (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi))
 
 #else	/* CONFIG_CGROUP_WRITEBACK */
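After this rename the walk is keyed by memcg ID end to end, and per the comment it must run entirely under rcu_read_lock().  A minimal sketch of a caller (hypothetical; wb_start_writeback() is assumed from this kernel's writeback API, and starting from ID 0 yields the root wb first):

	struct bdi_writeback *wb;
	struct wb_iter iter;

	rcu_read_lock();			/* required for the whole walk */
	bdi_for_each_wb(wb, bdi, &iter, 0)	/* 0: begin at bdi's root wb */
		wb_start_writeback(wb, 0, false, WB_REASON_VMSCAN);
	rcu_read_unlock();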
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index a4cd1641e9e2..0a5cc7a1109b 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -14,12 +14,15 @@
  */
 
 #include <linux/cgroup.h>
-#include <linux/u64_stats_sync.h>
+#include <linux/percpu_counter.h>
 #include <linux/seq_file.h>
 #include <linux/radix-tree.h>
 #include <linux/blkdev.h>
 #include <linux/atomic.h>
 
+/* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */
+#define BLKG_STAT_CPU_BATCH	(INT_MAX / 2)
+
 /* Max limits for throttle policy */
 #define THROTL_IOPS_MAX		UINT_MAX
 
@@ -45,7 +48,7 @@ struct blkcg {
 	struct blkcg_gq			*blkg_hint;
 	struct hlist_head		blkg_list;
 
-	struct blkcg_policy_data	*pd[BLKCG_MAX_POLS];
+	struct blkcg_policy_data	*cpd[BLKCG_MAX_POLS];
 
 	struct list_head		all_blkcgs_node;
 #ifdef CONFIG_CGROUP_WRITEBACK
@@ -53,14 +56,19 @@ struct blkcg {
 #endif
 };
 
+/*
+ * blkg_[rw]stat->aux_cnt is excluded for local stats but included for
+ * recursive.  Used to carry stats of dead children, and, for blkg_rwstat,
+ * to carry result values from read and sum operations.
+ */
 struct blkg_stat {
-	struct u64_stats_sync		syncp;
-	uint64_t			cnt;
+	struct percpu_counter		cpu_cnt;
+	atomic64_t			aux_cnt;
 };
 
 struct blkg_rwstat {
-	struct u64_stats_sync		syncp;
-	uint64_t			cnt[BLKG_RWSTAT_NR];
+	struct percpu_counter		cpu_cnt[BLKG_RWSTAT_NR];
+	atomic64_t			aux_cnt[BLKG_RWSTAT_NR];
 };
 
 /*
@@ -68,32 +76,28 @@ struct blkg_rwstat {
  * request_queue (q).  This is used by blkcg policies which need to track
  * information per blkcg - q pair.
  *
- * There can be multiple active blkcg policies and each has its private
- * data on each blkg, the size of which is determined by
- * blkcg_policy->pd_size.  blkcg core allocates and frees such areas
- * together with blkg and invokes pd_init/exit_fn() methods.
- *
- * Such private data must embed struct blkg_policy_data (pd) at the
- * beginning and pd_size can't be smaller than pd.
+ * There can be multiple active blkcg policies and each blkg:policy pair is
+ * represented by a blkg_policy_data which is allocated and freed by each
+ * policy's pd_alloc/free_fn() methods.  A policy can allocate private data
+ * area by allocating larger data structure which embeds blkg_policy_data
+ * at the beginning.
  */
 struct blkg_policy_data {
 	/* the blkg and policy id this per-policy data belongs to */
 	struct blkcg_gq			*blkg;
 	int				plid;
-
-	/* used during policy activation */
-	struct list_head		alloc_node;
 };
 
 /*
- * Policies that need to keep per-blkcg data which is independent
- * from any request_queue associated to it must specify its size
- * with the cpd_size field of the blkcg_policy structure and
- * embed a blkcg_policy_data in it. cpd_init() is invoked to let
- * each policy handle per-blkcg data.
+ * Policies that need to keep per-blkcg data which is independent from any
+ * request_queue associated to it should implement cpd_alloc/free_fn()
+ * methods.  A policy can allocate private data area by allocating larger
+ * data structure which embeds blkcg_policy_data at the beginning.
+ * cpd_init() is invoked to let each policy handle per-blkcg data.
  */
 struct blkcg_policy_data {
-	/* the policy id this per-policy data belongs to */
+	/* the blkcg and policy id this per-policy data belongs to */
+	struct blkcg			*blkcg;
 	int				plid;
 };
 
@@ -123,40 +127,50 @@ struct blkcg_gq {
 	/* is this blkg online? protected by both blkcg and q locks */
 	bool				online;
 
+	struct blkg_rwstat		stat_bytes;
+	struct blkg_rwstat		stat_ios;
+
 	struct blkg_policy_data		*pd[BLKCG_MAX_POLS];
 
 	struct rcu_head			rcu_head;
 };
 
-typedef void (blkcg_pol_init_cpd_fn)(const struct blkcg *blkcg);
-typedef void (blkcg_pol_init_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_online_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_offline_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_exit_pd_fn)(struct blkcg_gq *blkg);
-typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkcg_gq *blkg);
+typedef struct blkcg_policy_data *(blkcg_pol_alloc_cpd_fn)(gfp_t gfp);
+typedef void (blkcg_pol_init_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef void (blkcg_pol_free_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef void (blkcg_pol_bind_cpd_fn)(struct blkcg_policy_data *cpd);
+typedef struct blkg_policy_data *(blkcg_pol_alloc_pd_fn)(gfp_t gfp, int node);
+typedef void (blkcg_pol_init_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd);
+typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd);
 
 struct blkcg_policy {
 	int				plid;
-	/* policy specific private data size */
-	size_t				pd_size;
-	/* policy specific per-blkcg data size */
-	size_t				cpd_size;
 	/* cgroup files for the policy */
-	struct cftype			*cftypes;
+	struct cftype			*dfl_cftypes;
+	struct cftype			*legacy_cftypes;
 
 	/* operations */
+	blkcg_pol_alloc_cpd_fn		*cpd_alloc_fn;
 	blkcg_pol_init_cpd_fn		*cpd_init_fn;
+	blkcg_pol_free_cpd_fn		*cpd_free_fn;
+	blkcg_pol_bind_cpd_fn		*cpd_bind_fn;
+
+	blkcg_pol_alloc_pd_fn		*pd_alloc_fn;
 	blkcg_pol_init_pd_fn		*pd_init_fn;
 	blkcg_pol_online_pd_fn		*pd_online_fn;
 	blkcg_pol_offline_pd_fn		*pd_offline_fn;
-	blkcg_pol_exit_pd_fn		*pd_exit_fn;
+	blkcg_pol_free_pd_fn		*pd_free_fn;
 	blkcg_pol_reset_pd_stats_fn	*pd_reset_stats_fn;
 };
 
 extern struct blkcg blkcg_root;
 extern struct cgroup_subsys_state * const blkcg_root_css;
 
-struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q);
+struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
+				      struct request_queue *q, bool update_hint);
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 				    struct request_queue *q);
 int blkcg_init_queue(struct request_queue *q);
@@ -171,6 +185,7 @@ int blkcg_activate_policy(struct request_queue *q,
 void blkcg_deactivate_policy(struct request_queue *q,
 			     const struct blkcg_policy *pol);
 
+const char *blkg_dev_name(struct blkcg_gq *blkg);
 void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
 		       u64 (*prfill)(struct seq_file *,
 				     struct blkg_policy_data *, int),
@@ -182,19 +197,24 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off);
 u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 		       int off);
+int blkg_print_stat_bytes(struct seq_file *sf, void *v);
+int blkg_print_stat_ios(struct seq_file *sf, void *v);
+int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v);
+int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v);
 
-u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off);
-struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
-					     int off);
+u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg,
+			    struct blkcg_policy *pol, int off);
+struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg,
+					     struct blkcg_policy *pol, int off);
 
 struct blkg_conf_ctx {
 	struct gendisk			*disk;
 	struct blkcg_gq			*blkg;
-	u64				v;
+	char				*body;
 };
 
 int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
-		   const char *input, struct blkg_conf_ctx *ctx);
+		   char *input, struct blkg_conf_ctx *ctx);
 void blkg_conf_finish(struct blkg_conf_ctx *ctx);
 
 
@@ -205,7 +225,7 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
 
 static inline struct blkcg *task_blkcg(struct task_struct *tsk)
 {
-	return css_to_blkcg(task_css(tsk, blkio_cgrp_id));
+	return css_to_blkcg(task_css(tsk, io_cgrp_id));
 }
 
 static inline struct blkcg *bio_blkcg(struct bio *bio)
@@ -218,7 +238,7 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
 static inline struct cgroup_subsys_state *
 task_get_blkcg_css(struct task_struct *task)
 {
-	return task_get_css(task, blkio_cgrp_id);
+	return task_get_css(task, io_cgrp_id);
 }
 
 /**
@@ -233,6 +253,52 @@ static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
 }
 
 /**
+ * __blkg_lookup - internal version of blkg_lookup()
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ * @update_hint: whether to update lookup hint with the result or not
+ *
+ * This is internal version and shouldn't be used by policy
+ * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
+ * @q's bypass state.  If @update_hint is %true, the caller should be
+ * holding @q->queue_lock and lookup hint is updated on success.
+ */
+static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
+					     struct request_queue *q,
+					     bool update_hint)
+{
+	struct blkcg_gq *blkg;
+
+	if (blkcg == &blkcg_root)
+		return q->root_blkg;
+
+	blkg = rcu_dereference(blkcg->blkg_hint);
+	if (blkg && blkg->q == q)
+		return blkg;
+
+	return blkg_lookup_slowpath(blkcg, q, update_hint);
+}
+
+/**
+ * blkg_lookup - lookup blkg for the specified blkcg - q pair
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair.  This function should be called
+ * under RCU read lock and is guaranteed to return %NULL if @q is bypassing
+ * - see blk_queue_bypass_start() for details.
+ */
+static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
+					   struct request_queue *q)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	if (unlikely(blk_queue_bypass(q)))
+		return NULL;
+	return __blkg_lookup(blkcg, q, false);
+}
+
+/**
  * blkg_to_pdata - get policy private data
  * @blkg: blkg of interest
  * @pol: policy of interest
@@ -248,7 +314,7 @@ static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
 static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
 						     struct blkcg_policy *pol)
 {
-	return blkcg ? blkcg->pd[pol->plid] : NULL;
+	return blkcg ? blkcg->cpd[pol->plid] : NULL;
 }
 
 /**
@@ -262,6 +328,11 @@ static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd)
 	return pd ? pd->blkg : NULL;
 }
 
+static inline struct blkcg *cpd_to_blkcg(struct blkcg_policy_data *cpd)
+{
+	return cpd ? cpd->blkcg : NULL;
+}
+
 /**
  * blkg_path - format cgroup path of blkg
  * @blkg: blkg of interest
@@ -309,9 +380,6 @@ static inline void blkg_put(struct blkcg_gq *blkg)
 	call_rcu(&blkg->rcu_head, __blkg_release_rcu);
 }
 
-struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
-			       bool update_hint);
-
 /**
  * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
  * @d_blkg: loop cursor pointing to the current descendant
@@ -373,8 +441,8 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
 	 * or if either the blkcg or queue is going away.  Fall back to
 	 * root_rl in such cases.
 	 */
-	blkg = blkg_lookup_create(blkcg, q);
-	if (IS_ERR(blkg))
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg))
 		goto root_rl;
 
 	blkg_get(blkg);
@@ -394,8 +462,7 @@ root_rl:
  */
 static inline void blk_put_rl(struct request_list *rl)
 {
-	/* root_rl may not have blkg set */
-	if (rl->blkg && rl->blkg->blkcg != &blkcg_root)
+	if (rl->blkg->blkcg != &blkcg_root)
 		blkg_put(rl->blkg);
 }
 
@@ -433,9 +500,21 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl,
 #define blk_queue_for_each_rl(rl, q)	\
 	for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q)))
 
-static inline void blkg_stat_init(struct blkg_stat *stat)
+static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp)
 {
-	u64_stats_init(&stat->syncp);
+	int ret;
+
+	ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp);
+	if (ret)
+		return ret;
+
+	atomic64_set(&stat->aux_cnt, 0);
+	return 0;
+}
+
+static inline void blkg_stat_exit(struct blkg_stat *stat)
+{
+	percpu_counter_destroy(&stat->cpu_cnt);
 }
 
 /**
@@ -443,34 +522,21 @@ static inline void blkg_stat_init(struct blkg_stat *stat)
  * @stat: target blkg_stat
  * @val: value to add
  *
- * Add @val to @stat.  The caller is responsible for synchronizing calls to
- * this function.
+ * Add @val to @stat.  The caller must ensure that IRQ on the same CPU
+ * don't re-enter this function for the same counter.
  */
 static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val)
 {
-	u64_stats_update_begin(&stat->syncp);
-	stat->cnt += val;
-	u64_stats_update_end(&stat->syncp);
+	__percpu_counter_add(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH);
 }
 
 /**
  * blkg_stat_read - read the current value of a blkg_stat
  * @stat: blkg_stat to read
- *
- * Read the current value of @stat.  This function can be called without
- * synchroniztion and takes care of u64 atomicity.
  */
 static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
 {
-	unsigned int start;
-	uint64_t v;
-
-	do {
-		start = u64_stats_fetch_begin_irq(&stat->syncp);
-		v = stat->cnt;
-	} while (u64_stats_fetch_retry_irq(&stat->syncp, start));
-
-	return v;
+	return percpu_counter_sum_positive(&stat->cpu_cnt);
 }
 
 /**
@@ -479,24 +545,46 @@ static inline uint64_t blkg_stat_read(struct blkg_stat *stat)
  */
 static inline void blkg_stat_reset(struct blkg_stat *stat)
 {
-	stat->cnt = 0;
+	percpu_counter_set(&stat->cpu_cnt, 0);
+	atomic64_set(&stat->aux_cnt, 0);
 }
 
 /**
- * blkg_stat_merge - merge a blkg_stat into another
+ * blkg_stat_add_aux - add a blkg_stat into another's aux count
  * @to: the destination blkg_stat
  * @from: the source
  *
- * Add @from's count to @to.
+ * Add @from's count including the aux one to @to's aux count.
  */
-static inline void blkg_stat_merge(struct blkg_stat *to, struct blkg_stat *from)
+static inline void blkg_stat_add_aux(struct blkg_stat *to,
+				     struct blkg_stat *from)
 {
-	blkg_stat_add(to, blkg_stat_read(from));
+	atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt),
+		     &to->aux_cnt);
 }
 
-static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat)
+static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp)
 {
-	u64_stats_init(&rwstat->syncp);
+	int i, ret;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
+		ret = percpu_counter_init(&rwstat->cpu_cnt[i], 0, gfp);
+		if (ret) {
+			while (--i >= 0)
+				percpu_counter_destroy(&rwstat->cpu_cnt[i]);
+			return ret;
+		}
+		atomic64_set(&rwstat->aux_cnt[i], 0);
+	}
+	return 0;
+}
+
+static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat)
+{
+	int i;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		percpu_counter_destroy(&rwstat->cpu_cnt[i]);
 }
 
 /**
@@ -511,39 +599,38 @@ static inline void blkg_rwstat_init(struct blkg_rwstat *rwstat)
 static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat,
 				   int rw, uint64_t val)
 {
-	u64_stats_update_begin(&rwstat->syncp);
+	struct percpu_counter *cnt;
 
 	if (rw & REQ_WRITE)
-		rwstat->cnt[BLKG_RWSTAT_WRITE] += val;
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE];
 	else
-		rwstat->cnt[BLKG_RWSTAT_READ] += val;
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ];
+
+	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
+
 	if (rw & REQ_SYNC)
-		rwstat->cnt[BLKG_RWSTAT_SYNC] += val;
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC];
 	else
-		rwstat->cnt[BLKG_RWSTAT_ASYNC] += val;
+		cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC];
 
-	u64_stats_update_end(&rwstat->syncp);
+	__percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH);
 }
 
 /**
  * blkg_rwstat_read - read the current values of a blkg_rwstat
  * @rwstat: blkg_rwstat to read
  *
- * Read the current snapshot of @rwstat and return it as the return value.
- * This function can be called without synchronization and takes care of
- * u64 atomicity.
+ * Read the current snapshot of @rwstat and return it in the aux counts.
  */
 static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat)
 {
-	unsigned int start;
-	struct blkg_rwstat tmp;
-
-	do {
-		start = u64_stats_fetch_begin_irq(&rwstat->syncp);
-		tmp = *rwstat;
-	} while (u64_stats_fetch_retry_irq(&rwstat->syncp, start));
+	struct blkg_rwstat result;
+	int i;
 
-	return tmp;
+	for (i = 0; i < BLKG_RWSTAT_NR; i++)
+		atomic64_set(&result.aux_cnt[i],
+			     percpu_counter_sum_positive(&rwstat->cpu_cnt[i]));
+	return result;
 }
 
 /**
@@ -558,7 +645,8 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
 {
 	struct blkg_rwstat tmp = blkg_rwstat_read(rwstat);
 
-	return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE];
+	return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) +
+		atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]);
 }
 
 /**
@@ -567,26 +655,71 @@ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat)
  */
 static inline void blkg_rwstat_reset(struct blkg_rwstat *rwstat)
 {
-	memset(rwstat->cnt, 0, sizeof(rwstat->cnt));
+	int i;
+
+	for (i = 0; i < BLKG_RWSTAT_NR; i++) {
+		percpu_counter_set(&rwstat->cpu_cnt[i], 0);
+		atomic64_set(&rwstat->aux_cnt[i], 0);
+	}
 }
 
 /**
- * blkg_rwstat_merge - merge a blkg_rwstat into another
+ * blkg_rwstat_add_aux - add a blkg_rwstat into another's aux count
  * @to: the destination blkg_rwstat
  * @from: the source
  *
- * Add @from's counts to @to.
+ * Add @from's count including the aux one to @to's aux count.
  */
-static inline void blkg_rwstat_merge(struct blkg_rwstat *to,
-				     struct blkg_rwstat *from)
+static inline void blkg_rwstat_add_aux(struct blkg_rwstat *to,
+				       struct blkg_rwstat *from)
 {
 	struct blkg_rwstat v = blkg_rwstat_read(from);
 	int i;
 
-	u64_stats_update_begin(&to->syncp);
 	for (i = 0; i < BLKG_RWSTAT_NR; i++)
-		to->cnt[i] += v.cnt[i];
-	u64_stats_update_end(&to->syncp);
+		atomic64_add(atomic64_read(&v.aux_cnt[i]) +
+			     atomic64_read(&from->aux_cnt[i]),
+			     &to->aux_cnt[i]);
+}
+
+#ifdef CONFIG_BLK_DEV_THROTTLING
+extern bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
+			   struct bio *bio);
+#else
+static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
+				  struct bio *bio) { return false; }
+#endif
+
+static inline bool blkcg_bio_issue_check(struct request_queue *q,
+					 struct bio *bio)
+{
+	struct blkcg *blkcg;
+	struct blkcg_gq *blkg;
+	bool throtl = false;
+
+	rcu_read_lock();
+	blkcg = bio_blkcg(bio);
+
+	blkg = blkg_lookup(blkcg, q);
+	if (unlikely(!blkg)) {
+		spin_lock_irq(q->queue_lock);
+		blkg = blkg_lookup_create(blkcg, q);
+		if (IS_ERR(blkg))
+			blkg = NULL;
+		spin_unlock_irq(q->queue_lock);
+	}
+
+	throtl = blk_throtl_bio(q, blkg, bio);
+
+	if (!throtl) {
+		blkg = blkg ?: q->root_blkg;
+		blkg_rwstat_add(&blkg->stat_bytes, bio->bi_flags,
+				bio->bi_iter.bi_size);
+		blkg_rwstat_add(&blkg->stat_ios, bio->bi_flags, 1);
+	}
+
+	rcu_read_unlock();
+	return !throtl;
 }
 
 #else	/* CONFIG_BLK_CGROUP */
@@ -642,6 +775,9 @@ static inline void blk_put_rl(struct request_list *rl) { }
 static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
 static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
 
+static inline bool blkcg_bio_issue_check(struct request_queue *q,
+					 struct bio *bio) { return true; }
+
 #define blk_queue_for_each_rl(rl, q)	\
 	for ((rl) = &(q)->root_rl; (rl); (rl) = NULL)
 
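With blkg_stat and blkg_rwstat now backed by percpu_counters, initialization takes a gfp mask, can fail, and must be paired with the new *_exit() destructors.  A minimal sketch of how a policy's new pd_alloc_fn()/pd_free_fn() pair might drive that lifecycle (struct my_pd and both helpers are hypothetical, shown only to illustrate the API above):

	struct my_pd {				/* hypothetical policy data */
		struct blkg_policy_data pd;	/* must be embedded first */
		struct blkg_stat nr_dispatched;
	};

	static struct blkg_policy_data *my_pd_alloc(gfp_t gfp, int node)
	{
		struct my_pd *mpd = kzalloc_node(sizeof(*mpd), gfp, node);

		if (!mpd)
			return NULL;
		if (blkg_stat_init(&mpd->nr_dispatched, gfp)) {
			kfree(mpd);		/* percpu allocation failed */
			return NULL;
		}
		return &mpd->pd;
	}

	static void my_pd_free(struct blkg_policy_data *pd)
	{
		struct my_pd *mpd = container_of(pd, struct my_pd, pd);

		blkg_stat_exit(&mpd->nr_dispatched);	/* frees the percpu counter */
		kfree(mpd);
	}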
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 1f36945fd23d..1a96fdaa33d5 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -27,7 +27,7 @@ SUBSYS(cpuacct)
 #endif
 
 #if IS_ENABLED(CONFIG_BLK_CGROUP)
-SUBSYS(blkio)
+SUBSYS(io)
 #endif
 
 #if IS_ENABLED(CONFIG_MEMCG)
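The rename ripples into generated identifiers because cgroup_subsys.h is included multiple times under different SUBSYS() definitions.  Roughly, from cgroup-defs.h of this era (simplified):

	#define SUBSYS(_x) _x ## _cgrp_id,
	enum cgroup_subsys_id {
	#include <linux/cgroup_subsys.h>
		CGROUP_SUBSYS_COUNT,
	};
	#undef SUBSYS

so SUBSYS(io) now yields io_cgrp_id (and likewise io_cgrp_subsys), which is why the blkio_cgrp_id users in the hunks above become io_cgrp_id.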
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index 123be25ea15a..5d4e9c4b821d 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -266,6 +266,7 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
 }
 
 int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen);
+size_t kernfs_path_len(struct kernfs_node *kn);
 char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
 				size_t buflen);
 void pr_cont_kernfs_name(struct kernfs_node *kn);
@@ -332,6 +333,9 @@ static inline bool kernfs_ns_enabled(struct kernfs_node *kn)
 static inline int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
 { return -ENOSYS; }
 
+static inline size_t kernfs_path_len(struct kernfs_node *kn)
+{ return 0; }
+
 static inline char * __must_check kernfs_path(struct kernfs_node *kn, char *buf,
 					      size_t buflen)
 { return NULL; }
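kernfs_path_len() returns the path length excluding the terminating NUL, which is why every caller in the tracing changes below adds 1.  A minimal sketch of the size-then-format pattern (hypothetical caller; kn is a valid kernfs node):

	size_t len = kernfs_path_len(kn) + 1;	/* +1 for the '\0' */
	char *buf = kmalloc(len, GFP_KERNEL);

	if (buf && kernfs_path(kn, buf, len))	/* returns buf on success */
		pr_info("node path: %s\n", buf);
	kfree(buf);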
diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
index a7aa607a4c55..fff846b512e6 100644
--- a/include/trace/events/writeback.h
+++ b/include/trace/events/writeback.h
@@ -131,6 +131,66 @@ DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode,
 	TP_ARGS(inode, flags)
 );
 
+#ifdef CREATE_TRACE_POINTS
+#ifdef CONFIG_CGROUP_WRITEBACK
+
+static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
+{
+	return kernfs_path_len(wb->memcg_css->cgroup->kn) + 1;
+}
+
+static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
+{
+	struct cgroup *cgrp = wb->memcg_css->cgroup;
+	char *path;
+
+	path = cgroup_path(cgrp, buf, kernfs_path_len(cgrp->kn) + 1);
+	WARN_ON_ONCE(path != buf);
+}
+
+static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
+{
+	if (wbc->wb)
+		return __trace_wb_cgroup_size(wbc->wb);
+	else
+		return 2;
+}
+
+static inline void __trace_wbc_assign_cgroup(char *buf,
+					     struct writeback_control *wbc)
+{
+	if (wbc->wb)
+		__trace_wb_assign_cgroup(buf, wbc->wb);
+	else
+		strcpy(buf, "/");
+}
+
+#else	/* CONFIG_CGROUP_WRITEBACK */
+
+static inline size_t __trace_wb_cgroup_size(struct bdi_writeback *wb)
+{
+	return 2;
+}
+
+static inline void __trace_wb_assign_cgroup(char *buf, struct bdi_writeback *wb)
+{
+	strcpy(buf, "/");
+}
+
+static inline size_t __trace_wbc_cgroup_size(struct writeback_control *wbc)
+{
+	return 2;
+}
+
+static inline void __trace_wbc_assign_cgroup(char *buf,
+					     struct writeback_control *wbc)
+{
+	strcpy(buf, "/");
+}
+
+#endif	/* CONFIG_CGROUP_WRITEBACK */
+#endif	/* CREATE_TRACE_POINTS */
+
 DECLARE_EVENT_CLASS(writeback_write_inode_template,
 
 	TP_PROTO(struct inode *inode, struct writeback_control *wbc),
@@ -141,6 +201,7 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
 		__array(char, name, 32)
 		__field(unsigned long, ino)
 		__field(int, sync_mode)
+		__dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
 	),
 
 	TP_fast_assign(
@@ -148,12 +209,14 @@ DECLARE_EVENT_CLASS(writeback_write_inode_template,
 			dev_name(inode_to_bdi(inode)->dev), 32);
 		__entry->ino = inode->i_ino;
 		__entry->sync_mode = wbc->sync_mode;
+		__trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
 	),
 
-	TP_printk("bdi %s: ino=%lu sync_mode=%d",
+	TP_printk("bdi %s: ino=%lu sync_mode=%d cgroup=%s",
 		__entry->name,
 		__entry->ino,
-		__entry->sync_mode
+		__entry->sync_mode,
+		__get_str(cgroup)
 	)
 );
 
@@ -172,8 +235,8 @@ DEFINE_EVENT(writeback_write_inode_template, writeback_write_inode,
 );
 
 DECLARE_EVENT_CLASS(writeback_work_class,
-	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work),
-	TP_ARGS(bdi, work),
+	TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work),
+	TP_ARGS(wb, work),
 	TP_STRUCT__entry(
 		__array(char, name, 32)
 		__field(long, nr_pages)
@@ -183,10 +246,11 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		__field(int, range_cyclic)
 		__field(int, for_background)
 		__field(int, reason)
+		__dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
 	),
 	TP_fast_assign(
 		strncpy(__entry->name,
-			bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
+			wb->bdi->dev ? dev_name(wb->bdi->dev) : "(unknown)", 32);
 		__entry->nr_pages = work->nr_pages;
 		__entry->sb_dev = work->sb ? work->sb->s_dev : 0;
 		__entry->sync_mode = work->sync_mode;
@@ -194,9 +258,10 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		__entry->range_cyclic = work->range_cyclic;
 		__entry->for_background = work->for_background;
 		__entry->reason = work->reason;
+		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
 	),
 	TP_printk("bdi %s: sb_dev %d:%d nr_pages=%ld sync_mode=%d "
-		  "kupdate=%d range_cyclic=%d background=%d reason=%s",
+		  "kupdate=%d range_cyclic=%d background=%d reason=%s cgroup=%s",
 		  __entry->name,
 		  MAJOR(__entry->sb_dev), MINOR(__entry->sb_dev),
 		  __entry->nr_pages,
@@ -204,13 +269,14 @@ DECLARE_EVENT_CLASS(writeback_work_class,
 		  __entry->for_kupdate,
 		  __entry->range_cyclic,
 		  __entry->for_background,
-		  __print_symbolic(__entry->reason, WB_WORK_REASON)
+		  __print_symbolic(__entry->reason, WB_WORK_REASON),
+		  __get_str(cgroup)
 	)
 );
 #define DEFINE_WRITEBACK_WORK_EVENT(name) \
 DEFINE_EVENT(writeback_work_class, name, \
-	TP_PROTO(struct backing_dev_info *bdi, struct wb_writeback_work *work), \
-	TP_ARGS(bdi, work))
+	TP_PROTO(struct bdi_writeback *wb, struct wb_writeback_work *work), \
+	TP_ARGS(wb, work))
 DEFINE_WRITEBACK_WORK_EVENT(writeback_queue);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_exec);
 DEFINE_WRITEBACK_WORK_EVENT(writeback_start);
@@ -230,26 +296,42 @@ TRACE_EVENT(writeback_pages_written,
 );
 
 DECLARE_EVENT_CLASS(writeback_class,
-	TP_PROTO(struct backing_dev_info *bdi),
-	TP_ARGS(bdi),
+	TP_PROTO(struct bdi_writeback *wb),
+	TP_ARGS(wb),
 	TP_STRUCT__entry(
 		__array(char, name, 32)
+		__dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
 	),
 	TP_fast_assign(
-		strncpy(__entry->name, dev_name(bdi->dev), 32);
+		strncpy(__entry->name, dev_name(wb->bdi->dev), 32);
+		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
 	),
-	TP_printk("bdi %s",
-		__entry->name
+	TP_printk("bdi %s: cgroup=%s",
+		__entry->name,
+		__get_str(cgroup)
 	)
 );
 #define DEFINE_WRITEBACK_EVENT(name) \
 DEFINE_EVENT(writeback_class, name, \
-	TP_PROTO(struct backing_dev_info *bdi), \
-	TP_ARGS(bdi))
+	TP_PROTO(struct bdi_writeback *wb), \
+	TP_ARGS(wb))
 
 DEFINE_WRITEBACK_EVENT(writeback_nowork);
 DEFINE_WRITEBACK_EVENT(writeback_wake_background);
-DEFINE_WRITEBACK_EVENT(writeback_bdi_register);
+
+TRACE_EVENT(writeback_bdi_register,
+	TP_PROTO(struct backing_dev_info *bdi),
+	TP_ARGS(bdi),
+	TP_STRUCT__entry(
+		__array(char, name, 32)
+	),
+	TP_fast_assign(
+		strncpy(__entry->name, dev_name(bdi->dev), 32);
+	),
+	TP_printk("bdi %s",
+		__entry->name
+	)
+);
 
 DECLARE_EVENT_CLASS(wbc_class,
 	TP_PROTO(struct writeback_control *wbc, struct backing_dev_info *bdi),
@@ -265,6 +347,7 @@ DECLARE_EVENT_CLASS(wbc_class,
 		__field(int, range_cyclic)
 		__field(long, range_start)
 		__field(long, range_end)
+		__dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
 	),
 
 	TP_fast_assign(
@@ -278,11 +361,12 @@ DECLARE_EVENT_CLASS(wbc_class,
 		__entry->range_cyclic = wbc->range_cyclic;
 		__entry->range_start = (long)wbc->range_start;
 		__entry->range_end = (long)wbc->range_end;
+		__trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
 	),
 
 	TP_printk("bdi %s: towrt=%ld skip=%ld mode=%d kupd=%d "
 		"bgrd=%d reclm=%d cyclic=%d "
-		"start=0x%lx end=0x%lx",
+		"start=0x%lx end=0x%lx cgroup=%s",
 		__entry->name,
 		__entry->nr_to_write,
 		__entry->pages_skipped,
@@ -292,7 +376,9 @@ DECLARE_EVENT_CLASS(wbc_class,
 		__entry->for_reclaim,
 		__entry->range_cyclic,
 		__entry->range_start,
-		__entry->range_end)
+		__entry->range_end,
+		__get_str(cgroup)
+	)
 )
 
 #define DEFINE_WBC_EVENT(name) \
@@ -312,6 +398,7 @@ TRACE_EVENT(writeback_queue_io,
 		__field(long, age)
 		__field(int, moved)
 		__field(int, reason)
+		__dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
 	),
 	TP_fast_assign(
 		unsigned long *older_than_this = work->older_than_this;
@@ -321,13 +408,15 @@ TRACE_EVENT(writeback_queue_io,
 			  (jiffies - *older_than_this) * 1000 / HZ : -1;
 		__entry->moved = moved;
 		__entry->reason = work->reason;
+		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
 	),
-	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s",
+	TP_printk("bdi %s: older=%lu age=%ld enqueue=%d reason=%s cgroup=%s",
 		__entry->name,
 		__entry->older,	/* older_than_this in jiffies */
 		__entry->age,	/* older_than_this in relative milliseconds */
 		__entry->moved,
-		__print_symbolic(__entry->reason, WB_WORK_REASON)
+		__print_symbolic(__entry->reason, WB_WORK_REASON),
+		__get_str(cgroup)
 	)
 );
 
@@ -381,11 +470,11 @@ TRACE_EVENT(global_dirty_state,
 
 TRACE_EVENT(bdi_dirty_ratelimit,
 
-	TP_PROTO(struct backing_dev_info *bdi,
+	TP_PROTO(struct bdi_writeback *wb,
 		 unsigned long dirty_rate,
 		 unsigned long task_ratelimit),
 
-	TP_ARGS(bdi, dirty_rate, task_ratelimit),
+	TP_ARGS(wb, dirty_rate, task_ratelimit),
 
 	TP_STRUCT__entry(
 		__array(char,		bdi, 32)
@@ -395,36 +484,39 @@ TRACE_EVENT(bdi_dirty_ratelimit,
 		__field(unsigned long,	dirty_ratelimit)
 		__field(unsigned long,	task_ratelimit)
 		__field(unsigned long,	balanced_dirty_ratelimit)
+		__dynamic_array(char,	cgroup, __trace_wb_cgroup_size(wb))
 	),
 
 	TP_fast_assign(
-		strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
-		__entry->write_bw	= KBps(bdi->wb.write_bandwidth);
-		__entry->avg_write_bw	= KBps(bdi->wb.avg_write_bandwidth);
+		strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
+		__entry->write_bw	= KBps(wb->write_bandwidth);
+		__entry->avg_write_bw	= KBps(wb->avg_write_bandwidth);
 		__entry->dirty_rate	= KBps(dirty_rate);
-		__entry->dirty_ratelimit = KBps(bdi->wb.dirty_ratelimit);
+		__entry->dirty_ratelimit = KBps(wb->dirty_ratelimit);
 		__entry->task_ratelimit	= KBps(task_ratelimit);
 		__entry->balanced_dirty_ratelimit =
-					KBps(bdi->wb.balanced_dirty_ratelimit);
+					KBps(wb->balanced_dirty_ratelimit);
+		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
 	),
 
 	TP_printk("bdi %s: "
 		  "write_bw=%lu awrite_bw=%lu dirty_rate=%lu "
 		  "dirty_ratelimit=%lu task_ratelimit=%lu "
-		  "balanced_dirty_ratelimit=%lu",
+		  "balanced_dirty_ratelimit=%lu cgroup=%s",
 		  __entry->bdi,
 		  __entry->write_bw,		/* write bandwidth */
 		  __entry->avg_write_bw,	/* avg write bandwidth */
 		  __entry->dirty_rate,		/* bdi dirty rate */
 		  __entry->dirty_ratelimit,	/* base ratelimit */
 		  __entry->task_ratelimit, /* ratelimit with position control */
-		  __entry->balanced_dirty_ratelimit /* the balanced ratelimit */
+		  __entry->balanced_dirty_ratelimit, /* the balanced ratelimit */
+		  __get_str(cgroup)
 	)
 );
 
 TRACE_EVENT(balance_dirty_pages,
 
-	TP_PROTO(struct backing_dev_info *bdi,
+	TP_PROTO(struct bdi_writeback *wb,
 		 unsigned long thresh,
 		 unsigned long bg_thresh,
 		 unsigned long dirty,
@@ -437,7 +529,7 @@ TRACE_EVENT(balance_dirty_pages,
 		 long pause,
 		 unsigned long start_time),
 
-	TP_ARGS(bdi, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
+	TP_ARGS(wb, thresh, bg_thresh, dirty, bdi_thresh, bdi_dirty,
 		dirty_ratelimit, task_ratelimit,
 		dirtied, period, pause, start_time),
 
@@ -456,11 +548,12 @@ TRACE_EVENT(balance_dirty_pages,
 		__field(	 long,	pause)
 		__field(unsigned long,	period)
 		__field(	 long,	think)
+		__dynamic_array(char,	cgroup, __trace_wb_cgroup_size(wb))
 	),
 
 	TP_fast_assign(
 		unsigned long freerun = (thresh + bg_thresh) / 2;
-		strlcpy(__entry->bdi, dev_name(bdi->dev), 32);
+		strlcpy(__entry->bdi, dev_name(wb->bdi->dev), 32);
 
 		__entry->limit		= global_wb_domain.dirty_limit;
 		__entry->setpoint	= (global_wb_domain.dirty_limit +
@@ -478,6 +571,7 @@ TRACE_EVENT(balance_dirty_pages,
 		__entry->period		= period * 1000 / HZ;
 		__entry->pause		= pause * 1000 / HZ;
 		__entry->paused		= (jiffies - start_time) * 1000 / HZ;
+		__trace_wb_assign_cgroup(__get_str(cgroup), wb);
 	),
 
 
@@ -486,7 +580,7 @@ TRACE_EVENT(balance_dirty_pages,
 		  "bdi_setpoint=%lu bdi_dirty=%lu "
 		  "dirty_ratelimit=%lu task_ratelimit=%lu "
 		  "dirtied=%u dirtied_pause=%u "
-		  "paused=%lu pause=%ld period=%lu think=%ld",
+		  "paused=%lu pause=%ld period=%lu think=%ld cgroup=%s",
 		  __entry->bdi,
 		  __entry->limit,
 		  __entry->setpoint,
@@ -500,7 +594,8 @@ TRACE_EVENT(balance_dirty_pages,
 		  __entry->paused,	/* ms */
 		  __entry->pause,	/* ms */
 		  __entry->period,	/* ms */
-		  __entry->think	/* ms */
+		  __entry->think,	/* ms */
+		  __get_str(cgroup)
 	)
 );
 
@@ -514,6 +609,8 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
 		__field(unsigned long, ino)
 		__field(unsigned long, state)
 		__field(unsigned long, dirtied_when)
+		__dynamic_array(char, cgroup,
+				__trace_wb_cgroup_size(inode_to_wb(inode)))
 	),
 
 	TP_fast_assign(
@@ -522,14 +619,16 @@ TRACE_EVENT(writeback_sb_inodes_requeue,
 		__entry->ino		= inode->i_ino;
 		__entry->state		= inode->i_state;
 		__entry->dirtied_when	= inode->dirtied_when;
+		__trace_wb_assign_cgroup(__get_str(cgroup), inode_to_wb(inode));
 	),
 
-	TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu",
+	TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu cgroup=%s",
 		  __entry->name,
 		  __entry->ino,
 		  show_inode_state(__entry->state),
 		  __entry->dirtied_when,
-		  (jiffies - __entry->dirtied_when) / HZ
+		  (jiffies - __entry->dirtied_when) / HZ,
+		  __get_str(cgroup)
 	)
 );
 
@@ -585,6 +684,7 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
 		__field(unsigned long, writeback_index)
 		__field(long, nr_to_write)
 		__field(unsigned long, wrote)
+		__dynamic_array(char, cgroup, __trace_wbc_cgroup_size(wbc))
 	),
 
 	TP_fast_assign(
@@ -596,10 +696,11 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
 		__entry->writeback_index = inode->i_mapping->writeback_index;
 		__entry->nr_to_write	= nr_to_write;
 		__entry->wrote		= nr_to_write - wbc->nr_to_write;
+		__trace_wbc_assign_cgroup(__get_str(cgroup), wbc);
 	),
 
 	TP_printk("bdi %s: ino=%lu state=%s dirtied_when=%lu age=%lu "
-		  "index=%lu to_write=%ld wrote=%lu",
+		  "index=%lu to_write=%ld wrote=%lu cgroup=%s",
 		  __entry->name,
 		  __entry->ino,
 		  show_inode_state(__entry->state),
@@ -607,7 +708,8 @@ DECLARE_EVENT_CLASS(writeback_single_inode_template,
 		  (jiffies - __entry->dirtied_when) / HZ,
 		  __entry->writeback_index,
 		  __entry->nr_to_write,
-		  __entry->wrote
+		  __entry->wrote,
+		  __get_str(cgroup)
 	)
 );
 
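All of the tracepoint conversions above follow one pattern: a size helper at __dynamic_array() declaration time reserves exactly enough ring-buffer space for the cgroup path, and an assign helper fills it during TP_fast_assign().  A sketch of the same pattern applied to a new, purely hypothetical event (not part of this patch):

	TRACE_EVENT(writeback_cgroup_example,	/* hypothetical event */
		TP_PROTO(struct bdi_writeback *wb),
		TP_ARGS(wb),
		TP_STRUCT__entry(
			/* sized per event: path length of wb's memcg cgroup */
			__dynamic_array(char, cgroup, __trace_wb_cgroup_size(wb))
		),
		TP_fast_assign(
			/* copies the cgroup path, or "/" w/o cgroup writeback */
			__trace_wb_assign_cgroup(__get_str(cgroup), wb);
		),
		TP_printk("cgroup=%s", __get_str(cgroup))
	);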