about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- Documentation/admin-guide/cgroup-v2.rst 8
-rw-r--r-- block/bfq-cgroup.c 4
-rw-r--r-- block/bfq-iosched.c 2
-rw-r--r-- block/bio.c 174
-rw-r--r-- block/blk-cgroup.c 123
-rw-r--r-- block/blk-core.c 1
-rw-r--r-- block/blk-iolatency.c 26
-rw-r--r-- block/blk-throttle.c 13
-rw-r--r-- block/bounce.c 4
-rw-r--r-- block/cfq-iosched.c 4
-rw-r--r-- drivers/block/loop.c 5
-rw-r--r-- drivers/md/raid0.c 2
-rw-r--r-- fs/buffer.c 10
-rw-r--r-- fs/ext4/page-io.c 2
-rw-r--r-- include/linux/bio.h 26
-rw-r--r-- include/linux/blk-cgroup.h 145
-rw-r--r-- include/linux/blk_types.h 1
-rw-r--r-- include/linux/cgroup.h 2
-rw-r--r-- include/linux/writeback.h 5
-rw-r--r-- kernel/cgroup/cgroup.c 48
-rw-r--r-- kernel/trace/blktrace.c 4
-rw-r--r-- mm/page_io.c 2
22 files changed, 208 insertions, 403 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index caf36105a1c7..184193bcb262 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1857,10 +1857,8 @@ following two functions.
1857 1857
1858 wbc_init_bio(@wbc, @bio) 1858 wbc_init_bio(@wbc, @bio)
1859 Should be called for each bio carrying writeback data and 1859 Should be called for each bio carrying writeback data and
1860 associates the bio with the inode's owner cgroup and the 1860 associates the bio with the inode's owner cgroup. Can be
1861 corresponding request queue. This must be called after 1861 called anytime between bio allocation and submission.
1862 a queue (device) has been associated with the bio and
1863 before submission.
1864 1862
1865 wbc_account_io(@wbc, @page, @bytes) 1863 wbc_account_io(@wbc, @page, @bytes)
1866 Should be called for each data segment being written out. 1864 Should be called for each data segment being written out.
@@ -1879,7 +1877,7 @@ the configuration, the bio may be executed at a lower priority and if
1879the writeback session is holding shared resources, e.g. a journal 1877the writeback session is holding shared resources, e.g. a journal
1880entry, may lead to priority inversion. There is no one easy solution 1878entry, may lead to priority inversion. There is no one easy solution
1881for the problem. Filesystems can try to work around specific problem 1879for the problem. Filesystems can try to work around specific problem
1882cases by skipping wbc_init_bio() or using bio_associate_create_blkg() 1880cases by skipping wbc_init_bio() or using bio_associate_blkcg()
1883directly. 1881directly.
1884 1882
1885 1883
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index d9a7916ff0ab..9fe5952d117d 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
642 uint64_t serial_nr; 642 uint64_t serial_nr;
643 643
644 rcu_read_lock(); 644 rcu_read_lock();
645 serial_nr = __bio_blkcg(bio)->css.serial_nr; 645 serial_nr = bio_blkcg(bio)->css.serial_nr;
646 646
647 /* 647 /*
648 * Check whether blkcg has changed. The condition may trigger 648 * Check whether blkcg has changed. The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
651 if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) 651 if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
652 goto out; 652 goto out;
653 653
654 bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); 654 bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
655 /* 655 /*
656 * Update blkg_path for bfq_log_* functions. We cache this 656 * Update blkg_path for bfq_log_* functions. We cache this
657 * path, and update it here, for the following 657 * path, and update it here, for the following
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6075100f03a5..3a27d31fcda6 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
4384 4384
4385 rcu_read_lock(); 4385 rcu_read_lock();
4386 4386
4387 bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); 4387 bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
4388 if (!bfqg) { 4388 if (!bfqg) {
4389 bfqq = &bfqd->oom_bfqq; 4389 bfqq = &bfqd->oom_bfqq;
4390 goto out; 4390 goto out;
diff --git a/block/bio.c b/block/bio.c
index bbfeb4ee2892..4a5a036268fb 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
609 bio->bi_iter = bio_src->bi_iter; 609 bio->bi_iter = bio_src->bi_iter;
610 bio->bi_io_vec = bio_src->bi_io_vec; 610 bio->bi_io_vec = bio_src->bi_io_vec;
611 611
612 bio_clone_blkg_association(bio, bio_src); 612 bio_clone_blkcg_association(bio, bio_src);
613
614 blkcg_bio_issue_init(bio);
615} 613}
616EXPORT_SYMBOL(__bio_clone_fast); 614EXPORT_SYMBOL(__bio_clone_fast);
617 615
@@ -1956,151 +1954,69 @@ EXPORT_SYMBOL(bioset_init_from_src);
1956 1954
1957#ifdef CONFIG_BLK_CGROUP 1955#ifdef CONFIG_BLK_CGROUP
1958 1956
1959/**
1960 * bio_associate_blkg - associate a bio with a blkg
1961 * @bio: target bio
1962 * @blkg: the blkg to associate
1963 *
1964 * This tries to associate @bio with the specified blkg. Association failure
1965 * is handled by walking up the blkg tree. Therefore, the blkg associated can
1966 * be anything between @blkg and the root_blkg. This situation only happens
1967 * when a cgroup is dying and then the remaining bios will spill to the closest
1968 * alive blkg.
1969 *
1970 * A reference will be taken on the @blkg and will be released when @bio is
1971 * freed.
1972 */
1973int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
1974{
1975 if (unlikely(bio->bi_blkg))
1976 return -EBUSY;
1977 bio->bi_blkg = blkg_tryget_closest(blkg);
1978 return 0;
1979}
1980
1981/**
1982 * __bio_associate_blkg_from_css - internal blkg association function
1983 *
1984 * This in the core association function that all association paths rely on.
1985 * A blkg reference is taken which is released upon freeing of the bio.
1986 */
1987static int __bio_associate_blkg_from_css(struct bio *bio,
1988 struct cgroup_subsys_state *css)
1989{
1990 struct request_queue *q = bio->bi_disk->queue;
1991 struct blkcg_gq *blkg;
1992 int ret;
1993
1994 rcu_read_lock();
1995
1996 if (!css || !css->parent)
1997 blkg = q->root_blkg;
1998 else
1999 blkg = blkg_lookup_create(css_to_blkcg(css), q);
2000
2001 ret = bio_associate_blkg(bio, blkg);
2002
2003 rcu_read_unlock();
2004 return ret;
2005}
2006
2007/**
2008 * bio_associate_blkg_from_css - associate a bio with a specified css
2009 * @bio: target bio
2010 * @css: target css
2011 *
2012 * Associate @bio with the blkg found by combining the css's blkg and the
2013 * request_queue of the @bio. This falls back to the queue's root_blkg if
2014 * the association fails with the css.
2015 */
2016int bio_associate_blkg_from_css(struct bio *bio,
2017 struct cgroup_subsys_state *css)
2018{
2019 if (unlikely(bio->bi_blkg))
2020 return -EBUSY;
2021 return __bio_associate_blkg_from_css(bio, css);
2022}
2023EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
2024
2025#ifdef CONFIG_MEMCG 1957#ifdef CONFIG_MEMCG
2026/** 1958/**
2027 * bio_associate_blkg_from_page - associate a bio with the page's blkg 1959 * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
2028 * @bio: target bio 1960 * @bio: target bio
2029 * @page: the page to lookup the blkcg from 1961 * @page: the page to lookup the blkcg from
2030 * 1962 *
2031 * Associate @bio with the blkg from @page's owning memcg and the respective 1963 * Associate @bio with the blkcg from @page's owning memcg. This works like
2032 * request_queue. If cgroup_e_css returns NULL, fall back to the queue's 1964 * every other associate function wrt references.
2033 * root_blkg.
2034 *
2035 * Note: this must be called after bio has an associated device.
2036 */ 1965 */
2037int bio_associate_blkg_from_page(struct bio *bio, struct page *page) 1966int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
2038{ 1967{
2039 struct cgroup_subsys_state *css; 1968 struct cgroup_subsys_state *blkcg_css;
2040 int ret;
2041 1969
2042 if (unlikely(bio->bi_blkg)) 1970 if (unlikely(bio->bi_css))
2043 return -EBUSY; 1971 return -EBUSY;
2044 if (!page->mem_cgroup) 1972 if (!page->mem_cgroup)
2045 return 0; 1973 return 0;
2046 1974 blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
2047 rcu_read_lock(); 1975 &io_cgrp_subsys);
2048 1976 bio->bi_css = blkcg_css;
2049 css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); 1977 return 0;
2050
2051 ret = __bio_associate_blkg_from_css(bio, css);
2052
2053 rcu_read_unlock();
2054 return ret;
2055} 1978}
2056#endif /* CONFIG_MEMCG */ 1979#endif /* CONFIG_MEMCG */
2057 1980
2058/** 1981/**
2059 * bio_associate_create_blkg - associate a bio with a blkg from q 1982 * bio_associate_blkcg - associate a bio with the specified blkcg
2060 * @q: request_queue where bio is going
2061 * @bio: target bio 1983 * @bio: target bio
1984 * @blkcg_css: css of the blkcg to associate
1985 *
1986 * Associate @bio with the blkcg specified by @blkcg_css. Block layer will
1987 * treat @bio as if it were issued by a task which belongs to the blkcg.
2062 * 1988 *
2063 * Associate @bio with the blkg found from the bio's css and the request_queue. 1989 * This function takes an extra reference of @blkcg_css which will be put
2064 * If one is not found, bio_lookup_blkg creates the blkg. This falls back to 1990 * when @bio is released. The caller must own @bio and is responsible for
2065 * the queue's root_blkg if association fails. 1991 * synchronizing calls to this function.
2066 */ 1992 */
2067int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) 1993int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
2068{ 1994{
2069 struct cgroup_subsys_state *css; 1995 if (unlikely(bio->bi_css))
2070 int ret = 0; 1996 return -EBUSY;
2071 1997 css_get(blkcg_css);
2072 /* someone has already associated this bio with a blkg */ 1998 bio->bi_css = blkcg_css;
2073 if (bio->bi_blkg) 1999 return 0;
2074 return ret;
2075
2076 rcu_read_lock();
2077
2078 css = blkcg_css();
2079
2080 ret = __bio_associate_blkg_from_css(bio, css);
2081
2082 rcu_read_unlock();
2083 return ret;
2084} 2000}
2001EXPORT_SYMBOL_GPL(bio_associate_blkcg);
2085 2002
2086/** 2003/**
2087 * bio_reassociate_blkg - reassociate a bio with a blkg from q 2004 * bio_associate_blkg - associate a bio with the specified blkg
2088 * @q: request_queue where bio is going
2089 * @bio: target bio 2005 * @bio: target bio
2006 * @blkg: the blkg to associate
2090 * 2007 *
2091 * When submitting a bio, multiple recursive calls to make_request() may occur. 2008 * Associate @bio with the blkg specified by @blkg. This is the queue specific
2092 * This causes the initial associate done in blkcg_bio_issue_check() to be 2009 * blkcg information associated with the @bio, a reference will be taken on the
2093 * incorrect and reference the prior request_queue. This performs reassociation 2010 * @blkg and will be freed when the bio is freed.
2094 * when this situation happens.
2095 */ 2011 */
2096int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) 2012int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
2097{ 2013{
2098 if (bio->bi_blkg) { 2014 if (unlikely(bio->bi_blkg))
2099 blkg_put(bio->bi_blkg); 2015 return -EBUSY;
2100 bio->bi_blkg = NULL; 2016 if (!blkg_try_get(blkg))
2101 } 2017 return -ENODEV;
2102 2018 bio->bi_blkg = blkg;
2103 return bio_associate_create_blkg(q, bio); 2019 return 0;
2104} 2020}
2105 2021
2106/** 2022/**
@@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio)
2113 put_io_context(bio->bi_ioc); 2029 put_io_context(bio->bi_ioc);
2114 bio->bi_ioc = NULL; 2030 bio->bi_ioc = NULL;
2115 } 2031 }
2032 if (bio->bi_css) {
2033 css_put(bio->bi_css);
2034 bio->bi_css = NULL;
2035 }
2116 if (bio->bi_blkg) { 2036 if (bio->bi_blkg) {
2117 blkg_put(bio->bi_blkg); 2037 blkg_put(bio->bi_blkg);
2118 bio->bi_blkg = NULL; 2038 bio->bi_blkg = NULL;
@@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio)
2120} 2040}
2121 2041
2122/** 2042/**
2123 * bio_clone_blkg_association - clone blkg association from src to dst bio 2043 * bio_clone_blkcg_association - clone blkcg association from src to dst bio
2124 * @dst: destination bio 2044 * @dst: destination bio
2125 * @src: source bio 2045 * @src: source bio
2126 */ 2046 */
2127void bio_clone_blkg_association(struct bio *dst, struct bio *src) 2047void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
2128{ 2048{
2129 if (src->bi_blkg) 2049 if (src->bi_css)
2130 bio_associate_blkg(dst, src->bi_blkg); 2050 WARN_ON(bio_associate_blkcg(dst, src->bi_css));
2131} 2051}
2132EXPORT_SYMBOL_GPL(bio_clone_blkg_association); 2052EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
2133#endif /* CONFIG_BLK_CGROUP */ 2053#endif /* CONFIG_BLK_CGROUP */
2134 2054
2135static void __init biovec_init_slabs(void) 2055static void __init biovec_init_slabs(void)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 992da5592c6e..c630e02836a8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg)
84 kfree(blkg); 84 kfree(blkg);
85} 85}
86 86
87static void __blkg_release(struct rcu_head *rcu)
88{
89 struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
90
91 percpu_ref_exit(&blkg->refcnt);
92
93 /* release the blkcg and parent blkg refs this blkg has been holding */
94 css_put(&blkg->blkcg->css);
95 if (blkg->parent)
96 blkg_put(blkg->parent);
97
98 wb_congested_put(blkg->wb_congested);
99
100 blkg_free(blkg);
101}
102
103/*
104 * A group is RCU protected, but having an rcu lock does not mean that one
105 * can access all the fields of blkg and assume these are valid. For
106 * example, don't try to follow throtl_data and request queue links.
107 *
108 * Having a reference to blkg under an rcu allows accesses to only values
109 * local to groups like group stats and group rate limits.
110 */
111static void blkg_release(struct percpu_ref *ref)
112{
113 struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
114
115 call_rcu(&blkg->rcu_head, __blkg_release);
116}
117
118/** 87/**
119 * blkg_alloc - allocate a blkg 88 * blkg_alloc - allocate a blkg
120 * @blkcg: block cgroup the new blkg is associated with 89 * @blkcg: block cgroup the new blkg is associated with
@@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
141 blkg->q = q; 110 blkg->q = q;
142 INIT_LIST_HEAD(&blkg->q_node); 111 INIT_LIST_HEAD(&blkg->q_node);
143 blkg->blkcg = blkcg; 112 blkg->blkcg = blkcg;
113 atomic_set(&blkg->refcnt, 1);
144 114
145 /* root blkg uses @q->root_rl, init rl only for !root blkgs */ 115 /* root blkg uses @q->root_rl, init rl only for !root blkgs */
146 if (blkcg != &blkcg_root) { 116 if (blkcg != &blkcg_root) {
@@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
247 blkg_get(blkg->parent); 217 blkg_get(blkg->parent);
248 } 218 }
249 219
250 ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
251 GFP_NOWAIT | __GFP_NOWARN);
252 if (ret)
253 goto err_cancel_ref;
254
255 /* invoke per-policy init */ 220 /* invoke per-policy init */
256 for (i = 0; i < BLKCG_MAX_POLS; i++) { 221 for (i = 0; i < BLKCG_MAX_POLS; i++) {
257 struct blkcg_policy *pol = blkcg_policy[i]; 222 struct blkcg_policy *pol = blkcg_policy[i];
@@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
284 blkg_put(blkg); 249 blkg_put(blkg);
285 return ERR_PTR(ret); 250 return ERR_PTR(ret);
286 251
287err_cancel_ref:
288 percpu_ref_exit(&blkg->refcnt);
289err_put_congested: 252err_put_congested:
290 wb_congested_put(wb_congested); 253 wb_congested_put(wb_congested);
291err_put_css: 254err_put_css:
@@ -296,7 +259,7 @@ err_free_blkg:
296} 259}
297 260
298/** 261/**
299 * __blkg_lookup_create - lookup blkg, try to create one if not there 262 * blkg_lookup_create - lookup blkg, try to create one if not there
300 * @blkcg: blkcg of interest 263 * @blkcg: blkcg of interest
301 * @q: request_queue of interest 264 * @q: request_queue of interest
302 * 265 *
@@ -305,11 +268,12 @@ err_free_blkg:
305 * that all non-root blkg's have access to the parent blkg. This function 268 * that all non-root blkg's have access to the parent blkg. This function
306 * should be called under RCU read lock and @q->queue_lock. 269 * should be called under RCU read lock and @q->queue_lock.
307 * 270 *
308 * Returns the blkg or the closest blkg if blkg_create fails as it walks 271 * Returns pointer to the looked up or created blkg on success, ERR_PTR()
309 * down from root. 272 * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not
273 * dead and bypassing, returns ERR_PTR(-EBUSY).
310 */ 274 */
311struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, 275struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
312 struct request_queue *q) 276 struct request_queue *q)
313{ 277{
314 struct blkcg_gq *blkg; 278 struct blkcg_gq *blkg;
315 279
@@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
321 * we shouldn't allow anything to go through for a bypassing queue. 285 * we shouldn't allow anything to go through for a bypassing queue.
322 */ 286 */
323 if (unlikely(blk_queue_bypass(q))) 287 if (unlikely(blk_queue_bypass(q)))
324 return q->root_blkg; 288 return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
325 289
326 blkg = __blkg_lookup(blkcg, q, true); 290 blkg = __blkg_lookup(blkcg, q, true);
327 if (blkg) 291 if (blkg)
@@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
329 293
330 /* 294 /*
331 * Create blkgs walking down from blkcg_root to @blkcg, so that all 295 * Create blkgs walking down from blkcg_root to @blkcg, so that all
332 * non-root blkgs have access to their parents. Returns the closest 296 * non-root blkgs have access to their parents.
333 * blkg to the intended blkg should blkg_create() fail.
334 */ 297 */
335 while (true) { 298 while (true) {
336 struct blkcg *pos = blkcg; 299 struct blkcg *pos = blkcg;
337 struct blkcg *parent = blkcg_parent(blkcg); 300 struct blkcg *parent = blkcg_parent(blkcg);
338 struct blkcg_gq *ret_blkg = q->root_blkg; 301
339 302 while (parent && !__blkg_lookup(parent, q, false)) {
340 while (parent) {
341 blkg = __blkg_lookup(parent, q, false);
342 if (blkg) {
343 /* remember closest blkg */
344 ret_blkg = blkg;
345 break;
346 }
347 pos = parent; 303 pos = parent;
348 parent = blkcg_parent(parent); 304 parent = blkcg_parent(parent);
349 } 305 }
350 306
351 blkg = blkg_create(pos, q, NULL); 307 blkg = blkg_create(pos, q, NULL);
352 if (IS_ERR(blkg)) 308 if (pos == blkcg || IS_ERR(blkg))
353 return ret_blkg;
354 if (pos == blkcg)
355 return blkg; 309 return blkg;
356 } 310 }
357} 311}
358 312
359/**
360 * blkg_lookup_create - find or create a blkg
361 * @blkcg: target block cgroup
362 * @q: target request_queue
363 *
364 * This looks up or creates the blkg representing the unique pair
365 * of the blkcg and the request_queue.
366 */
367struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
368 struct request_queue *q)
369{
370 struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
371 unsigned long flags;
372
373 if (unlikely(!blkg)) {
374 spin_lock_irqsave(q->queue_lock, flags);
375
376 blkg = __blkg_lookup_create(blkcg, q);
377
378 spin_unlock_irqrestore(q->queue_lock, flags);
379 }
380
381 return blkg;
382}
383
384static void blkg_destroy(struct blkcg_gq *blkg) 313static void blkg_destroy(struct blkcg_gq *blkg)
385{ 314{
386 struct blkcg *blkcg = blkg->blkcg; 315 struct blkcg *blkcg = blkg->blkcg;
@@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
424 * Put the reference taken at the time of creation so that when all 353 * Put the reference taken at the time of creation so that when all
425 * queues are gone, group can be destroyed. 354 * queues are gone, group can be destroyed.
426 */ 355 */
427 percpu_ref_kill(&blkg->refcnt); 356 blkg_put(blkg);
428} 357}
429 358
430/** 359/**
@@ -452,6 +381,29 @@ static void blkg_destroy_all(struct request_queue *q)
452} 381}
453 382
454/* 383/*
384 * A group is RCU protected, but having an rcu lock does not mean that one
385 * can access all the fields of blkg and assume these are valid. For
386 * example, don't try to follow throtl_data and request queue links.
387 *
388 * Having a reference to blkg under an rcu allows accesses to only values
389 * local to groups like group stats and group rate limits.
390 */
391void __blkg_release_rcu(struct rcu_head *rcu_head)
392{
393 struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
394
395 /* release the blkcg and parent blkg refs this blkg has been holding */
396 css_put(&blkg->blkcg->css);
397 if (blkg->parent)
398 blkg_put(blkg->parent);
399
400 wb_congested_put(blkg->wb_congested);
401
402 blkg_free(blkg);
403}
404EXPORT_SYMBOL_GPL(__blkg_release_rcu);
405
406/*
455 * The next function used by blk_queue_for_each_rl(). It's a bit tricky 407 * The next function used by blk_queue_for_each_rl(). It's a bit tricky
456 * because the root blkg uses @q->root_rl instead of its own rl. 408 * because the root blkg uses @q->root_rl instead of its own rl.
457 */ 409 */
@@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void)
1796 blkg = blkg_lookup(blkcg, q); 1748 blkg = blkg_lookup(blkcg, q);
1797 if (!blkg) 1749 if (!blkg)
1798 goto out; 1750 goto out;
1799 if (!blkg_tryget(blkg)) 1751 blkg = blkg_try_get(blkg);
1752 if (!blkg)
1800 goto out; 1753 goto out;
1801 rcu_read_unlock(); 1754 rcu_read_unlock();
1802 1755
diff --git a/block/blk-core.c b/block/blk-core.c
index 26a5dac80ed9..ce12515f9b9b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2435,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio)
2435 if (q) 2435 if (q)
2436 blk_queue_exit(q); 2436 blk_queue_exit(q);
2437 q = bio->bi_disk->queue; 2437 q = bio->bi_disk->queue;
2438 bio_reassociate_blkg(q, bio);
2439 flags = 0; 2438 flags = 0;
2440 if (bio->bi_opf & REQ_NOWAIT) 2439 if (bio->bi_opf & REQ_NOWAIT)
2441 flags = BLK_MQ_REQ_NOWAIT; 2440 flags = BLK_MQ_REQ_NOWAIT;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 35c48d7b8f78..bb240a0c1309 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -480,12 +480,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
480 spinlock_t *lock) 480 spinlock_t *lock)
481{ 481{
482 struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); 482 struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
483 struct blkcg_gq *blkg = bio->bi_blkg; 483 struct blkcg *blkcg;
484 struct blkcg_gq *blkg;
485 struct request_queue *q = rqos->q;
484 bool issue_as_root = bio_issue_as_root_blkg(bio); 486 bool issue_as_root = bio_issue_as_root_blkg(bio);
485 487
486 if (!blk_iolatency_enabled(blkiolat)) 488 if (!blk_iolatency_enabled(blkiolat))
487 return; 489 return;
488 490
491 rcu_read_lock();
492 blkcg = bio_blkcg(bio);
493 bio_associate_blkcg(bio, &blkcg->css);
494 blkg = blkg_lookup(blkcg, q);
495 if (unlikely(!blkg)) {
496 if (!lock)
497 spin_lock_irq(q->queue_lock);
498 blkg = blkg_lookup_create(blkcg, q);
499 if (IS_ERR(blkg))
500 blkg = NULL;
501 if (!lock)
502 spin_unlock_irq(q->queue_lock);
503 }
504 if (!blkg)
505 goto out;
506
507 bio_issue_init(&bio->bi_issue, bio_sectors(bio));
508 bio_associate_blkg(bio, blkg);
509out:
510 rcu_read_unlock();
489 while (blkg && blkg->parent) { 511 while (blkg && blkg->parent) {
490 struct iolatency_grp *iolat = blkg_to_lat(blkg); 512 struct iolatency_grp *iolat = blkg_to_lat(blkg);
491 if (!iolat) { 513 if (!iolat) {
@@ -706,7 +728,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
706 * We could be exiting, don't access the pd unless we have a 728 * We could be exiting, don't access the pd unless we have a
707 * ref on the blkg. 729 * ref on the blkg.
708 */ 730 */
709 if (!blkg_tryget(blkg)) 731 if (!blkg_try_get(blkg))
710 continue; 732 continue;
711 733
712 iolat = blkg_to_lat(blkg); 734 iolat = blkg_to_lat(blkg);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 4bda70e8db48..db1a3a2ae006 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
2115} 2115}
2116#endif 2116#endif
2117 2117
2118static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
2119{
2120#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
2121 /* fallback to root_blkg if we fail to get a blkg ref */
2122 if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
2123 bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
2124 bio_issue_init(&bio->bi_issue, bio_sectors(bio));
2125#endif
2126}
2127
2118bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, 2128bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
2119 struct bio *bio) 2129 struct bio *bio)
2120{ 2130{
2121 struct throtl_qnode *qn = NULL; 2131 struct throtl_qnode *qn = NULL;
2122 struct throtl_grp *tg = blkg_to_tg(blkg); 2132 struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
2123 struct throtl_service_queue *sq; 2133 struct throtl_service_queue *sq;
2124 bool rw = bio_data_dir(bio); 2134 bool rw = bio_data_dir(bio);
2125 bool throttled = false; 2135 bool throttled = false;
@@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
2138 if (unlikely(blk_queue_bypass(q))) 2148 if (unlikely(blk_queue_bypass(q)))
2139 goto out_unlock; 2149 goto out_unlock;
2140 2150
2151 blk_throtl_assoc_bio(tg, bio);
2141 blk_throtl_update_idletime(tg); 2152 blk_throtl_update_idletime(tg);
2142 2153
2143 sq = &tg->service_queue; 2154 sq = &tg->service_queue;
diff --git a/block/bounce.c b/block/bounce.c
index ec0d99995f5f..418677dcec60 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
276 } 276 }
277 } 277 }
278 278
279 bio_clone_blkg_association(bio, bio_src); 279 bio_clone_blkcg_association(bio, bio_src);
280
281 blkcg_bio_issue_init(bio);
282 280
283 return bio; 281 return bio;
284} 282}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a3d87dd3c1a..ed41aa978c4a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
3759 uint64_t serial_nr; 3759 uint64_t serial_nr;
3760 3760
3761 rcu_read_lock(); 3761 rcu_read_lock();
3762 serial_nr = __bio_blkcg(bio)->css.serial_nr; 3762 serial_nr = bio_blkcg(bio)->css.serial_nr;
3763 rcu_read_unlock(); 3763 rcu_read_unlock();
3764 3764
3765 /* 3765 /*
@@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
3824 struct cfq_group *cfqg; 3824 struct cfq_group *cfqg;
3825 3825
3826 rcu_read_lock(); 3826 rcu_read_lock();
3827 cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio)); 3827 cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
3828 if (!cfqg) { 3828 if (!cfqg) {
3829 cfqq = &cfqd->oom_cfqq; 3829 cfqq = &cfqd->oom_cfqq;
3830 goto out; 3830 goto out;
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index abad6d15f956..ea9debf59b22 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -77,7 +77,6 @@
77#include <linux/falloc.h> 77#include <linux/falloc.h>
78#include <linux/uio.h> 78#include <linux/uio.h>
79#include <linux/ioprio.h> 79#include <linux/ioprio.h>
80#include <linux/blk-cgroup.h>
81 80
82#include "loop.h" 81#include "loop.h"
83 82
@@ -1761,8 +1760,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
1761 1760
1762 /* always use the first bio's css */ 1761 /* always use the first bio's css */
1763#ifdef CONFIG_BLK_CGROUP 1762#ifdef CONFIG_BLK_CGROUP
1764 if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { 1763 if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
1765 cmd->css = &bio_blkcg(rq->bio)->css; 1764 cmd->css = rq->bio->bi_css;
1766 css_get(cmd->css); 1765 css_get(cmd->css);
1767 } else 1766 } else
1768#endif 1767#endif
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index f3fb5bb8c82a..ac1cffd2a09b 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
542 !discard_bio) 542 !discard_bio)
543 continue; 543 continue;
544 bio_chain(discard_bio, bio); 544 bio_chain(discard_bio, bio);
545 bio_clone_blkg_association(discard_bio, bio); 545 bio_clone_blkcg_association(discard_bio, bio);
546 if (mddev->gendisk) 546 if (mddev->gendisk)
547 trace_block_bio_remap(bdev_get_queue(rdev->bdev), 547 trace_block_bio_remap(bdev_get_queue(rdev->bdev),
548 discard_bio, disk_devt(mddev->gendisk), 548 discard_bio, disk_devt(mddev->gendisk),
diff --git a/fs/buffer.c b/fs/buffer.c
index 109f55196866..6f1ae3ac9789 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3060 */ 3060 */
3061 bio = bio_alloc(GFP_NOIO, 1); 3061 bio = bio_alloc(GFP_NOIO, 1);
3062 3062
3063 if (wbc) {
3064 wbc_init_bio(wbc, bio);
3065 wbc_account_io(wbc, bh->b_page, bh->b_size);
3066 }
3067
3063 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); 3068 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
3064 bio_set_dev(bio, bh->b_bdev); 3069 bio_set_dev(bio, bh->b_bdev);
3065 bio->bi_write_hint = write_hint; 3070 bio->bi_write_hint = write_hint;
@@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3079 op_flags |= REQ_PRIO; 3084 op_flags |= REQ_PRIO;
3080 bio_set_op_attrs(bio, op, op_flags); 3085 bio_set_op_attrs(bio, op, op_flags);
3081 3086
3082 if (wbc) {
3083 wbc_init_bio(wbc, bio);
3084 wbc_account_io(wbc, bh->b_page, bh->b_size);
3085 }
3086
3087 submit_bio(bio); 3087 submit_bio(bio);
3088 return 0; 3088 return 0;
3089} 3089}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 2aa62d58d8dd..db7590178dfc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
374 bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); 374 bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
375 if (!bio) 375 if (!bio)
376 return -ENOMEM; 376 return -ENOMEM;
377 wbc_init_bio(io->io_wbc, bio);
377 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); 378 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
378 bio_set_dev(bio, bh->b_bdev); 379 bio_set_dev(bio, bh->b_bdev);
379 bio->bi_end_io = ext4_end_bio; 380 bio->bi_end_io = ext4_end_bio;
380 bio->bi_private = ext4_get_io_end(io->io_end); 381 bio->bi_private = ext4_get_io_end(io->io_end);
381 io->io_bio = bio; 382 io->io_bio = bio;
382 io->io_next_block = bh->b_blocknr; 383 io->io_next_block = bh->b_blocknr;
383 wbc_init_bio(io->io_wbc, bio);
384 return 0; 384 return 0;
385} 385}
386 386
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b47c7f716731..056fb627edb3 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -503,31 +503,23 @@ do { \
503 disk_devt((bio)->bi_disk) 503 disk_devt((bio)->bi_disk)
504 504
505#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) 505#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
506int bio_associate_blkg_from_page(struct bio *bio, struct page *page); 506int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
507#else 507#else
508static inline int bio_associate_blkg_from_page(struct bio *bio, 508static inline int bio_associate_blkcg_from_page(struct bio *bio,
509 struct page *page) { return 0; } 509 struct page *page) { return 0; }
510#endif 510#endif
511 511
512#ifdef CONFIG_BLK_CGROUP 512#ifdef CONFIG_BLK_CGROUP
513int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
513int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); 514int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
514int bio_associate_blkg_from_css(struct bio *bio,
515 struct cgroup_subsys_state *css);
516int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
517int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
518void bio_disassociate_task(struct bio *bio); 515void bio_disassociate_task(struct bio *bio);
519void bio_clone_blkg_association(struct bio *dst, struct bio *src); 516void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
520#else /* CONFIG_BLK_CGROUP */ 517#else /* CONFIG_BLK_CGROUP */
521static inline int bio_associate_blkg_from_css(struct bio *bio, 518static inline int bio_associate_blkcg(struct bio *bio,
522 struct cgroup_subsys_state *css) 519 struct cgroup_subsys_state *blkcg_css) { return 0; }
523{ return 0; }
524static inline int bio_associate_create_blkg(struct request_queue *q,
525 struct bio *bio) { return 0; }
526static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
527{ return 0; }
528static inline void bio_disassociate_task(struct bio *bio) { } 520static inline void bio_disassociate_task(struct bio *bio) { }
529static inline void bio_clone_blkg_association(struct bio *dst, 521static inline void bio_clone_blkcg_association(struct bio *dst,
530 struct bio *src) { } 522 struct bio *src) { }
531#endif /* CONFIG_BLK_CGROUP */ 523#endif /* CONFIG_BLK_CGROUP */
532 524
533#ifdef CONFIG_HIGHMEM 525#ifdef CONFIG_HIGHMEM
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1e76ceebeb5d..6d766a19f2bb 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -126,7 +126,7 @@ struct blkcg_gq {
126 struct request_list rl; 126 struct request_list rl;
127 127
128 /* reference count */ 128 /* reference count */
129 struct percpu_ref refcnt; 129 atomic_t refcnt;
130 130
131 /* is this blkg online? protected by both blkcg and q locks */ 131 /* is this blkg online? protected by both blkcg and q locks */
132 bool online; 132 bool online;
@@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
184 184
185struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, 185struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
186 struct request_queue *q, bool update_hint); 186 struct request_queue *q, bool update_hint);
187struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
188 struct request_queue *q);
189struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, 187struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
190 struct request_queue *q); 188 struct request_queue *q);
191int blkcg_init_queue(struct request_queue *q); 189int blkcg_init_queue(struct request_queue *q);
@@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
232 char *input, struct blkg_conf_ctx *ctx); 230 char *input, struct blkg_conf_ctx *ctx);
233void blkg_conf_finish(struct blkg_conf_ctx *ctx); 231void blkg_conf_finish(struct blkg_conf_ctx *ctx);
234 232
235/**
236 * blkcg_css - find the current css
237 *
238 * Find the css associated with either the kthread or the current task.
239 * This may return a dying css, so it is up to the caller to use tryget logic
240 * to confirm it is alive and well.
241 */
242static inline struct cgroup_subsys_state *blkcg_css(void)
243{
244 struct cgroup_subsys_state *css;
245
246 css = kthread_blkcg();
247 if (css)
248 return css;
249 return task_css(current, io_cgrp_id);
250}
251 233
252static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) 234static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
253{ 235{
254 return css ? container_of(css, struct blkcg, css) : NULL; 236 return css ? container_of(css, struct blkcg, css) : NULL;
255} 237}
256 238
257/**
258 * __bio_blkcg - internal version of bio_blkcg for bfq and cfq
259 *
260 * DO NOT USE.
261 * There is a flaw using this version of the function. In particular, this was
262 * used in a broken paradigm where association was called on the given css. It
263 * is possible though that the returned css from task_css() is in the process
264 * of dying due to migration of the current task. So it is improper to assume
265 * *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will
266 * take additional work to handle more gracefully.
267 */
268static inline struct blkcg *__bio_blkcg(struct bio *bio)
269{
270 if (bio && bio->bi_blkg)
271 return bio->bi_blkg->blkcg;
272 return css_to_blkcg(blkcg_css());
273}
274
275/**
276 * bio_blkcg - grab the blkcg associated with a bio
277 * @bio: target bio
278 *
279 * This returns the blkcg associated with a bio, NULL if not associated.
280 * Callers are expected to either handle NULL or know association has been
281 * done prior to calling this.
282 */
283static inline struct blkcg *bio_blkcg(struct bio *bio) 239static inline struct blkcg *bio_blkcg(struct bio *bio)
284{ 240{
285 if (bio && bio->bi_blkg) 241 struct cgroup_subsys_state *css;
286 return bio->bi_blkg->blkcg; 242
287 return NULL; 243 if (bio && bio->bi_css)
244 return css_to_blkcg(bio->bi_css);
245 css = kthread_blkcg();
246 if (css)
247 return css_to_blkcg(css);
248 return css_to_blkcg(task_css(current, io_cgrp_id));
288} 249}
289 250
290static inline bool blk_cgroup_congested(void) 251static inline bool blk_cgroup_congested(void)
@@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
490 */ 451 */
491static inline void blkg_get(struct blkcg_gq *blkg) 452static inline void blkg_get(struct blkcg_gq *blkg)
492{ 453{
493 percpu_ref_get(&blkg->refcnt); 454 WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
455 atomic_inc(&blkg->refcnt);
494} 456}
495 457
496/** 458/**
497 * blkg_tryget - try and get a blkg reference 459 * blkg_try_get - try and get a blkg reference
498 * @blkg: blkg to get 460 * @blkg: blkg to get
499 * 461 *
500 * This is for use when doing an RCU lookup of the blkg. We may be in the midst 462 * This is for use when doing an RCU lookup of the blkg. We may be in the midst
501 * of freeing this blkg, so we can only use it if the refcnt is not zero. 463 * of freeing this blkg, so we can only use it if the refcnt is not zero.
502 */ 464 */
503static inline bool blkg_tryget(struct blkcg_gq *blkg) 465static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
504{ 466{
505 return percpu_ref_tryget(&blkg->refcnt); 467 if (atomic_inc_not_zero(&blkg->refcnt))
468 return blkg;
469 return NULL;
506} 470}
507 471
508/**
509 * blkg_tryget_closest - try and get a blkg ref on the closet blkg
510 * @blkg: blkg to get
511 *
512 * This walks up the blkg tree to find the closest non-dying blkg and returns
513 * the blkg that it did association with as it may not be the passed in blkg.
514 */
515static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
516{
517 while (!percpu_ref_tryget(&blkg->refcnt))
518 blkg = blkg->parent;
519 472
520 return blkg; 473void __blkg_release_rcu(struct rcu_head *rcu);
521}
522 474
523/** 475/**
524 * blkg_put - put a blkg reference 476 * blkg_put - put a blkg reference
@@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
526 */ 478 */
527static inline void blkg_put(struct blkcg_gq *blkg) 479static inline void blkg_put(struct blkcg_gq *blkg)
528{ 480{
529 percpu_ref_put(&blkg->refcnt); 481 WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
482 if (atomic_dec_and_test(&blkg->refcnt))
483 call_rcu(&blkg->rcu_head, __blkg_release_rcu);
530} 484}
531 485
532/** 486/**
@@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
579 533
580 rcu_read_lock(); 534 rcu_read_lock();
581 535
582 if (bio && bio->bi_blkg) { 536 blkcg = bio_blkcg(bio);
583 blkcg = bio->bi_blkg->blkcg;
584 if (blkcg == &blkcg_root)
585 goto rl_use_root;
586
587 blkg_get(bio->bi_blkg);
588 rcu_read_unlock();
589 return &bio->bi_blkg->rl;
590 }
591 537
592 blkcg = css_to_blkcg(blkcg_css()); 538 /* bypass blkg lookup and use @q->root_rl directly for root */
593 if (blkcg == &blkcg_root) 539 if (blkcg == &blkcg_root)
594 goto rl_use_root; 540 goto root_rl;
595 541
542 /*
543 * Try to use blkg->rl. blkg lookup may fail under memory pressure
544 * or if either the blkcg or queue is going away. Fall back to
545 * root_rl in such cases.
546 */
596 blkg = blkg_lookup(blkcg, q); 547 blkg = blkg_lookup(blkcg, q);
597 if (unlikely(!blkg)) 548 if (unlikely(!blkg))
598 blkg = __blkg_lookup_create(blkcg, q); 549 goto root_rl;
599
600 if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
601 goto rl_use_root;
602 550
551 blkg_get(blkg);
603 rcu_read_unlock(); 552 rcu_read_unlock();
604 return &blkg->rl; 553 return &blkg->rl;
605 554root_rl:
606 /*
607 * Each blkg has its own request_list, however, the root blkcg
608 * uses the request_queue's root_rl. This is to avoid most
609 * overhead for the root blkcg.
610 */
611rl_use_root:
612 rcu_read_unlock(); 555 rcu_read_unlock();
613 return &q->root_rl; 556 return &q->root_rl;
614} 557}
@@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
854 struct bio *bio) { return false; } 797 struct bio *bio) { return false; }
855#endif 798#endif
856 799
857
858static inline void blkcg_bio_issue_init(struct bio *bio)
859{
860 bio_issue_init(&bio->bi_issue, bio_sectors(bio));
861}
862
863static inline bool blkcg_bio_issue_check(struct request_queue *q, 800static inline bool blkcg_bio_issue_check(struct request_queue *q,
864 struct bio *bio) 801 struct bio *bio)
865{ 802{
803 struct blkcg *blkcg;
866 struct blkcg_gq *blkg; 804 struct blkcg_gq *blkg;
867 bool throtl = false; 805 bool throtl = false;
868 806
869 rcu_read_lock(); 807 rcu_read_lock();
808 blkcg = bio_blkcg(bio);
809
810 /* associate blkcg if bio hasn't attached one */
811 bio_associate_blkcg(bio, &blkcg->css);
870 812
871 bio_associate_create_blkg(q, bio); 813 blkg = blkg_lookup(blkcg, q);
872 blkg = bio->bi_blkg; 814 if (unlikely(!blkg)) {
815 spin_lock_irq(q->queue_lock);
816 blkg = blkg_lookup_create(blkcg, q);
817 if (IS_ERR(blkg))
818 blkg = NULL;
819 spin_unlock_irq(q->queue_lock);
820 }
873 821
874 throtl = blk_throtl_bio(q, blkg, bio); 822 throtl = blk_throtl_bio(q, blkg, bio);
875 823
876 if (!throtl) { 824 if (!throtl) {
825 blkg = blkg ?: q->root_blkg;
877 /* 826 /*
878 * If the bio is flagged with BIO_QUEUE_ENTERED it means this 827 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
879 * is a split bio and we would have already accounted for the 828 * is a split bio and we would have already accounted for the
@@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
885 blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); 834 blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
886 } 835 }
887 836
888 blkcg_bio_issue_init(bio);
889
890 rcu_read_unlock(); 837 rcu_read_unlock();
891 return !throtl; 838 return !throtl;
892} 839}
@@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
983static inline void blkcg_deactivate_policy(struct request_queue *q, 930static inline void blkcg_deactivate_policy(struct request_queue *q,
984 const struct blkcg_policy *pol) { } 931 const struct blkcg_policy *pol) { }
985 932
986static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
987static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } 933static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
988 934
989static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, 935static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { }
999static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } 945static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
1000static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } 946static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
1001 947
1002static inline void blkcg_bio_issue_init(struct bio *bio) { }
1003static inline bool blkcg_bio_issue_check(struct request_queue *q, 948static inline bool blkcg_bio_issue_check(struct request_queue *q,
1004 struct bio *bio) { return true; } 949 struct bio *bio) { return true; }
1005 950
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 093a818c5b68..1dcf652ba0aa 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,6 +178,7 @@ struct bio {
178 * release. Read comment on top of bio_associate_current(). 178 * release. Read comment on top of bio_associate_current().
179 */ 179 */
180 struct io_context *bi_ioc; 180 struct io_context *bi_ioc;
181 struct cgroup_subsys_state *bi_css;
181 struct blkcg_gq *bi_blkg; 182 struct blkcg_gq *bi_blkg;
182 struct bio_issue bi_issue; 183 struct bio_issue bi_issue;
183#endif 184#endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b8bcbdeb2eac..32c553556bbd 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -93,8 +93,6 @@ extern struct css_set init_css_set;
93 93
94bool css_has_online_children(struct cgroup_subsys_state *css); 94bool css_has_online_children(struct cgroup_subsys_state *css);
95struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); 95struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
96struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
97 struct cgroup_subsys *ss);
98struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, 96struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
99 struct cgroup_subsys *ss); 97 struct cgroup_subsys *ss);
100struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, 98struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 738a0c24874f..fdfd04e348f6 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
246 * 246 *
247 * @bio is a part of the writeback in progress controlled by @wbc. Perform 247 * @bio is a part of the writeback in progress controlled by @wbc. Perform
248 * writeback specific initialization. This is used to apply the cgroup 248 * writeback specific initialization. This is used to apply the cgroup
249 * writeback context. Must be called after the bio has been associated with 249 * writeback context.
250 * a device.
251 */ 250 */
252static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) 251static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
253{ 252{
@@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
258 * regular writeback instead of writing things out itself. 257 * regular writeback instead of writing things out itself.
259 */ 258 */
260 if (wbc->wb) 259 if (wbc->wb)
261 bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); 260 bio_associate_blkcg(bio, wbc->wb->blkcg_css);
262} 261}
263 262
264#else /* CONFIG_CGROUP_WRITEBACK */ 263#else /* CONFIG_CGROUP_WRITEBACK */
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4c1cf0969a80..4a3dae2a8283 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -492,7 +492,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
492} 492}
493 493
494/** 494/**
495 * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss 495 * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
496 * @cgrp: the cgroup of interest 496 * @cgrp: the cgroup of interest
497 * @ss: the subsystem of interest (%NULL returns @cgrp->self) 497 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
498 * 498 *
@@ -501,8 +501,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
501 * enabled. If @ss is associated with the hierarchy @cgrp is on, this 501 * enabled. If @ss is associated with the hierarchy @cgrp is on, this
502 * function is guaranteed to return non-NULL css. 502 * function is guaranteed to return non-NULL css.
503 */ 503 */
504static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, 504static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
505 struct cgroup_subsys *ss) 505 struct cgroup_subsys *ss)
506{ 506{
507 lockdep_assert_held(&cgroup_mutex); 507 lockdep_assert_held(&cgroup_mutex);
508 508
@@ -523,35 +523,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
523} 523}
524 524
525/** 525/**
526 * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
527 * @cgrp: the cgroup of interest
528 * @ss: the subsystem of interest
529 *
530 * Find and get the effective css of @cgrp for @ss. The effective css is
531 * defined as the matching css of the nearest ancestor including self which
532 * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
533 * the root css is returned, so this function always returns a valid css.
534 *
535 * The returned css is not guaranteed to be online, and therefore it is the
536 * callers responsiblity to tryget a reference for it.
537 */
538struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
539 struct cgroup_subsys *ss)
540{
541 struct cgroup_subsys_state *css;
542
543 do {
544 css = cgroup_css(cgrp, ss);
545
546 if (css)
547 return css;
548 cgrp = cgroup_parent(cgrp);
549 } while (cgrp);
550
551 return init_css_set.subsys[ss->id];
552}
553
554/**
555 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem 526 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
556 * @cgrp: the cgroup of interest 527 * @cgrp: the cgroup of interest
557 * @ss: the subsystem of interest 528 * @ss: the subsystem of interest
@@ -633,11 +604,10 @@ EXPORT_SYMBOL_GPL(of_css);
633 * 604 *
634 * Should be called under cgroup_[tree_]mutex. 605 * Should be called under cgroup_[tree_]mutex.
635 */ 606 */
636#define for_each_e_css(css, ssid, cgrp) \ 607#define for_each_e_css(css, ssid, cgrp) \
637 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ 608 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
638 if (!((css) = cgroup_e_css_by_mask(cgrp, \ 609 if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
639 cgroup_subsys[(ssid)]))) \ 610 ; \
640 ; \
641 else 611 else
642 612
643/** 613/**
@@ -1036,7 +1006,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
1036 * @ss is in this hierarchy, so we want the 1006 * @ss is in this hierarchy, so we want the
1037 * effective css from @cgrp. 1007 * effective css from @cgrp.
1038 */ 1008 */
1039 template[i] = cgroup_e_css_by_mask(cgrp, ss); 1009 template[i] = cgroup_e_css(cgrp, ss);
1040 } else { 1010 } else {
1041 /* 1011 /*
1042 * @ss is not in this hierarchy, so we don't want 1012 * @ss is not in this hierarchy, so we don't want
@@ -3053,7 +3023,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
3053 return ret; 3023 return ret;
3054 3024
3055 /* 3025 /*
3056 * At this point, cgroup_e_css_by_mask() results reflect the new csses 3026 * At this point, cgroup_e_css() results reflect the new csses
3057 * making the following cgroup_update_dfl_csses() properly update 3027 * making the following cgroup_update_dfl_csses() properly update
3058 * css associations of all tasks in the subtree. 3028 * css associations of all tasks in the subtree.
3059 */ 3029 */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fac0ddf8a8e2..2868d85f1fb1 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
764 if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) 764 if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
765 return NULL; 765 return NULL;
766 766
767 if (!bio->bi_blkg) 767 if (!bio->bi_css)
768 return NULL; 768 return NULL;
769 return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup); 769 return cgroup_get_kernfs_id(bio->bi_css->cgroup);
770} 770}
771#else 771#else
772static union kernfs_node_id * 772static union kernfs_node_id *
diff --git a/mm/page_io.c b/mm/page_io.c
index 573d3663d846..aafd19ec1db4 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
339 goto out; 339 goto out;
340 } 340 }
341 bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); 341 bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
342 bio_associate_blkg_from_page(bio, page); 342 bio_associate_blkcg_from_page(bio, page);
343 count_swpout_vm_event(page); 343 count_swpout_vm_event(page);
344 set_page_writeback(page); 344 set_page_writeback(page);
345 unlock_page(page); 345 unlock_page(page);