author      Linus Torvalds <torvalds@linux-foundation.org>    2018-11-02 14:25:48 -0400
committer   Linus Torvalds <torvalds@linux-foundation.org>    2018-11-02 14:25:48 -0400
commit      5f21585384a4a69b8bfdd2cae7e3648ae805f57d (patch)
tree        b976d6e847b7209fb54cf78821a59951a7e9e8cd
parent      fcc37f76a995cc08546b88b83f9bb5da11307a0b (diff)
parent      9fe5c59ff6a1e5e26a39b75489a1420e7eaaf0b1 (diff)
Merge tag 'for-linus-20181102' of git://git.kernel.dk/linux-block
Pull block layer fixes from Jens Axboe:
 "The biggest part of this pull request is the revert of the blkcg
  cleanup series. It had one fix earlier for a stacked device issue, but
  another one was reported. Rather than play whack-a-mole with this,
  revert the entire series and try again for the next kernel release.

  Apart from that, only small fixes/changes.

  Summary:

   - Indentation fixup for mtip32xx (Colin Ian King)

   - The blkcg cleanup series revert (Dennis Zhou)

   - Two NVMe fixes. One fixing a regression in the nvme request
     initialization in this merge window, causing nvme-fc to not work.
     The other is a suspend/resume p2p resource issue (James, Keith)

   - Fix sg discard merge, allowing us to merge in cases where we didn't
     before (Jianchao Wang)

   - Call rq_qos_exit() after the queue is frozen, preventing a hang
     (Ming)

   - Fix brd queue setup, fixing an oops if we fail setting up all
     devices (Ming)"

* tag 'for-linus-20181102' of git://git.kernel.dk/linux-block:
  nvme-pci: fix conflicting p2p resource adds
  nvme-fc: fix request private initialization
  blkcg: revert blkcg cleanups series
  block: brd: associate with queue until adding disk
  block: call rq_qos_exit() after queue is frozen
  mtip32xx: clean an indentation issue, remove extraneous tabs
  block: fix the DISCARD request merge
-rw-r--r--  Documentation/admin-guide/cgroup-v2.rst      8
-rw-r--r--  block/bfq-cgroup.c                           4
-rw-r--r--  block/bfq-iosched.c                          2
-rw-r--r--  block/bio.c                                174
-rw-r--r--  block/blk-cgroup.c                         123
-rw-r--r--  block/blk-core.c                             4
-rw-r--r--  block/blk-iolatency.c                       26
-rw-r--r--  block/blk-merge.c                           46
-rw-r--r--  block/blk-sysfs.c                            2
-rw-r--r--  block/blk-throttle.c                        13
-rw-r--r--  block/bounce.c                               4
-rw-r--r--  block/cfq-iosched.c                          4
-rw-r--r--  drivers/block/brd.c                         16
-rw-r--r--  drivers/block/loop.c                         5
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c            4
-rw-r--r--  drivers/md/raid0.c                           2
-rw-r--r--  drivers/nvme/host/fc.c                       2
-rw-r--r--  drivers/nvme/host/pci.c                      5
-rw-r--r--  fs/buffer.c                                 10
-rw-r--r--  fs/ext4/page-io.c                            2
-rw-r--r--  include/linux/bio.h                         26
-rw-r--r--  include/linux/blk-cgroup.h                 145
-rw-r--r--  include/linux/blk_types.h                    1
-rw-r--r--  include/linux/cgroup.h                       2
-rw-r--r--  include/linux/writeback.h                    5
-rw-r--r--  kernel/cgroup/cgroup.c                      48
-rw-r--r--  kernel/trace/blktrace.c                      4
-rw-r--r--  mm/page_io.c                                 2
28 files changed, 265 insertions, 424 deletions
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 8384c681a4b2..476722b7b636 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1879,10 +1879,8 @@ following two functions.
1879 1879
1880 wbc_init_bio(@wbc, @bio) 1880 wbc_init_bio(@wbc, @bio)
1881 Should be called for each bio carrying writeback data and 1881 Should be called for each bio carrying writeback data and
1882 associates the bio with the inode's owner cgroup and the 1882 associates the bio with the inode's owner cgroup. Can be
1883 corresponding request queue. This must be called after 1883 called anytime between bio allocation and submission.
1884 a queue (device) has been associated with the bio and
1885 before submission.
1886 1884
1887 wbc_account_io(@wbc, @page, @bytes) 1885 wbc_account_io(@wbc, @page, @bytes)
1888 Should be called for each data segment being written out. 1886 Should be called for each data segment being written out.
@@ -1901,7 +1899,7 @@ the configuration, the bio may be executed at a lower priority and if
1901the writeback session is holding shared resources, e.g. a journal 1899the writeback session is holding shared resources, e.g. a journal
1902entry, may lead to priority inversion. There is no one easy solution 1900entry, may lead to priority inversion. There is no one easy solution
1903for the problem. Filesystems can try to work around specific problem 1901for the problem. Filesystems can try to work around specific problem
1904cases by skipping wbc_init_bio() or using bio_associate_create_blkg() 1902cases by skipping wbc_init_bio() or using bio_associate_blkcg()
1905directly. 1903directly.
1906 1904
1907 1905
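[Annotation] The documentation change above relaxes the ordering rule for wbc_init_bio(): after the revert it can be called any time between bio allocation and submission, with wbc_account_io() still called per data segment. A minimal sketch of that usage pattern, modeled on the submit_bh_wbc() hunk in fs/buffer.c further down; the helper name and single-page payload are assumptions, while wbc_init_bio(), wbc_account_io(), bio_set_dev() and submit_bio() are the interfaces this series touches.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>

/* Illustrative sketch only -- helper name and payload choices are assumed. */
static void submit_page_for_writeback(struct writeback_control *wbc,
				      struct page *page,
				      struct block_device *bdev,
				      sector_t sector)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	if (wbc) {
		/* May now run before bio_set_dev(); see the fs/buffer.c hunk below. */
		wbc_init_bio(wbc, bio);
		wbc_account_io(wbc, page, PAGE_SIZE);
	}

	bio->bi_iter.bi_sector = sector;
	bio_set_dev(bio, bdev);
	bio_add_page(bio, page, PAGE_SIZE, 0);
	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
	submit_bio(bio);
}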
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index d9a7916ff0ab..9fe5952d117d 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
642 uint64_t serial_nr; 642 uint64_t serial_nr;
643 643
644 rcu_read_lock(); 644 rcu_read_lock();
645 serial_nr = __bio_blkcg(bio)->css.serial_nr; 645 serial_nr = bio_blkcg(bio)->css.serial_nr;
646 646
647 /* 647 /*
648 * Check whether blkcg has changed. The condition may trigger 648 * Check whether blkcg has changed. The condition may trigger
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio)
651 if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) 651 if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr))
652 goto out; 652 goto out;
653 653
654 bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); 654 bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio));
655 /* 655 /*
656 * Update blkg_path for bfq_log_* functions. We cache this 656 * Update blkg_path for bfq_log_* functions. We cache this
657 * path, and update it here, for the following 657 * path, and update it here, for the following
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 6075100f03a5..3a27d31fcda6 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd,
4384 4384
4385 rcu_read_lock(); 4385 rcu_read_lock();
4386 4386
4387 bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); 4387 bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio));
4388 if (!bfqg) { 4388 if (!bfqg) {
4389 bfqq = &bfqd->oom_bfqq; 4389 bfqq = &bfqd->oom_bfqq;
4390 goto out; 4390 goto out;
diff --git a/block/bio.c b/block/bio.c
index c27f77befbac..d5368a445561 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
609 bio->bi_iter = bio_src->bi_iter; 609 bio->bi_iter = bio_src->bi_iter;
610 bio->bi_io_vec = bio_src->bi_io_vec; 610 bio->bi_io_vec = bio_src->bi_io_vec;
611 611
612 bio_clone_blkg_association(bio, bio_src); 612 bio_clone_blkcg_association(bio, bio_src);
613
614 blkcg_bio_issue_init(bio);
615} 613}
616EXPORT_SYMBOL(__bio_clone_fast); 614EXPORT_SYMBOL(__bio_clone_fast);
617 615
@@ -1956,151 +1954,69 @@ EXPORT_SYMBOL(bioset_init_from_src);
1956 1954
1957#ifdef CONFIG_BLK_CGROUP 1955#ifdef CONFIG_BLK_CGROUP
1958 1956
1959/**
1960 * bio_associate_blkg - associate a bio with the a blkg
1961 * @bio: target bio
1962 * @blkg: the blkg to associate
1963 *
1964 * This tries to associate @bio with the specified blkg. Association failure
1965 * is handled by walking up the blkg tree. Therefore, the blkg associated can
1966 * be anything between @blkg and the root_blkg. This situation only happens
1967 * when a cgroup is dying and then the remaining bios will spill to the closest
1968 * alive blkg.
1969 *
1970 * A reference will be taken on the @blkg and will be released when @bio is
1971 * freed.
1972 */
1973int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
1974{
1975 if (unlikely(bio->bi_blkg))
1976 return -EBUSY;
1977 bio->bi_blkg = blkg_tryget_closest(blkg);
1978 return 0;
1979}
1980
1981/**
1982 * __bio_associate_blkg_from_css - internal blkg association function
1983 *
1984 * This in the core association function that all association paths rely on.
1985 * A blkg reference is taken which is released upon freeing of the bio.
1986 */
1987static int __bio_associate_blkg_from_css(struct bio *bio,
1988 struct cgroup_subsys_state *css)
1989{
1990 struct request_queue *q = bio->bi_disk->queue;
1991 struct blkcg_gq *blkg;
1992 int ret;
1993
1994 rcu_read_lock();
1995
1996 if (!css || !css->parent)
1997 blkg = q->root_blkg;
1998 else
1999 blkg = blkg_lookup_create(css_to_blkcg(css), q);
2000
2001 ret = bio_associate_blkg(bio, blkg);
2002
2003 rcu_read_unlock();
2004 return ret;
2005}
2006
2007/**
2008 * bio_associate_blkg_from_css - associate a bio with a specified css
2009 * @bio: target bio
2010 * @css: target css
2011 *
2012 * Associate @bio with the blkg found by combining the css's blkg and the
2013 * request_queue of the @bio. This falls back to the queue's root_blkg if
2014 * the association fails with the css.
2015 */
2016int bio_associate_blkg_from_css(struct bio *bio,
2017 struct cgroup_subsys_state *css)
2018{
2019 if (unlikely(bio->bi_blkg))
2020 return -EBUSY;
2021 return __bio_associate_blkg_from_css(bio, css);
2022}
2023EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
2024
2025#ifdef CONFIG_MEMCG 1957#ifdef CONFIG_MEMCG
2026/** 1958/**
2027 * bio_associate_blkg_from_page - associate a bio with the page's blkg 1959 * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
2028 * @bio: target bio 1960 * @bio: target bio
2029 * @page: the page to lookup the blkcg from 1961 * @page: the page to lookup the blkcg from
2030 * 1962 *
2031 * Associate @bio with the blkg from @page's owning memcg and the respective 1963 * Associate @bio with the blkcg from @page's owning memcg. This works like
2032 * request_queue. If cgroup_e_css returns NULL, fall back to the queue's 1964 * every other associate function wrt references.
2033 * root_blkg.
2034 *
2035 * Note: this must be called after bio has an associated device.
2036 */ 1965 */
2037int bio_associate_blkg_from_page(struct bio *bio, struct page *page) 1966int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
2038{ 1967{
2039 struct cgroup_subsys_state *css; 1968 struct cgroup_subsys_state *blkcg_css;
2040 int ret;
2041 1969
2042 if (unlikely(bio->bi_blkg)) 1970 if (unlikely(bio->bi_css))
2043 return -EBUSY; 1971 return -EBUSY;
2044 if (!page->mem_cgroup) 1972 if (!page->mem_cgroup)
2045 return 0; 1973 return 0;
2046 1974 blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
2047 rcu_read_lock(); 1975 &io_cgrp_subsys);
2048 1976 bio->bi_css = blkcg_css;
2049 css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); 1977 return 0;
2050
2051 ret = __bio_associate_blkg_from_css(bio, css);
2052
2053 rcu_read_unlock();
2054 return ret;
2055} 1978}
2056#endif /* CONFIG_MEMCG */ 1979#endif /* CONFIG_MEMCG */
2057 1980
2058/** 1981/**
2059 * bio_associate_create_blkg - associate a bio with a blkg from q 1982 * bio_associate_blkcg - associate a bio with the specified blkcg
2060 * @q: request_queue where bio is going
2061 * @bio: target bio 1983 * @bio: target bio
1984 * @blkcg_css: css of the blkcg to associate
1985 *
1986 * Associate @bio with the blkcg specified by @blkcg_css. Block layer will
1987 * treat @bio as if it were issued by a task which belongs to the blkcg.
2062 * 1988 *
2063 * Associate @bio with the blkg found from the bio's css and the request_queue. 1989 * This function takes an extra reference of @blkcg_css which will be put
2064 * If one is not found, bio_lookup_blkg creates the blkg. This falls back to 1990 * when @bio is released. The caller must own @bio and is responsible for
2065 * the queue's root_blkg if association fails. 1991 * synchronizing calls to this function.
2066 */ 1992 */
2067int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) 1993int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
2068{ 1994{
2069 struct cgroup_subsys_state *css; 1995 if (unlikely(bio->bi_css))
2070 int ret = 0; 1996 return -EBUSY;
2071 1997 css_get(blkcg_css);
2072 /* someone has already associated this bio with a blkg */ 1998 bio->bi_css = blkcg_css;
2073 if (bio->bi_blkg) 1999 return 0;
2074 return ret;
2075
2076 rcu_read_lock();
2077
2078 css = blkcg_css();
2079
2080 ret = __bio_associate_blkg_from_css(bio, css);
2081
2082 rcu_read_unlock();
2083 return ret;
2084} 2000}
2001EXPORT_SYMBOL_GPL(bio_associate_blkcg);
2085 2002
2086/** 2003/**
2087 * bio_reassociate_blkg - reassociate a bio with a blkg from q 2004 * bio_associate_blkg - associate a bio with the specified blkg
2088 * @q: request_queue where bio is going
2089 * @bio: target bio 2005 * @bio: target bio
2006 * @blkg: the blkg to associate
2090 * 2007 *
2091 * When submitting a bio, multiple recursive calls to make_request() may occur. 2008 * Associate @bio with the blkg specified by @blkg. This is the queue specific
2092 * This causes the initial associate done in blkcg_bio_issue_check() to be 2009 * blkcg information associated with the @bio, a reference will be taken on the
2093 * incorrect and reference the prior request_queue. This performs reassociation 2010 * @blkg and will be freed when the bio is freed.
2094 * when this situation happens.
2095 */ 2011 */
2096int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) 2012int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
2097{ 2013{
2098 if (bio->bi_blkg) { 2014 if (unlikely(bio->bi_blkg))
2099 blkg_put(bio->bi_blkg); 2015 return -EBUSY;
2100 bio->bi_blkg = NULL; 2016 if (!blkg_try_get(blkg))
2101 } 2017 return -ENODEV;
2102 2018 bio->bi_blkg = blkg;
2103 return bio_associate_create_blkg(q, bio); 2019 return 0;
2104} 2020}
2105 2021
2106/** 2022/**
@@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio)
2113 put_io_context(bio->bi_ioc); 2029 put_io_context(bio->bi_ioc);
2114 bio->bi_ioc = NULL; 2030 bio->bi_ioc = NULL;
2115 } 2031 }
2032 if (bio->bi_css) {
2033 css_put(bio->bi_css);
2034 bio->bi_css = NULL;
2035 }
2116 if (bio->bi_blkg) { 2036 if (bio->bi_blkg) {
2117 blkg_put(bio->bi_blkg); 2037 blkg_put(bio->bi_blkg);
2118 bio->bi_blkg = NULL; 2038 bio->bi_blkg = NULL;
@@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio)
2120} 2040}
2121 2041
2122/** 2042/**
2123 * bio_clone_blkg_association - clone blkg association from src to dst bio 2043 * bio_clone_blkcg_association - clone blkcg association from src to dst bio
2124 * @dst: destination bio 2044 * @dst: destination bio
2125 * @src: source bio 2045 * @src: source bio
2126 */ 2046 */
2127void bio_clone_blkg_association(struct bio *dst, struct bio *src) 2047void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
2128{ 2048{
2129 if (src->bi_blkg) 2049 if (src->bi_css)
2130 bio_associate_blkg(dst, src->bi_blkg); 2050 WARN_ON(bio_associate_blkcg(dst, src->bi_css));
2131} 2051}
2132EXPORT_SYMBOL_GPL(bio_clone_blkg_association); 2052EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
2133#endif /* CONFIG_BLK_CGROUP */ 2053#endif /* CONFIG_BLK_CGROUP */
2134 2054
2135static void __init biovec_init_slabs(void) 2055static void __init biovec_init_slabs(void)
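[Annotation] Taken together, the bio.c hunks restore the pre-cleanup association model: a bio carries a css pointer in bi_css (and optionally a blkg in bi_blkg), set via bio_associate_blkcg(), and both references are dropped in bio_disassociate_task() when the bio is freed. A hedged sketch of a caller, going by the restored kernel-doc above; the surrounding function is hypothetical.

#include <linux/bio.h>
#include <linux/cgroup.h>

/* Hypothetical caller; only bio_associate_blkcg()/submit_bio() are real API. */
static void submit_bio_on_behalf_of(struct bio *bio,
				    struct cgroup_subsys_state *blkcg_css)
{
	/*
	 * Takes an extra reference on @blkcg_css; it is put again in
	 * bio_disassociate_task() when the bio is released.
	 */
	if (bio_associate_blkcg(bio, blkcg_css) == -EBUSY)
		pr_debug("bio already carries a blkcg association\n");

	submit_bio(bio);
}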
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 992da5592c6e..c630e02836a8 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg)
84 kfree(blkg); 84 kfree(blkg);
85} 85}
86 86
87static void __blkg_release(struct rcu_head *rcu)
88{
89 struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head);
90
91 percpu_ref_exit(&blkg->refcnt);
92
93 /* release the blkcg and parent blkg refs this blkg has been holding */
94 css_put(&blkg->blkcg->css);
95 if (blkg->parent)
96 blkg_put(blkg->parent);
97
98 wb_congested_put(blkg->wb_congested);
99
100 blkg_free(blkg);
101}
102
103/*
104 * A group is RCU protected, but having an rcu lock does not mean that one
105 * can access all the fields of blkg and assume these are valid. For
106 * example, don't try to follow throtl_data and request queue links.
107 *
108 * Having a reference to blkg under an rcu allows accesses to only values
109 * local to groups like group stats and group rate limits.
110 */
111static void blkg_release(struct percpu_ref *ref)
112{
113 struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt);
114
115 call_rcu(&blkg->rcu_head, __blkg_release);
116}
117
118/** 87/**
119 * blkg_alloc - allocate a blkg 88 * blkg_alloc - allocate a blkg
120 * @blkcg: block cgroup the new blkg is associated with 89 * @blkcg: block cgroup the new blkg is associated with
@@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
141 blkg->q = q; 110 blkg->q = q;
142 INIT_LIST_HEAD(&blkg->q_node); 111 INIT_LIST_HEAD(&blkg->q_node);
143 blkg->blkcg = blkcg; 112 blkg->blkcg = blkcg;
113 atomic_set(&blkg->refcnt, 1);
144 114
145 /* root blkg uses @q->root_rl, init rl only for !root blkgs */ 115 /* root blkg uses @q->root_rl, init rl only for !root blkgs */
146 if (blkcg != &blkcg_root) { 116 if (blkcg != &blkcg_root) {
@@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
247 blkg_get(blkg->parent); 217 blkg_get(blkg->parent);
248 } 218 }
249 219
250 ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0,
251 GFP_NOWAIT | __GFP_NOWARN);
252 if (ret)
253 goto err_cancel_ref;
254
255 /* invoke per-policy init */ 220 /* invoke per-policy init */
256 for (i = 0; i < BLKCG_MAX_POLS; i++) { 221 for (i = 0; i < BLKCG_MAX_POLS; i++) {
257 struct blkcg_policy *pol = blkcg_policy[i]; 222 struct blkcg_policy *pol = blkcg_policy[i];
@@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
284 blkg_put(blkg); 249 blkg_put(blkg);
285 return ERR_PTR(ret); 250 return ERR_PTR(ret);
286 251
287err_cancel_ref:
288 percpu_ref_exit(&blkg->refcnt);
289err_put_congested: 252err_put_congested:
290 wb_congested_put(wb_congested); 253 wb_congested_put(wb_congested);
291err_put_css: 254err_put_css:
@@ -296,7 +259,7 @@ err_free_blkg:
296} 259}
297 260
298/** 261/**
299 * __blkg_lookup_create - lookup blkg, try to create one if not there 262 * blkg_lookup_create - lookup blkg, try to create one if not there
300 * @blkcg: blkcg of interest 263 * @blkcg: blkcg of interest
301 * @q: request_queue of interest 264 * @q: request_queue of interest
302 * 265 *
@@ -305,11 +268,12 @@ err_free_blkg:
305 * that all non-root blkg's have access to the parent blkg. This function 268 * that all non-root blkg's have access to the parent blkg. This function
306 * should be called under RCU read lock and @q->queue_lock. 269 * should be called under RCU read lock and @q->queue_lock.
307 * 270 *
308 * Returns the blkg or the closest blkg if blkg_create fails as it walks 271 * Returns pointer to the looked up or created blkg on success, ERR_PTR()
309 * down from root. 272 * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not
273 * dead and bypassing, returns ERR_PTR(-EBUSY).
310 */ 274 */
311struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, 275struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
312 struct request_queue *q) 276 struct request_queue *q)
313{ 277{
314 struct blkcg_gq *blkg; 278 struct blkcg_gq *blkg;
315 279
@@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
321 * we shouldn't allow anything to go through for a bypassing queue. 285 * we shouldn't allow anything to go through for a bypassing queue.
322 */ 286 */
323 if (unlikely(blk_queue_bypass(q))) 287 if (unlikely(blk_queue_bypass(q)))
324 return q->root_blkg; 288 return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY);
325 289
326 blkg = __blkg_lookup(blkcg, q, true); 290 blkg = __blkg_lookup(blkcg, q, true);
327 if (blkg) 291 if (blkg)
@@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
329 293
330 /* 294 /*
331 * Create blkgs walking down from blkcg_root to @blkcg, so that all 295 * Create blkgs walking down from blkcg_root to @blkcg, so that all
332 * non-root blkgs have access to their parents. Returns the closest 296 * non-root blkgs have access to their parents.
333 * blkg to the intended blkg should blkg_create() fail.
334 */ 297 */
335 while (true) { 298 while (true) {
336 struct blkcg *pos = blkcg; 299 struct blkcg *pos = blkcg;
337 struct blkcg *parent = blkcg_parent(blkcg); 300 struct blkcg *parent = blkcg_parent(blkcg);
338 struct blkcg_gq *ret_blkg = q->root_blkg; 301
339 302 while (parent && !__blkg_lookup(parent, q, false)) {
340 while (parent) {
341 blkg = __blkg_lookup(parent, q, false);
342 if (blkg) {
343 /* remember closest blkg */
344 ret_blkg = blkg;
345 break;
346 }
347 pos = parent; 303 pos = parent;
348 parent = blkcg_parent(parent); 304 parent = blkcg_parent(parent);
349 } 305 }
350 306
351 blkg = blkg_create(pos, q, NULL); 307 blkg = blkg_create(pos, q, NULL);
352 if (IS_ERR(blkg)) 308 if (pos == blkcg || IS_ERR(blkg))
353 return ret_blkg;
354 if (pos == blkcg)
355 return blkg; 309 return blkg;
356 } 310 }
357} 311}
358 312
359/**
360 * blkg_lookup_create - find or create a blkg
361 * @blkcg: target block cgroup
362 * @q: target request_queue
363 *
364 * This looks up or creates the blkg representing the unique pair
365 * of the blkcg and the request_queue.
366 */
367struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
368 struct request_queue *q)
369{
370 struct blkcg_gq *blkg = blkg_lookup(blkcg, q);
371 unsigned long flags;
372
373 if (unlikely(!blkg)) {
374 spin_lock_irqsave(q->queue_lock, flags);
375
376 blkg = __blkg_lookup_create(blkcg, q);
377
378 spin_unlock_irqrestore(q->queue_lock, flags);
379 }
380
381 return blkg;
382}
383
384static void blkg_destroy(struct blkcg_gq *blkg) 313static void blkg_destroy(struct blkcg_gq *blkg)
385{ 314{
386 struct blkcg *blkcg = blkg->blkcg; 315 struct blkcg *blkcg = blkg->blkcg;
@@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
424 * Put the reference taken at the time of creation so that when all 353 * Put the reference taken at the time of creation so that when all
425 * queues are gone, group can be destroyed. 354 * queues are gone, group can be destroyed.
426 */ 355 */
427 percpu_ref_kill(&blkg->refcnt); 356 blkg_put(blkg);
428} 357}
429 358
430/** 359/**
@@ -452,6 +381,29 @@ static void blkg_destroy_all(struct request_queue *q)
452} 381}
453 382
454/* 383/*
384 * A group is RCU protected, but having an rcu lock does not mean that one
385 * can access all the fields of blkg and assume these are valid. For
386 * example, don't try to follow throtl_data and request queue links.
387 *
388 * Having a reference to blkg under an rcu allows accesses to only values
389 * local to groups like group stats and group rate limits.
390 */
391void __blkg_release_rcu(struct rcu_head *rcu_head)
392{
393 struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
394
395 /* release the blkcg and parent blkg refs this blkg has been holding */
396 css_put(&blkg->blkcg->css);
397 if (blkg->parent)
398 blkg_put(blkg->parent);
399
400 wb_congested_put(blkg->wb_congested);
401
402 blkg_free(blkg);
403}
404EXPORT_SYMBOL_GPL(__blkg_release_rcu);
405
406/*
455 * The next function used by blk_queue_for_each_rl(). It's a bit tricky 407 * The next function used by blk_queue_for_each_rl(). It's a bit tricky
456 * because the root blkg uses @q->root_rl instead of its own rl. 408 * because the root blkg uses @q->root_rl instead of its own rl.
457 */ 409 */
@@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void)
1796 blkg = blkg_lookup(blkcg, q); 1748 blkg = blkg_lookup(blkcg, q);
1797 if (!blkg) 1749 if (!blkg)
1798 goto out; 1750 goto out;
1799 if (!blkg_tryget(blkg)) 1751 blkg = blkg_try_get(blkg);
1752 if (!blkg)
1800 goto out; 1753 goto out;
1801 rcu_read_unlock(); 1754 rcu_read_unlock();
1802 1755
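[Annotation] The blk-cgroup.c hunks above go back to an atomic_t refcount with blkg_try_get() (compare the blkcg_maybe_throttle_current() change). A small sketch of the restored lookup pattern, assuming the caller already holds a blkcg pointer; the wrapper name is invented.

#include <linux/blk-cgroup.h>

/* Invented wrapper; blkg_lookup()/blkg_try_get()/blkg_put() are the real API. */
static struct blkcg_gq *get_blkg_ref(struct blkcg *blkcg,
				     struct request_queue *q)
{
	struct blkcg_gq *blkg;

	rcu_read_lock();
	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		/* Returns NULL if the refcount already dropped to zero. */
		blkg = blkg_try_get(blkg);
	rcu_read_unlock();

	return blkg;	/* caller drops it with blkg_put() */
}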
diff --git a/block/blk-core.c b/block/blk-core.c
index bc6ea87d10e0..ce12515f9b9b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -785,6 +785,9 @@ void blk_cleanup_queue(struct request_queue *q)
785 * prevent that q->request_fn() gets invoked after draining finished. 785 * prevent that q->request_fn() gets invoked after draining finished.
786 */ 786 */
787 blk_freeze_queue(q); 787 blk_freeze_queue(q);
788
789 rq_qos_exit(q);
790
788 spin_lock_irq(lock); 791 spin_lock_irq(lock);
789 queue_flag_set(QUEUE_FLAG_DEAD, q); 792 queue_flag_set(QUEUE_FLAG_DEAD, q);
790 spin_unlock_irq(lock); 793 spin_unlock_irq(lock);
@@ -2432,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio)
2432 if (q) 2435 if (q)
2433 blk_queue_exit(q); 2436 blk_queue_exit(q);
2434 q = bio->bi_disk->queue; 2437 q = bio->bi_disk->queue;
2435 bio_reassociate_blkg(q, bio);
2436 flags = 0; 2438 flags = 0;
2437 if (bio->bi_opf & REQ_NOWAIT) 2439 if (bio->bi_opf & REQ_NOWAIT)
2438 flags = BLK_MQ_REQ_NOWAIT; 2440 flags = BLK_MQ_REQ_NOWAIT;
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
index 28f80d227528..38c35c32aff2 100644
--- a/block/blk-iolatency.c
+++ b/block/blk-iolatency.c
@@ -482,12 +482,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio,
482 spinlock_t *lock) 482 spinlock_t *lock)
483{ 483{
484 struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); 484 struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos);
485 struct blkcg_gq *blkg = bio->bi_blkg; 485 struct blkcg *blkcg;
486 struct blkcg_gq *blkg;
487 struct request_queue *q = rqos->q;
486 bool issue_as_root = bio_issue_as_root_blkg(bio); 488 bool issue_as_root = bio_issue_as_root_blkg(bio);
487 489
488 if (!blk_iolatency_enabled(blkiolat)) 490 if (!blk_iolatency_enabled(blkiolat))
489 return; 491 return;
490 492
493 rcu_read_lock();
494 blkcg = bio_blkcg(bio);
495 bio_associate_blkcg(bio, &blkcg->css);
496 blkg = blkg_lookup(blkcg, q);
497 if (unlikely(!blkg)) {
498 if (!lock)
499 spin_lock_irq(q->queue_lock);
500 blkg = blkg_lookup_create(blkcg, q);
501 if (IS_ERR(blkg))
502 blkg = NULL;
503 if (!lock)
504 spin_unlock_irq(q->queue_lock);
505 }
506 if (!blkg)
507 goto out;
508
509 bio_issue_init(&bio->bi_issue, bio_sectors(bio));
510 bio_associate_blkg(bio, blkg);
511out:
512 rcu_read_unlock();
491 while (blkg && blkg->parent) { 513 while (blkg && blkg->parent) {
492 struct iolatency_grp *iolat = blkg_to_lat(blkg); 514 struct iolatency_grp *iolat = blkg_to_lat(blkg);
493 if (!iolat) { 515 if (!iolat) {
@@ -708,7 +730,7 @@ static void blkiolatency_timer_fn(struct timer_list *t)
708 * We could be exiting, don't access the pd unless we have a 730 * We could be exiting, don't access the pd unless we have a
709 * ref on the blkg. 731 * ref on the blkg.
710 */ 732 */
711 if (!blkg_tryget(blkg)) 733 if (!blkg_try_get(blkg))
712 continue; 734 continue;
713 735
714 iolat = blkg_to_lat(blkg); 736 iolat = blkg_to_lat(blkg);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 42a46744c11b..6b5ad275ed56 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -714,6 +714,31 @@ static void blk_account_io_merge(struct request *req)
714 part_stat_unlock(); 714 part_stat_unlock();
715 } 715 }
716} 716}
717/*
718 * Two cases of handling DISCARD merge:
719 * If max_discard_segments > 1, the driver takes every bio
720 * as a range and send them to controller together. The ranges
721 * needn't to be contiguous.
722 * Otherwise, the bios/requests will be handled as same as
723 * others which should be contiguous.
724 */
725static inline bool blk_discard_mergable(struct request *req)
726{
727 if (req_op(req) == REQ_OP_DISCARD &&
728 queue_max_discard_segments(req->q) > 1)
729 return true;
730 return false;
731}
732
733enum elv_merge blk_try_req_merge(struct request *req, struct request *next)
734{
735 if (blk_discard_mergable(req))
736 return ELEVATOR_DISCARD_MERGE;
737 else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
738 return ELEVATOR_BACK_MERGE;
739
740 return ELEVATOR_NO_MERGE;
741}
717 742
718/* 743/*
719 * For non-mq, this has to be called with the request spinlock acquired. 744 * For non-mq, this has to be called with the request spinlock acquired.
@@ -731,12 +756,6 @@ static struct request *attempt_merge(struct request_queue *q,
731 if (req_op(req) != req_op(next)) 756 if (req_op(req) != req_op(next))
732 return NULL; 757 return NULL;
733 758
734 /*
735 * not contiguous
736 */
737 if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
738 return NULL;
739
740 if (rq_data_dir(req) != rq_data_dir(next) 759 if (rq_data_dir(req) != rq_data_dir(next)
741 || req->rq_disk != next->rq_disk 760 || req->rq_disk != next->rq_disk
742 || req_no_special_merge(next)) 761 || req_no_special_merge(next))
@@ -760,11 +779,19 @@ static struct request *attempt_merge(struct request_queue *q,
760 * counts here. Handle DISCARDs separately, as they 779 * counts here. Handle DISCARDs separately, as they
761 * have separate settings. 780 * have separate settings.
762 */ 781 */
763 if (req_op(req) == REQ_OP_DISCARD) { 782
783 switch (blk_try_req_merge(req, next)) {
784 case ELEVATOR_DISCARD_MERGE:
764 if (!req_attempt_discard_merge(q, req, next)) 785 if (!req_attempt_discard_merge(q, req, next))
765 return NULL; 786 return NULL;
766 } else if (!ll_merge_requests_fn(q, req, next)) 787 break;
788 case ELEVATOR_BACK_MERGE:
789 if (!ll_merge_requests_fn(q, req, next))
790 return NULL;
791 break;
792 default:
767 return NULL; 793 return NULL;
794 }
768 795
769 /* 796 /*
770 * If failfast settings disagree or any of the two is already 797 * If failfast settings disagree or any of the two is already
@@ -888,8 +915,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
888 915
889enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) 916enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
890{ 917{
891 if (req_op(rq) == REQ_OP_DISCARD && 918 if (blk_discard_mergable(rq))
892 queue_max_discard_segments(rq->q) > 1)
893 return ELEVATOR_DISCARD_MERGE; 919 return ELEVATOR_DISCARD_MERGE;
894 else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) 920 else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
895 return ELEVATOR_BACK_MERGE; 921 return ELEVATOR_BACK_MERGE;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 0641533597f1..844a454a7b3a 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -1007,8 +1007,6 @@ void blk_unregister_queue(struct gendisk *disk)
1007 kobject_del(&q->kobj); 1007 kobject_del(&q->kobj);
1008 blk_trace_remove_sysfs(disk_to_dev(disk)); 1008 blk_trace_remove_sysfs(disk_to_dev(disk));
1009 1009
1010 rq_qos_exit(q);
1011
1012 mutex_lock(&q->sysfs_lock); 1010 mutex_lock(&q->sysfs_lock);
1013 if (q->request_fn || (q->mq_ops && q->elevator)) 1011 if (q->request_fn || (q->mq_ops && q->elevator))
1014 elv_unregister_queue(q); 1012 elv_unregister_queue(q);
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 4bda70e8db48..db1a3a2ae006 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td)
2115} 2115}
2116#endif 2116#endif
2117 2117
2118static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio)
2119{
2120#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
2121 /* fallback to root_blkg if we fail to get a blkg ref */
2122 if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV))
2123 bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
2124 bio_issue_init(&bio->bi_issue, bio_sectors(bio));
2125#endif
2126}
2127
2118bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, 2128bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
2119 struct bio *bio) 2129 struct bio *bio)
2120{ 2130{
2121 struct throtl_qnode *qn = NULL; 2131 struct throtl_qnode *qn = NULL;
2122 struct throtl_grp *tg = blkg_to_tg(blkg); 2132 struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg);
2123 struct throtl_service_queue *sq; 2133 struct throtl_service_queue *sq;
2124 bool rw = bio_data_dir(bio); 2134 bool rw = bio_data_dir(bio);
2125 bool throttled = false; 2135 bool throttled = false;
@@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg,
2138 if (unlikely(blk_queue_bypass(q))) 2148 if (unlikely(blk_queue_bypass(q)))
2139 goto out_unlock; 2149 goto out_unlock;
2140 2150
2151 blk_throtl_assoc_bio(tg, bio);
2141 blk_throtl_update_idletime(tg); 2152 blk_throtl_update_idletime(tg);
2142 2153
2143 sq = &tg->service_queue; 2154 sq = &tg->service_queue;
diff --git a/block/bounce.c b/block/bounce.c
index cf49fe02f65c..36869afc258c 100644
--- a/block/bounce.c
+++ b/block/bounce.c
@@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
276 } 276 }
277 } 277 }
278 278
279 bio_clone_blkg_association(bio, bio_src); 279 bio_clone_blkcg_association(bio, bio_src);
280
281 blkcg_bio_issue_init(bio);
282 280
283 return bio; 281 return bio;
284} 282}
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a3d87dd3c1a..ed41aa978c4a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio)
3759 uint64_t serial_nr; 3759 uint64_t serial_nr;
3760 3760
3761 rcu_read_lock(); 3761 rcu_read_lock();
3762 serial_nr = __bio_blkcg(bio)->css.serial_nr; 3762 serial_nr = bio_blkcg(bio)->css.serial_nr;
3763 rcu_read_unlock(); 3763 rcu_read_unlock();
3764 3764
3765 /* 3765 /*
@@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic,
3824 struct cfq_group *cfqg; 3824 struct cfq_group *cfqg;
3825 3825
3826 rcu_read_lock(); 3826 rcu_read_lock();
3827 cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio)); 3827 cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio));
3828 if (!cfqg) { 3828 if (!cfqg) {
3829 cfqq = &cfqd->oom_cfqq; 3829 cfqq = &cfqd->oom_cfqq;
3830 goto out; 3830 goto out;
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index df8103dd40ac..c18586fccb6f 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -396,15 +396,14 @@ static struct brd_device *brd_alloc(int i)
396 disk->first_minor = i * max_part; 396 disk->first_minor = i * max_part;
397 disk->fops = &brd_fops; 397 disk->fops = &brd_fops;
398 disk->private_data = brd; 398 disk->private_data = brd;
399 disk->queue = brd->brd_queue;
400 disk->flags = GENHD_FL_EXT_DEVT; 399 disk->flags = GENHD_FL_EXT_DEVT;
401 sprintf(disk->disk_name, "ram%d", i); 400 sprintf(disk->disk_name, "ram%d", i);
402 set_capacity(disk, rd_size * 2); 401 set_capacity(disk, rd_size * 2);
403 disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; 402 brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
404 403
405 /* Tell the block layer that this is not a rotational device */ 404 /* Tell the block layer that this is not a rotational device */
406 blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); 405 blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue);
407 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); 406 blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue);
408 407
409 return brd; 408 return brd;
410 409
@@ -436,6 +435,7 @@ static struct brd_device *brd_init_one(int i, bool *new)
436 435
437 brd = brd_alloc(i); 436 brd = brd_alloc(i);
438 if (brd) { 437 if (brd) {
438 brd->brd_disk->queue = brd->brd_queue;
439 add_disk(brd->brd_disk); 439 add_disk(brd->brd_disk);
440 list_add_tail(&brd->brd_list, &brd_devices); 440 list_add_tail(&brd->brd_list, &brd_devices);
441 } 441 }
@@ -503,8 +503,14 @@ static int __init brd_init(void)
503 503
504 /* point of no return */ 504 /* point of no return */
505 505
506 list_for_each_entry(brd, &brd_devices, brd_list) 506 list_for_each_entry(brd, &brd_devices, brd_list) {
507 /*
508 * associate with queue just before adding disk for
509 * avoiding to mess up failure path
510 */
511 brd->brd_disk->queue = brd->brd_queue;
507 add_disk(brd->brd_disk); 512 add_disk(brd->brd_disk);
513 }
508 514
509 blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS, 515 blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS,
510 THIS_MODULE, brd_probe, NULL, NULL); 516 THIS_MODULE, brd_probe, NULL, NULL);
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index e6273ae85246..cb0cc8685076 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -77,7 +77,6 @@
77#include <linux/falloc.h> 77#include <linux/falloc.h>
78#include <linux/uio.h> 78#include <linux/uio.h>
79#include <linux/ioprio.h> 79#include <linux/ioprio.h>
80#include <linux/blk-cgroup.h>
81 80
82#include "loop.h" 81#include "loop.h"
83 82
@@ -1760,8 +1759,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx,
1760 1759
1761 /* always use the first bio's css */ 1760 /* always use the first bio's css */
1762#ifdef CONFIG_BLK_CGROUP 1761#ifdef CONFIG_BLK_CGROUP
1763 if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { 1762 if (cmd->use_aio && rq->bio && rq->bio->bi_css) {
1764 cmd->css = &bio_blkcg(rq->bio)->css; 1763 cmd->css = rq->bio->bi_css;
1765 css_get(cmd->css); 1764 css_get(cmd->css);
1766 } else 1765 } else
1767#endif 1766#endif
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index dfc8de6ce525..a7daa8acbab3 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -1942,8 +1942,8 @@ static int exec_drive_taskfile(struct driver_data *dd,
1942 dev_warn(&dd->pdev->dev, 1942 dev_warn(&dd->pdev->dev,
1943 "data movement but " 1943 "data movement but "
1944 "sect_count is 0\n"); 1944 "sect_count is 0\n");
1945 err = -EINVAL; 1945 err = -EINVAL;
1946 goto abort; 1946 goto abort;
1947 } 1947 }
1948 } 1948 }
1949 } 1949 }
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index f3fb5bb8c82a..ac1cffd2a09b 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio)
542 !discard_bio) 542 !discard_bio)
543 continue; 543 continue;
544 bio_chain(discard_bio, bio); 544 bio_chain(discard_bio, bio);
545 bio_clone_blkg_association(discard_bio, bio); 545 bio_clone_blkcg_association(discard_bio, bio);
546 if (mddev->gendisk) 546 if (mddev->gendisk)
547 trace_block_bio_remap(bdev_get_queue(rdev->bdev), 547 trace_block_bio_remap(bdev_get_queue(rdev->bdev),
548 discard_bio, disk_devt(mddev->gendisk), 548 discard_bio, disk_devt(mddev->gendisk),
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index e52b9d3c0bd6..0b70c8bab045 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -1704,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
1704 op->fcp_req.rspaddr = &op->rsp_iu; 1704 op->fcp_req.rspaddr = &op->rsp_iu;
1705 op->fcp_req.rsplen = sizeof(op->rsp_iu); 1705 op->fcp_req.rsplen = sizeof(op->rsp_iu);
1706 op->fcp_req.done = nvme_fc_fcpio_done; 1706 op->fcp_req.done = nvme_fc_fcpio_done;
1707 op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
1708 op->ctrl = ctrl; 1707 op->ctrl = ctrl;
1709 op->queue = queue; 1708 op->queue = queue;
1710 op->rq = rq; 1709 op->rq = rq;
@@ -1752,6 +1751,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
1752 if (res) 1751 if (res)
1753 return res; 1752 return res;
1754 op->op.fcp_req.first_sgl = &op->sgl[0]; 1753 op->op.fcp_req.first_sgl = &op->sgl[0];
1754 op->op.fcp_req.private = &op->priv[0];
1755 return res; 1755 return res;
1756} 1756}
1757 1757
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index f30031945ee4..c33bb201b884 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1663,6 +1663,9 @@ static void nvme_map_cmb(struct nvme_dev *dev)
1663 struct pci_dev *pdev = to_pci_dev(dev->dev); 1663 struct pci_dev *pdev = to_pci_dev(dev->dev);
1664 int bar; 1664 int bar;
1665 1665
1666 if (dev->cmb_size)
1667 return;
1668
1666 dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); 1669 dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
1667 if (!dev->cmbsz) 1670 if (!dev->cmbsz)
1668 return; 1671 return;
@@ -2147,7 +2150,6 @@ static void nvme_pci_disable(struct nvme_dev *dev)
2147{ 2150{
2148 struct pci_dev *pdev = to_pci_dev(dev->dev); 2151 struct pci_dev *pdev = to_pci_dev(dev->dev);
2149 2152
2150 nvme_release_cmb(dev);
2151 pci_free_irq_vectors(pdev); 2153 pci_free_irq_vectors(pdev);
2152 2154
2153 if (pci_is_enabled(pdev)) { 2155 if (pci_is_enabled(pdev)) {
@@ -2595,6 +2597,7 @@ static void nvme_remove(struct pci_dev *pdev)
2595 nvme_stop_ctrl(&dev->ctrl); 2597 nvme_stop_ctrl(&dev->ctrl);
2596 nvme_remove_namespaces(&dev->ctrl); 2598 nvme_remove_namespaces(&dev->ctrl);
2597 nvme_dev_disable(dev, true); 2599 nvme_dev_disable(dev, true);
2600 nvme_release_cmb(dev);
2598 nvme_free_host_mem(dev); 2601 nvme_free_host_mem(dev);
2599 nvme_dev_remove_admin(dev); 2602 nvme_dev_remove_admin(dev);
2600 nvme_free_queues(dev, 0); 2603 nvme_free_queues(dev, 0);
diff --git a/fs/buffer.c b/fs/buffer.c
index d60d61e8ed7d..1286c2b95498 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3060 */ 3060 */
3061 bio = bio_alloc(GFP_NOIO, 1); 3061 bio = bio_alloc(GFP_NOIO, 1);
3062 3062
3063 if (wbc) {
3064 wbc_init_bio(wbc, bio);
3065 wbc_account_io(wbc, bh->b_page, bh->b_size);
3066 }
3067
3063 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); 3068 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
3064 bio_set_dev(bio, bh->b_bdev); 3069 bio_set_dev(bio, bh->b_bdev);
3065 bio->bi_write_hint = write_hint; 3070 bio->bi_write_hint = write_hint;
@@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
3079 op_flags |= REQ_PRIO; 3084 op_flags |= REQ_PRIO;
3080 bio_set_op_attrs(bio, op, op_flags); 3085 bio_set_op_attrs(bio, op, op_flags);
3081 3086
3082 if (wbc) {
3083 wbc_init_bio(wbc, bio);
3084 wbc_account_io(wbc, bh->b_page, bh->b_size);
3085 }
3086
3087 submit_bio(bio); 3087 submit_bio(bio);
3088 return 0; 3088 return 0;
3089} 3089}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 2aa62d58d8dd..db7590178dfc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
374 bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); 374 bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
375 if (!bio) 375 if (!bio)
376 return -ENOMEM; 376 return -ENOMEM;
377 wbc_init_bio(io->io_wbc, bio);
377 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); 378 bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
378 bio_set_dev(bio, bh->b_bdev); 379 bio_set_dev(bio, bh->b_bdev);
379 bio->bi_end_io = ext4_end_bio; 380 bio->bi_end_io = ext4_end_bio;
380 bio->bi_private = ext4_get_io_end(io->io_end); 381 bio->bi_private = ext4_get_io_end(io->io_end);
381 io->io_bio = bio; 382 io->io_bio = bio;
382 io->io_next_block = bh->b_blocknr; 383 io->io_next_block = bh->b_blocknr;
383 wbc_init_bio(io->io_wbc, bio);
384 return 0; 384 return 0;
385} 385}
386 386
diff --git a/include/linux/bio.h b/include/linux/bio.h
index b47c7f716731..056fb627edb3 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -503,31 +503,23 @@ do { \
503 disk_devt((bio)->bi_disk) 503 disk_devt((bio)->bi_disk)
504 504
505#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) 505#if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP)
506int bio_associate_blkg_from_page(struct bio *bio, struct page *page); 506int bio_associate_blkcg_from_page(struct bio *bio, struct page *page);
507#else 507#else
508static inline int bio_associate_blkg_from_page(struct bio *bio, 508static inline int bio_associate_blkcg_from_page(struct bio *bio,
509 struct page *page) { return 0; } 509 struct page *page) { return 0; }
510#endif 510#endif
511 511
512#ifdef CONFIG_BLK_CGROUP 512#ifdef CONFIG_BLK_CGROUP
513int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css);
513int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); 514int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg);
514int bio_associate_blkg_from_css(struct bio *bio,
515 struct cgroup_subsys_state *css);
516int bio_associate_create_blkg(struct request_queue *q, struct bio *bio);
517int bio_reassociate_blkg(struct request_queue *q, struct bio *bio);
518void bio_disassociate_task(struct bio *bio); 515void bio_disassociate_task(struct bio *bio);
519void bio_clone_blkg_association(struct bio *dst, struct bio *src); 516void bio_clone_blkcg_association(struct bio *dst, struct bio *src);
520#else /* CONFIG_BLK_CGROUP */ 517#else /* CONFIG_BLK_CGROUP */
521static inline int bio_associate_blkg_from_css(struct bio *bio, 518static inline int bio_associate_blkcg(struct bio *bio,
522 struct cgroup_subsys_state *css) 519 struct cgroup_subsys_state *blkcg_css) { return 0; }
523{ return 0; }
524static inline int bio_associate_create_blkg(struct request_queue *q,
525 struct bio *bio) { return 0; }
526static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio)
527{ return 0; }
528static inline void bio_disassociate_task(struct bio *bio) { } 520static inline void bio_disassociate_task(struct bio *bio) { }
529static inline void bio_clone_blkg_association(struct bio *dst, 521static inline void bio_clone_blkcg_association(struct bio *dst,
530 struct bio *src) { } 522 struct bio *src) { }
531#endif /* CONFIG_BLK_CGROUP */ 523#endif /* CONFIG_BLK_CGROUP */
532 524
533#ifdef CONFIG_HIGHMEM 525#ifdef CONFIG_HIGHMEM
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 1e76ceebeb5d..6d766a19f2bb 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -126,7 +126,7 @@ struct blkcg_gq {
126 struct request_list rl; 126 struct request_list rl;
127 127
128 /* reference count */ 128 /* reference count */
129 struct percpu_ref refcnt; 129 atomic_t refcnt;
130 130
131 /* is this blkg online? protected by both blkcg and q locks */ 131 /* is this blkg online? protected by both blkcg and q locks */
132 bool online; 132 bool online;
@@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css;
184 184
185struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, 185struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
186 struct request_queue *q, bool update_hint); 186 struct request_queue *q, bool update_hint);
187struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
188 struct request_queue *q);
189struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, 187struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
190 struct request_queue *q); 188 struct request_queue *q);
191int blkcg_init_queue(struct request_queue *q); 189int blkcg_init_queue(struct request_queue *q);
@@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
232 char *input, struct blkg_conf_ctx *ctx); 230 char *input, struct blkg_conf_ctx *ctx);
233void blkg_conf_finish(struct blkg_conf_ctx *ctx); 231void blkg_conf_finish(struct blkg_conf_ctx *ctx);
234 232
235/**
236 * blkcg_css - find the current css
237 *
238 * Find the css associated with either the kthread or the current task.
239 * This may return a dying css, so it is up to the caller to use tryget logic
240 * to confirm it is alive and well.
241 */
242static inline struct cgroup_subsys_state *blkcg_css(void)
243{
244 struct cgroup_subsys_state *css;
245
246 css = kthread_blkcg();
247 if (css)
248 return css;
249 return task_css(current, io_cgrp_id);
250}
251 233
252static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) 234static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css)
253{ 235{
254 return css ? container_of(css, struct blkcg, css) : NULL; 236 return css ? container_of(css, struct blkcg, css) : NULL;
255} 237}
256 238
257/**
258 * __bio_blkcg - internal version of bio_blkcg for bfq and cfq
259 *
260 * DO NOT USE.
261 * There is a flaw using this version of the function. In particular, this was
262 * used in a broken paradigm where association was called on the given css. It
263 * is possible though that the returned css from task_css() is in the process
264 * of dying due to migration of the current task. So it is improper to assume
265 * *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will
266 * take additional work to handle more gracefully.
267 */
268static inline struct blkcg *__bio_blkcg(struct bio *bio)
269{
270 if (bio && bio->bi_blkg)
271 return bio->bi_blkg->blkcg;
272 return css_to_blkcg(blkcg_css());
273}
274
275/**
276 * bio_blkcg - grab the blkcg associated with a bio
277 * @bio: target bio
278 *
279 * This returns the blkcg associated with a bio, NULL if not associated.
280 * Callers are expected to either handle NULL or know association has been
281 * done prior to calling this.
282 */
283static inline struct blkcg *bio_blkcg(struct bio *bio) 239static inline struct blkcg *bio_blkcg(struct bio *bio)
284{ 240{
285 if (bio && bio->bi_blkg) 241 struct cgroup_subsys_state *css;
286 return bio->bi_blkg->blkcg; 242
287 return NULL; 243 if (bio && bio->bi_css)
244 return css_to_blkcg(bio->bi_css);
245 css = kthread_blkcg();
246 if (css)
247 return css_to_blkcg(css);
248 return css_to_blkcg(task_css(current, io_cgrp_id));
288} 249}
289 250
290static inline bool blk_cgroup_congested(void) 251static inline bool blk_cgroup_congested(void)
@@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen)
490 */ 451 */
491static inline void blkg_get(struct blkcg_gq *blkg) 452static inline void blkg_get(struct blkcg_gq *blkg)
492{ 453{
493 percpu_ref_get(&blkg->refcnt); 454 WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
455 atomic_inc(&blkg->refcnt);
494} 456}
495 457
496/** 458/**
497 * blkg_tryget - try and get a blkg reference 459 * blkg_try_get - try and get a blkg reference
498 * @blkg: blkg to get 460 * @blkg: blkg to get
499 * 461 *
500 * This is for use when doing an RCU lookup of the blkg. We may be in the midst 462 * This is for use when doing an RCU lookup of the blkg. We may be in the midst
501 * of freeing this blkg, so we can only use it if the refcnt is not zero. 463 * of freeing this blkg, so we can only use it if the refcnt is not zero.
502 */ 464 */
503static inline bool blkg_tryget(struct blkcg_gq *blkg) 465static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg)
504{ 466{
505 return percpu_ref_tryget(&blkg->refcnt); 467 if (atomic_inc_not_zero(&blkg->refcnt))
468 return blkg;
469 return NULL;
506} 470}
507 471
508/**
509 * blkg_tryget_closest - try and get a blkg ref on the closet blkg
510 * @blkg: blkg to get
511 *
512 * This walks up the blkg tree to find the closest non-dying blkg and returns
513 * the blkg that it did association with as it may not be the passed in blkg.
514 */
515static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
516{
517 while (!percpu_ref_tryget(&blkg->refcnt))
518 blkg = blkg->parent;
519 472
520 return blkg; 473void __blkg_release_rcu(struct rcu_head *rcu);
521}
522 474
523/** 475/**
524 * blkg_put - put a blkg reference 476 * blkg_put - put a blkg reference
@@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg)
526 */ 478 */
527static inline void blkg_put(struct blkcg_gq *blkg) 479static inline void blkg_put(struct blkcg_gq *blkg)
528{ 480{
529 percpu_ref_put(&blkg->refcnt); 481 WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0);
482 if (atomic_dec_and_test(&blkg->refcnt))
483 call_rcu(&blkg->rcu_head, __blkg_release_rcu);
530} 484}
531 485
532/** 486/**
@@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q,
579 533
580 rcu_read_lock(); 534 rcu_read_lock();
581 535
582 if (bio && bio->bi_blkg) { 536 blkcg = bio_blkcg(bio);
583 blkcg = bio->bi_blkg->blkcg;
584 if (blkcg == &blkcg_root)
585 goto rl_use_root;
586
587 blkg_get(bio->bi_blkg);
588 rcu_read_unlock();
589 return &bio->bi_blkg->rl;
590 }
591 537
592 blkcg = css_to_blkcg(blkcg_css()); 538 /* bypass blkg lookup and use @q->root_rl directly for root */
593 if (blkcg == &blkcg_root) 539 if (blkcg == &blkcg_root)
594 goto rl_use_root; 540 goto root_rl;
595 541
542 /*
543 * Try to use blkg->rl. blkg lookup may fail under memory pressure
544 * or if either the blkcg or queue is going away. Fall back to
545 * root_rl in such cases.
546 */
596 blkg = blkg_lookup(blkcg, q); 547 blkg = blkg_lookup(blkcg, q);
597 if (unlikely(!blkg)) 548 if (unlikely(!blkg))
598 blkg = __blkg_lookup_create(blkcg, q); 549 goto root_rl;
599
600 if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg))
601 goto rl_use_root;
602 550
551 blkg_get(blkg);
603 rcu_read_unlock(); 552 rcu_read_unlock();
604 return &blkg->rl; 553 return &blkg->rl;
605 554root_rl:
606 /*
607 * Each blkg has its own request_list, however, the root blkcg
608 * uses the request_queue's root_rl. This is to avoid most
609 * overhead for the root blkcg.
610 */
611rl_use_root:
612 rcu_read_unlock(); 555 rcu_read_unlock();
613 return &q->root_rl; 556 return &q->root_rl;
614} 557}
@@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg
854 struct bio *bio) { return false; } 797 struct bio *bio) { return false; }
855#endif 798#endif
856 799
857
858static inline void blkcg_bio_issue_init(struct bio *bio)
859{
860 bio_issue_init(&bio->bi_issue, bio_sectors(bio));
861}
862
863static inline bool blkcg_bio_issue_check(struct request_queue *q, 800static inline bool blkcg_bio_issue_check(struct request_queue *q,
864 struct bio *bio) 801 struct bio *bio)
865{ 802{
803 struct blkcg *blkcg;
866 struct blkcg_gq *blkg; 804 struct blkcg_gq *blkg;
867 bool throtl = false; 805 bool throtl = false;
868 806
869 rcu_read_lock(); 807 rcu_read_lock();
808 blkcg = bio_blkcg(bio);
809
810 /* associate blkcg if bio hasn't attached one */
811 bio_associate_blkcg(bio, &blkcg->css);
870 812
871 bio_associate_create_blkg(q, bio); 813 blkg = blkg_lookup(blkcg, q);
872 blkg = bio->bi_blkg; 814 if (unlikely(!blkg)) {
815 spin_lock_irq(q->queue_lock);
816 blkg = blkg_lookup_create(blkcg, q);
817 if (IS_ERR(blkg))
818 blkg = NULL;
819 spin_unlock_irq(q->queue_lock);
820 }
873 821
874 throtl = blk_throtl_bio(q, blkg, bio); 822 throtl = blk_throtl_bio(q, blkg, bio);
875 823
876 if (!throtl) { 824 if (!throtl) {
825 blkg = blkg ?: q->root_blkg;
877 /* 826 /*
878 * If the bio is flagged with BIO_QUEUE_ENTERED it means this 827 * If the bio is flagged with BIO_QUEUE_ENTERED it means this
879 * is a split bio and we would have already accounted for the 828 * is a split bio and we would have already accounted for the
@@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q,
885 blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); 834 blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
886 } 835 }
887 836
888 blkcg_bio_issue_init(bio);
889
890 rcu_read_unlock(); 837 rcu_read_unlock();
891 return !throtl; 838 return !throtl;
892} 839}
@@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q,
983static inline void blkcg_deactivate_policy(struct request_queue *q, 930static inline void blkcg_deactivate_policy(struct request_queue *q,
984 const struct blkcg_policy *pol) { } 931 const struct blkcg_policy *pol) { }
985 932
986static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; }
987static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } 933static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; }
988 934
989static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, 935static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
@@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { }
999static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } 945static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { }
1000static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } 946static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; }
1001 947
1002static inline void blkcg_bio_issue_init(struct bio *bio) { }
1003static inline bool blkcg_bio_issue_check(struct request_queue *q, 948static inline bool blkcg_bio_issue_check(struct request_queue *q,
1004 struct bio *bio) { return true; } 949 struct bio *bio) { return true; }
1005 950
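[Annotation] The blk-cgroup.h hunks above replace the percpu_ref with the older atomic_t scheme: blkg_alloc() starts the count at 1, blkg_get()/blkg_put() bump and drop it, and the final put frees the group through call_rcu(__blkg_release_rcu). A hedged usage sketch; the function and the stats it touches are only an example of holding a temporary reference.

#include <linux/bio.h>
#include <linux/blk-cgroup.h>

/* Example only: hold a temporary blkg reference while touching its stats. */
static void account_bio_to_blkg(struct blkcg_gq *blkg, struct bio *bio)
{
	blkg_get(blkg);		/* atomic_inc(&blkg->refcnt) */
	blkg_rwstat_add(&blkg->stat_bytes, bio->bi_opf, bio->bi_iter.bi_size);
	blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1);
	blkg_put(blkg);		/* last put defers the free via call_rcu() */
}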
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 093a818c5b68..1dcf652ba0aa 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -178,6 +178,7 @@ struct bio {
178 * release. Read comment on top of bio_associate_current(). 178 * release. Read comment on top of bio_associate_current().
179 */ 179 */
180 struct io_context *bi_ioc; 180 struct io_context *bi_ioc;
181 struct cgroup_subsys_state *bi_css;
181 struct blkcg_gq *bi_blkg; 182 struct blkcg_gq *bi_blkg;
182 struct bio_issue bi_issue; 183 struct bio_issue bi_issue;
183#endif 184#endif
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 9968332cceed..9d12757a65b0 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -93,8 +93,6 @@ extern struct css_set init_css_set;
93 93
94bool css_has_online_children(struct cgroup_subsys_state *css); 94bool css_has_online_children(struct cgroup_subsys_state *css);
95struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); 95struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss);
96struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup,
97 struct cgroup_subsys *ss);
98struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, 96struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup,
99 struct cgroup_subsys *ss); 97 struct cgroup_subsys *ss);
100struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, 98struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 738a0c24874f..fdfd04e348f6 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc,
246 * 246 *
247 * @bio is a part of the writeback in progress controlled by @wbc. Perform 247 * @bio is a part of the writeback in progress controlled by @wbc. Perform
248 * writeback specific initialization. This is used to apply the cgroup 248 * writeback specific initialization. This is used to apply the cgroup
249 * writeback context. Must be called after the bio has been associated with 249 * writeback context.
250 * a device.
251 */ 250 */
252static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) 251static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
253{ 252{
@@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio)
258 * regular writeback instead of writing things out itself. 257 * regular writeback instead of writing things out itself.
259 */ 258 */
260 if (wbc->wb) 259 if (wbc->wb)
261 bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); 260 bio_associate_blkcg(bio, wbc->wb->blkcg_css);
262} 261}
263 262
264#else /* CONFIG_CGROUP_WRITEBACK */ 263#else /* CONFIG_CGROUP_WRITEBACK */
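Because the association no longer depends on the bio being bound to a device, wbc_init_bio() can run immediately after allocation. A minimal sketch of a one-page writeback submission under that assumption; the helper name wb_write_one_page is hypothetical and completion handling is elided:

static void wb_write_one_page(struct writeback_control *wbc,
			      struct block_device *bdev, struct page *page,
			      sector_t sector)
{
	struct bio *bio = bio_alloc(GFP_NOFS, 1);

	/* Cgroup association needs only @wbc now, no device yet. */
	wbc_init_bio(wbc, bio);

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = sector;
	bio_add_page(bio, page, PAGE_SIZE, 0);
	bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
	wbc_account_io(wbc, page, PAGE_SIZE);
	submit_bio(bio);
}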
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 8b79318810ad..6aaf5dd5383b 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
493} 493}
494 494
495/** 495/**
496 * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss 496 * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
497 * @cgrp: the cgroup of interest 497 * @cgrp: the cgroup of interest
498 * @ss: the subsystem of interest (%NULL returns @cgrp->self) 498 * @ss: the subsystem of interest (%NULL returns @cgrp->self)
499 * 499 *
@@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
502 * enabled. If @ss is associated with the hierarchy @cgrp is on, this 502 * enabled. If @ss is associated with the hierarchy @cgrp is on, this
503 * function is guaranteed to return non-NULL css. 503 * function is guaranteed to return non-NULL css.
504 */ 504 */
505static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, 505static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
506 struct cgroup_subsys *ss) 506 struct cgroup_subsys *ss)
507{ 507{
508 lockdep_assert_held(&cgroup_mutex); 508 lockdep_assert_held(&cgroup_mutex);
509 509
@@ -524,35 +524,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
524} 524}
525 525
526/** 526/**
527 * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
528 * @cgrp: the cgroup of interest
529 * @ss: the subsystem of interest
530 *
531 * Find and get the effective css of @cgrp for @ss. The effective css is
532 * defined as the matching css of the nearest ancestor including self which
533 * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
534 * the root css is returned, so this function always returns a valid css.
535 *
536 * The returned css is not guaranteed to be online, and therefore it is the
537 * callers responsiblity to tryget a reference for it.
538 */
539struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
540 struct cgroup_subsys *ss)
541{
542 struct cgroup_subsys_state *css;
543
544 do {
545 css = cgroup_css(cgrp, ss);
546
547 if (css)
548 return css;
549 cgrp = cgroup_parent(cgrp);
550 } while (cgrp);
551
552 return init_css_set.subsys[ss->id];
553}
554
555/**
556 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem 527 * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
557 * @cgrp: the cgroup of interest 528 * @cgrp: the cgroup of interest
558 * @ss: the subsystem of interest 529 * @ss: the subsystem of interest
@@ -634,11 +605,10 @@ EXPORT_SYMBOL_GPL(of_css);
634 * 605 *
635 * Should be called under cgroup_[tree_]mutex. 606 * Should be called under cgroup_[tree_]mutex.
636 */ 607 */
637#define for_each_e_css(css, ssid, cgrp) \ 608#define for_each_e_css(css, ssid, cgrp) \
638 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ 609 for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
639 if (!((css) = cgroup_e_css_by_mask(cgrp, \ 610 if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \
640 cgroup_subsys[(ssid)]))) \ 611 ; \
641 ; \
642 else 612 else
643 613
644/** 614/**
@@ -1037,7 +1007,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
1037 * @ss is in this hierarchy, so we want the 1007 * @ss is in this hierarchy, so we want the
1038 * effective css from @cgrp. 1008 * effective css from @cgrp.
1039 */ 1009 */
1040 template[i] = cgroup_e_css_by_mask(cgrp, ss); 1010 template[i] = cgroup_e_css(cgrp, ss);
1041 } else { 1011 } else {
1042 /* 1012 /*
1043 * @ss is not in this hierarchy, so we don't want 1013 * @ss is not in this hierarchy, so we don't want
@@ -3054,7 +3024,7 @@ static int cgroup_apply_control(struct cgroup *cgrp)
3054 return ret; 3024 return ret;
3055 3025
3056 /* 3026 /*
3057 * At this point, cgroup_e_css_by_mask() results reflect the new csses 3027 * At this point, cgroup_e_css() results reflect the new csses
3058 * making the following cgroup_update_dfl_csses() properly update 3028 * making the following cgroup_update_dfl_csses() properly update
3059 * css associations of all tasks in the subtree. 3029 * css associations of all tasks in the subtree.
3060 */ 3030 */
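With cgroup_e_css_by_mask() renamed back to cgroup_e_css(), for_each_e_css() again iterates the mask-aware, cgroup_mutex-protected lookup. A hedged usage sketch; the loop body and do_something_with() are hypothetical stand-ins for the real callers in cgroup.c:

	struct cgroup_subsys_state *css;
	int ssid;

	lockdep_assert_held(&cgroup_mutex);
	for_each_e_css(css, ssid, cgrp) {
		/* @css is the effective css of subsystem @ssid for @cgrp. */
		do_something_with(css, ssid);	/* hypothetical */
	}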
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index fac0ddf8a8e2..2868d85f1fb1 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
764 if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) 764 if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
765 return NULL; 765 return NULL;
766 766
767 if (!bio->bi_blkg) 767 if (!bio->bi_css)
768 return NULL; 768 return NULL;
769 return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup); 769 return cgroup_get_kernfs_id(bio->bi_css->cgroup);
770} 770}
771#else 771#else
772static union kernfs_node_id * 772static union kernfs_node_id *
diff --git a/mm/page_io.c b/mm/page_io.c
index 27b835728442..d4d1c89bcddd 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
339 goto out; 339 goto out;
340 } 340 }
341 bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); 341 bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc);
342 bio_associate_blkg_from_page(bio, page); 342 bio_associate_blkcg_from_page(bio, page);
343 count_swpout_vm_event(page); 343 count_swpout_vm_event(page);
344 set_page_writeback(page); 344 set_page_writeback(page);
345 unlock_page(page); 345 unlock_page(page);
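The swap-out path goes back to bio_associate_blkcg_from_page(), which derives the blkcg css from the memcg that owns the page. Roughly, and only as a simplified approximation of that helper (CONFIG_MEMCG assumed, return value and edge cases elided):

static void blkcg_from_page_sketch(struct bio *bio, struct page *page)
{
	struct cgroup_subsys_state *blkcg_css;

	if (bio->bi_css || !page->mem_cgroup)
		return;

	/*
	 * cgroup_get_e_css() returns the effective io-controller css of the
	 * owning memcg's cgroup with a reference held, so it can be stored
	 * in bi_css directly.
	 */
	blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
				     &io_cgrp_subsys);
	bio->bi_css = blkcg_css;
}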