author     Linus Torvalds <torvalds@linux-foundation.org>  2018-11-02 14:25:48 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-11-02 14:25:48 -0400
commit     5f21585384a4a69b8bfdd2cae7e3648ae805f57d (patch)
tree       b976d6e847b7209fb54cf78821a59951a7e9e8cd
parent     fcc37f76a995cc08546b88b83f9bb5da11307a0b (diff)
parent     9fe5c59ff6a1e5e26a39b75489a1420e7eaaf0b1 (diff)
Merge tag 'for-linus-20181102' of git://git.kernel.dk/linux-block
Pull block layer fixes from Jens Axboe:
"The biggest part of this pull request is the revert of the blkcg
cleanup series. It had one fix earlier for a stacked device issue, but
another one was reported. Rather than play whack-a-mole with this,
revert the entire series and try again for the next kernel release.
Apart from that, only small fixes/changes.
Summary:
- Indentation fixup for mtip32xx (Colin Ian King)
- The blkcg cleanup series revert (Dennis Zhou)
- Two NVMe fixes. One fixes a regression in nvme request
initialization introduced in this merge window, which broke nvme-fc.
The other fixes a suspend/resume p2p resource issue (James, Keith)
- Fix sg discard merge, allowing us to merge in cases where we didn't
before (Jianchao Wang)
- Call rq_qos_exit() after the queue is frozen, preventing a hang
(Ming)
- Fix brd queue setup, avoiding an oops when we fail to set up all
devices (Ming)"
* tag 'for-linus-20181102' of git://git.kernel.dk/linux-block:
nvme-pci: fix conflicting p2p resource adds
nvme-fc: fix request private initialization
blkcg: revert blkcg cleanups series
block: brd: associate with queue until adding disk
block: call rq_qos_exit() after queue is frozen
mtip32xx: clean an indentation issue, remove extraneous tabs
block: fix the DISCARD request merge
-rw-r--r--  Documentation/admin-guide/cgroup-v2.rst |   8
-rw-r--r--  block/bfq-cgroup.c                      |   4
-rw-r--r--  block/bfq-iosched.c                     |   2
-rw-r--r--  block/bio.c                             | 174
-rw-r--r--  block/blk-cgroup.c                      | 123
-rw-r--r--  block/blk-core.c                        |   4
-rw-r--r--  block/blk-iolatency.c                   |  26
-rw-r--r--  block/blk-merge.c                       |  46
-rw-r--r--  block/blk-sysfs.c                       |   2
-rw-r--r--  block/blk-throttle.c                    |  13
-rw-r--r--  block/bounce.c                          |   4
-rw-r--r--  block/cfq-iosched.c                     |   4
-rw-r--r--  drivers/block/brd.c                     |  16
-rw-r--r--  drivers/block/loop.c                    |   5
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c       |   4
-rw-r--r--  drivers/md/raid0.c                      |   2
-rw-r--r--  drivers/nvme/host/fc.c                  |   2
-rw-r--r--  drivers/nvme/host/pci.c                 |   5
-rw-r--r--  fs/buffer.c                             |  10
-rw-r--r--  fs/ext4/page-io.c                       |   2
-rw-r--r--  include/linux/bio.h                     |  26
-rw-r--r--  include/linux/blk-cgroup.h              | 145
-rw-r--r--  include/linux/blk_types.h               |   1
-rw-r--r--  include/linux/cgroup.h                  |   2
-rw-r--r--  include/linux/writeback.h               |   5
-rw-r--r--  kernel/cgroup/cgroup.c                  |  48
-rw-r--r--  kernel/trace/blktrace.c                 |   4
-rw-r--r--  mm/page_io.c                            |   2
28 files changed, 265 insertions(+), 424 deletions(-)
diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 8384c681a4b2..476722b7b636 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst | |||
@@ -1879,10 +1879,8 @@ following two functions. | |||
1879 | 1879 | ||
1880 | wbc_init_bio(@wbc, @bio) | 1880 | wbc_init_bio(@wbc, @bio) |
1881 | Should be called for each bio carrying writeback data and | 1881 | Should be called for each bio carrying writeback data and |
1882 | associates the bio with the inode's owner cgroup and the | 1882 | associates the bio with the inode's owner cgroup. Can be |
1883 | corresponding request queue. This must be called after | 1883 | called anytime between bio allocation and submission. |
1884 | a queue (device) has been associated with the bio and | ||
1885 | before submission. | ||
1886 | 1884 | ||
1887 | wbc_account_io(@wbc, @page, @bytes) | 1885 | wbc_account_io(@wbc, @page, @bytes) |
1888 | Should be called for each data segment being written out. | 1886 | Should be called for each data segment being written out. |
@@ -1901,7 +1899,7 @@ the configuration, the bio may be executed at a lower priority and if | |||
1901 | the writeback session is holding shared resources, e.g. a journal | 1899 | the writeback session is holding shared resources, e.g. a journal |
1902 | entry, may lead to priority inversion. There is no one easy solution | 1900 | entry, may lead to priority inversion. There is no one easy solution |
1903 | for the problem. Filesystems can try to work around specific problem | 1901 | for the problem. Filesystems can try to work around specific problem |
1904 | cases by skipping wbc_init_bio() or using bio_associate_create_blkg() | 1902 | cases by skipping wbc_init_bio() or using bio_associate_blkcg() |
1905 | directly. | 1903 | directly. |
1906 | 1904 | ||
1907 | 1905 | ||
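
A minimal sketch of a filesystem writeback path under the restored semantics above, where wbc_init_bio() no longer needs a device on the bio and may run any time between allocation and submission (fs_submit_page and its parameters are hypothetical; kernel headers are assumed):

static int fs_submit_page(struct writeback_control *wbc, struct page *page,
			  struct block_device *bdev, sector_t sector)
{
	struct bio *bio = bio_alloc(GFP_NOIO, 1);

	/* associate the bio with the inode's owner cgroup */
	wbc_init_bio(wbc, bio);

	bio->bi_iter.bi_sector = sector;
	bio_set_dev(bio, bdev);
	bio_add_page(bio, page, PAGE_SIZE, 0);

	/* account the data segment being written out */
	wbc_account_io(wbc, page, PAGE_SIZE);

	bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
	submit_bio(bio);
	return 0;
}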
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index d9a7916ff0ab..9fe5952d117d 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c | |||
@@ -642,7 +642,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) | |||
642 | uint64_t serial_nr; | 642 | uint64_t serial_nr; |
643 | 643 | ||
644 | rcu_read_lock(); | 644 | rcu_read_lock(); |
645 | serial_nr = __bio_blkcg(bio)->css.serial_nr; | 645 | serial_nr = bio_blkcg(bio)->css.serial_nr; |
646 | 646 | ||
647 | /* | 647 | /* |
648 | * Check whether blkcg has changed. The condition may trigger | 648 | * Check whether blkcg has changed. The condition may trigger |
@@ -651,7 +651,7 @@ void bfq_bic_update_cgroup(struct bfq_io_cq *bic, struct bio *bio) | |||
651 | if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) | 651 | if (unlikely(!bfqd) || likely(bic->blkcg_serial_nr == serial_nr)) |
652 | goto out; | 652 | goto out; |
653 | 653 | ||
654 | bfqg = __bfq_bic_change_cgroup(bfqd, bic, __bio_blkcg(bio)); | 654 | bfqg = __bfq_bic_change_cgroup(bfqd, bic, bio_blkcg(bio)); |
655 | /* | 655 | /* |
656 | * Update blkg_path for bfq_log_* functions. We cache this | 656 | * Update blkg_path for bfq_log_* functions. We cache this |
657 | * path, and update it here, for the following | 657 | * path, and update it here, for the following |
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c index 6075100f03a5..3a27d31fcda6 100644 --- a/block/bfq-iosched.c +++ b/block/bfq-iosched.c | |||
@@ -4384,7 +4384,7 @@ static struct bfq_queue *bfq_get_queue(struct bfq_data *bfqd, | |||
4384 | 4384 | ||
4385 | rcu_read_lock(); | 4385 | rcu_read_lock(); |
4386 | 4386 | ||
4387 | bfqg = bfq_find_set_group(bfqd, __bio_blkcg(bio)); | 4387 | bfqg = bfq_find_set_group(bfqd, bio_blkcg(bio)); |
4388 | if (!bfqg) { | 4388 | if (!bfqg) { |
4389 | bfqq = &bfqd->oom_bfqq; | 4389 | bfqq = &bfqd->oom_bfqq; |
4390 | goto out; | 4390 | goto out; |
diff --git a/block/bio.c b/block/bio.c index c27f77befbac..d5368a445561 100644 --- a/block/bio.c +++ b/block/bio.c | |||
@@ -609,9 +609,7 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src) | |||
609 | bio->bi_iter = bio_src->bi_iter; | 609 | bio->bi_iter = bio_src->bi_iter; |
610 | bio->bi_io_vec = bio_src->bi_io_vec; | 610 | bio->bi_io_vec = bio_src->bi_io_vec; |
611 | 611 | ||
612 | bio_clone_blkg_association(bio, bio_src); | 612 | bio_clone_blkcg_association(bio, bio_src); |
613 | |||
614 | blkcg_bio_issue_init(bio); | ||
615 | } | 613 | } |
616 | EXPORT_SYMBOL(__bio_clone_fast); | 614 | EXPORT_SYMBOL(__bio_clone_fast); |
617 | 615 | ||
@@ -1956,151 +1954,69 @@ EXPORT_SYMBOL(bioset_init_from_src); | |||
1956 | 1954 | ||
1957 | #ifdef CONFIG_BLK_CGROUP | 1955 | #ifdef CONFIG_BLK_CGROUP |
1958 | 1956 | ||
1959 | /** | ||
1960 | * bio_associate_blkg - associate a bio with the a blkg | ||
1961 | * @bio: target bio | ||
1962 | * @blkg: the blkg to associate | ||
1963 | * | ||
1964 | * This tries to associate @bio with the specified blkg. Association failure | ||
1965 | * is handled by walking up the blkg tree. Therefore, the blkg associated can | ||
1966 | * be anything between @blkg and the root_blkg. This situation only happens | ||
1967 | * when a cgroup is dying and then the remaining bios will spill to the closest | ||
1968 | * alive blkg. | ||
1969 | * | ||
1970 | * A reference will be taken on the @blkg and will be released when @bio is | ||
1971 | * freed. | ||
1972 | */ | ||
1973 | int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) | ||
1974 | { | ||
1975 | if (unlikely(bio->bi_blkg)) | ||
1976 | return -EBUSY; | ||
1977 | bio->bi_blkg = blkg_tryget_closest(blkg); | ||
1978 | return 0; | ||
1979 | } | ||
1980 | |||
1981 | /** | ||
1982 | * __bio_associate_blkg_from_css - internal blkg association function | ||
1983 | * | ||
1984 | * This in the core association function that all association paths rely on. | ||
1985 | * A blkg reference is taken which is released upon freeing of the bio. | ||
1986 | */ | ||
1987 | static int __bio_associate_blkg_from_css(struct bio *bio, | ||
1988 | struct cgroup_subsys_state *css) | ||
1989 | { | ||
1990 | struct request_queue *q = bio->bi_disk->queue; | ||
1991 | struct blkcg_gq *blkg; | ||
1992 | int ret; | ||
1993 | |||
1994 | rcu_read_lock(); | ||
1995 | |||
1996 | if (!css || !css->parent) | ||
1997 | blkg = q->root_blkg; | ||
1998 | else | ||
1999 | blkg = blkg_lookup_create(css_to_blkcg(css), q); | ||
2000 | |||
2001 | ret = bio_associate_blkg(bio, blkg); | ||
2002 | |||
2003 | rcu_read_unlock(); | ||
2004 | return ret; | ||
2005 | } | ||
2006 | |||
2007 | /** | ||
2008 | * bio_associate_blkg_from_css - associate a bio with a specified css | ||
2009 | * @bio: target bio | ||
2010 | * @css: target css | ||
2011 | * | ||
2012 | * Associate @bio with the blkg found by combining the css's blkg and the | ||
2013 | * request_queue of the @bio. This falls back to the queue's root_blkg if | ||
2014 | * the association fails with the css. | ||
2015 | */ | ||
2016 | int bio_associate_blkg_from_css(struct bio *bio, | ||
2017 | struct cgroup_subsys_state *css) | ||
2018 | { | ||
2019 | if (unlikely(bio->bi_blkg)) | ||
2020 | return -EBUSY; | ||
2021 | return __bio_associate_blkg_from_css(bio, css); | ||
2022 | } | ||
2023 | EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css); | ||
2024 | |||
2025 | #ifdef CONFIG_MEMCG | 1957 | #ifdef CONFIG_MEMCG |
2026 | /** | 1958 | /** |
2027 | * bio_associate_blkg_from_page - associate a bio with the page's blkg | 1959 | * bio_associate_blkcg_from_page - associate a bio with the page's blkcg |
2028 | * @bio: target bio | 1960 | * @bio: target bio |
2029 | * @page: the page to lookup the blkcg from | 1961 | * @page: the page to lookup the blkcg from |
2030 | * | 1962 | * |
2031 | * Associate @bio with the blkg from @page's owning memcg and the respective | 1963 | * Associate @bio with the blkcg from @page's owning memcg. This works like |
2032 | * request_queue. If cgroup_e_css returns NULL, fall back to the queue's | 1964 | * every other associate function wrt references. |
2033 | * root_blkg. | ||
2034 | * | ||
2035 | * Note: this must be called after bio has an associated device. | ||
2036 | */ | 1965 | */ |
2037 | int bio_associate_blkg_from_page(struct bio *bio, struct page *page) | 1966 | int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) |
2038 | { | 1967 | { |
2039 | struct cgroup_subsys_state *css; | 1968 | struct cgroup_subsys_state *blkcg_css; |
2040 | int ret; | ||
2041 | 1969 | ||
2042 | if (unlikely(bio->bi_blkg)) | 1970 | if (unlikely(bio->bi_css)) |
2043 | return -EBUSY; | 1971 | return -EBUSY; |
2044 | if (!page->mem_cgroup) | 1972 | if (!page->mem_cgroup) |
2045 | return 0; | 1973 | return 0; |
2046 | 1974 | blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, | |
2047 | rcu_read_lock(); | 1975 | &io_cgrp_subsys); |
2048 | 1976 | bio->bi_css = blkcg_css; | |
2049 | css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys); | 1977 | return 0; |
2050 | |||
2051 | ret = __bio_associate_blkg_from_css(bio, css); | ||
2052 | |||
2053 | rcu_read_unlock(); | ||
2054 | return ret; | ||
2055 | } | 1978 | } |
2056 | #endif /* CONFIG_MEMCG */ | 1979 | #endif /* CONFIG_MEMCG */ |
2057 | 1980 | ||
2058 | /** | 1981 | /** |
2059 | * bio_associate_create_blkg - associate a bio with a blkg from q | 1982 | * bio_associate_blkcg - associate a bio with the specified blkcg |
2060 | * @q: request_queue where bio is going | ||
2061 | * @bio: target bio | 1983 | * @bio: target bio |
1984 | * @blkcg_css: css of the blkcg to associate | ||
1985 | * | ||
1986 | * Associate @bio with the blkcg specified by @blkcg_css. Block layer will | ||
1987 | * treat @bio as if it were issued by a task which belongs to the blkcg. | ||
2062 | * | 1988 | * |
2063 | * Associate @bio with the blkg found from the bio's css and the request_queue. | 1989 | * This function takes an extra reference of @blkcg_css which will be put |
2064 | * If one is not found, bio_lookup_blkg creates the blkg. This falls back to | 1990 | * when @bio is released. The caller must own @bio and is responsible for |
2065 | * the queue's root_blkg if association fails. | 1991 | * synchronizing calls to this function. |
2066 | */ | 1992 | */ |
2067 | int bio_associate_create_blkg(struct request_queue *q, struct bio *bio) | 1993 | int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) |
2068 | { | 1994 | { |
2069 | struct cgroup_subsys_state *css; | 1995 | if (unlikely(bio->bi_css)) |
2070 | int ret = 0; | 1996 | return -EBUSY; |
2071 | 1997 | css_get(blkcg_css); | |
2072 | /* someone has already associated this bio with a blkg */ | 1998 | bio->bi_css = blkcg_css; |
2073 | if (bio->bi_blkg) | 1999 | return 0; |
2074 | return ret; | ||
2075 | |||
2076 | rcu_read_lock(); | ||
2077 | |||
2078 | css = blkcg_css(); | ||
2079 | |||
2080 | ret = __bio_associate_blkg_from_css(bio, css); | ||
2081 | |||
2082 | rcu_read_unlock(); | ||
2083 | return ret; | ||
2084 | } | 2000 | } |
2001 | EXPORT_SYMBOL_GPL(bio_associate_blkcg); | ||
2085 | 2002 | ||
2086 | /** | 2003 | /** |
2087 | * bio_reassociate_blkg - reassociate a bio with a blkg from q | 2004 | * bio_associate_blkg - associate a bio with the specified blkg |
2088 | * @q: request_queue where bio is going | ||
2089 | * @bio: target bio | 2005 | * @bio: target bio |
2006 | * @blkg: the blkg to associate | ||
2090 | * | 2007 | * |
2091 | * When submitting a bio, multiple recursive calls to make_request() may occur. | 2008 | * Associate @bio with the blkg specified by @blkg. This is the queue specific |
2092 | * This causes the initial associate done in blkcg_bio_issue_check() to be | 2009 | * blkcg information associated with the @bio, a reference will be taken on the |
2093 | * incorrect and reference the prior request_queue. This performs reassociation | 2010 | * @blkg and will be freed when the bio is freed. |
2094 | * when this situation happens. | ||
2095 | */ | 2011 | */ |
2096 | int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) | 2012 | int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) |
2097 | { | 2013 | { |
2098 | if (bio->bi_blkg) { | 2014 | if (unlikely(bio->bi_blkg)) |
2099 | blkg_put(bio->bi_blkg); | 2015 | return -EBUSY; |
2100 | bio->bi_blkg = NULL; | 2016 | if (!blkg_try_get(blkg)) |
2101 | } | 2017 | return -ENODEV; |
2102 | 2018 | bio->bi_blkg = blkg; | |
2103 | return bio_associate_create_blkg(q, bio); | 2019 | return 0; |
2104 | } | 2020 | } |
2105 | 2021 | ||
2106 | /** | 2022 | /** |
@@ -2113,6 +2029,10 @@ void bio_disassociate_task(struct bio *bio) | |||
2113 | put_io_context(bio->bi_ioc); | 2029 | put_io_context(bio->bi_ioc); |
2114 | bio->bi_ioc = NULL; | 2030 | bio->bi_ioc = NULL; |
2115 | } | 2031 | } |
2032 | if (bio->bi_css) { | ||
2033 | css_put(bio->bi_css); | ||
2034 | bio->bi_css = NULL; | ||
2035 | } | ||
2116 | if (bio->bi_blkg) { | 2036 | if (bio->bi_blkg) { |
2117 | blkg_put(bio->bi_blkg); | 2037 | blkg_put(bio->bi_blkg); |
2118 | bio->bi_blkg = NULL; | 2038 | bio->bi_blkg = NULL; |
@@ -2120,16 +2040,16 @@ void bio_disassociate_task(struct bio *bio) | |||
2120 | } | 2040 | } |
2121 | 2041 | ||
2122 | /** | 2042 | /** |
2123 | * bio_clone_blkg_association - clone blkg association from src to dst bio | 2043 | * bio_clone_blkcg_association - clone blkcg association from src to dst bio |
2124 | * @dst: destination bio | 2044 | * @dst: destination bio |
2125 | * @src: source bio | 2045 | * @src: source bio |
2126 | */ | 2046 | */ |
2127 | void bio_clone_blkg_association(struct bio *dst, struct bio *src) | 2047 | void bio_clone_blkcg_association(struct bio *dst, struct bio *src) |
2128 | { | 2048 | { |
2129 | if (src->bi_blkg) | 2049 | if (src->bi_css) |
2130 | bio_associate_blkg(dst, src->bi_blkg); | 2050 | WARN_ON(bio_associate_blkcg(dst, src->bi_css)); |
2131 | } | 2051 | } |
2132 | EXPORT_SYMBOL_GPL(bio_clone_blkg_association); | 2052 | EXPORT_SYMBOL_GPL(bio_clone_blkcg_association); |
2133 | #endif /* CONFIG_BLK_CGROUP */ | 2053 | #endif /* CONFIG_BLK_CGROUP */ |
2134 | 2054 | ||
2135 | static void __init biovec_init_slabs(void) | 2055 | static void __init biovec_init_slabs(void) |
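
Taken together, the reverted API leaves two independent associations on a bio: the css in bi_css and an optional queue-specific blkg in bi_blkg. A hedged sketch of how a submitter pins both (example_bio_associate is hypothetical; the locking mirrors the blk-iolatency hunk below):

static void example_bio_associate(struct request_queue *q, struct bio *bio)
{
	struct blkcg *blkcg;
	struct blkcg_gq *blkg;

	rcu_read_lock();
	blkcg = bio_blkcg(bio);	/* falls back to the current task's blkcg */
	bio_associate_blkcg(bio, &blkcg->css);	/* -EBUSY if bi_css already set */
	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		bio_associate_blkg(bio, blkg);	/* -ENODEV if blkg is being freed */
	rcu_read_unlock();
}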
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 992da5592c6e..c630e02836a8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
@@ -84,37 +84,6 @@ static void blkg_free(struct blkcg_gq *blkg) | |||
84 | kfree(blkg); | 84 | kfree(blkg); |
85 | } | 85 | } |
86 | 86 | ||
87 | static void __blkg_release(struct rcu_head *rcu) | ||
88 | { | ||
89 | struct blkcg_gq *blkg = container_of(rcu, struct blkcg_gq, rcu_head); | ||
90 | |||
91 | percpu_ref_exit(&blkg->refcnt); | ||
92 | |||
93 | /* release the blkcg and parent blkg refs this blkg has been holding */ | ||
94 | css_put(&blkg->blkcg->css); | ||
95 | if (blkg->parent) | ||
96 | blkg_put(blkg->parent); | ||
97 | |||
98 | wb_congested_put(blkg->wb_congested); | ||
99 | |||
100 | blkg_free(blkg); | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * A group is RCU protected, but having an rcu lock does not mean that one | ||
105 | * can access all the fields of blkg and assume these are valid. For | ||
106 | * example, don't try to follow throtl_data and request queue links. | ||
107 | * | ||
108 | * Having a reference to blkg under an rcu allows accesses to only values | ||
109 | * local to groups like group stats and group rate limits. | ||
110 | */ | ||
111 | static void blkg_release(struct percpu_ref *ref) | ||
112 | { | ||
113 | struct blkcg_gq *blkg = container_of(ref, struct blkcg_gq, refcnt); | ||
114 | |||
115 | call_rcu(&blkg->rcu_head, __blkg_release); | ||
116 | } | ||
117 | |||
118 | /** | 87 | /** |
119 | * blkg_alloc - allocate a blkg | 88 | * blkg_alloc - allocate a blkg |
120 | * @blkcg: block cgroup the new blkg is associated with | 89 | * @blkcg: block cgroup the new blkg is associated with |
@@ -141,6 +110,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, | |||
141 | blkg->q = q; | 110 | blkg->q = q; |
142 | INIT_LIST_HEAD(&blkg->q_node); | 111 | INIT_LIST_HEAD(&blkg->q_node); |
143 | blkg->blkcg = blkcg; | 112 | blkg->blkcg = blkcg; |
113 | atomic_set(&blkg->refcnt, 1); | ||
144 | 114 | ||
145 | /* root blkg uses @q->root_rl, init rl only for !root blkgs */ | 115 | /* root blkg uses @q->root_rl, init rl only for !root blkgs */ |
146 | if (blkcg != &blkcg_root) { | 116 | if (blkcg != &blkcg_root) { |
@@ -247,11 +217,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, | |||
247 | blkg_get(blkg->parent); | 217 | blkg_get(blkg->parent); |
248 | } | 218 | } |
249 | 219 | ||
250 | ret = percpu_ref_init(&blkg->refcnt, blkg_release, 0, | ||
251 | GFP_NOWAIT | __GFP_NOWARN); | ||
252 | if (ret) | ||
253 | goto err_cancel_ref; | ||
254 | |||
255 | /* invoke per-policy init */ | 220 | /* invoke per-policy init */ |
256 | for (i = 0; i < BLKCG_MAX_POLS; i++) { | 221 | for (i = 0; i < BLKCG_MAX_POLS; i++) { |
257 | struct blkcg_policy *pol = blkcg_policy[i]; | 222 | struct blkcg_policy *pol = blkcg_policy[i]; |
@@ -284,8 +249,6 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg, | |||
284 | blkg_put(blkg); | 249 | blkg_put(blkg); |
285 | return ERR_PTR(ret); | 250 | return ERR_PTR(ret); |
286 | 251 | ||
287 | err_cancel_ref: | ||
288 | percpu_ref_exit(&blkg->refcnt); | ||
289 | err_put_congested: | 252 | err_put_congested: |
290 | wb_congested_put(wb_congested); | 253 | wb_congested_put(wb_congested); |
291 | err_put_css: | 254 | err_put_css: |
@@ -296,7 +259,7 @@ err_free_blkg: | |||
296 | } | 259 | } |
297 | 260 | ||
298 | /** | 261 | /** |
299 | * __blkg_lookup_create - lookup blkg, try to create one if not there | 262 | * blkg_lookup_create - lookup blkg, try to create one if not there |
300 | * @blkcg: blkcg of interest | 263 | * @blkcg: blkcg of interest |
301 | * @q: request_queue of interest | 264 | * @q: request_queue of interest |
302 | * | 265 | * |
@@ -305,11 +268,12 @@ err_free_blkg: | |||
305 | * that all non-root blkg's have access to the parent blkg. This function | 268 | * that all non-root blkg's have access to the parent blkg. This function |
306 | * should be called under RCU read lock and @q->queue_lock. | 269 | * should be called under RCU read lock and @q->queue_lock. |
307 | * | 270 | * |
308 | * Returns the blkg or the closest blkg if blkg_create fails as it walks | 271 | * Returns pointer to the looked up or created blkg on success, ERR_PTR() |
309 | down from root. | 272 | value on error. If @q is dead, returns ERR_PTR(-ENODEV). If @q is not
273 | dead and bypassing, returns ERR_PTR(-EBUSY). ||
310 | */ | 274 | */ |
311 | struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | 275 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, |
312 | struct request_queue *q) | 276 | struct request_queue *q) |
313 | { | 277 | { |
314 | struct blkcg_gq *blkg; | 278 | struct blkcg_gq *blkg; |
315 | 279 | ||
@@ -321,7 +285,7 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | |||
321 | * we shouldn't allow anything to go through for a bypassing queue. | 285 | * we shouldn't allow anything to go through for a bypassing queue. |
322 | */ | 286 | */ |
323 | if (unlikely(blk_queue_bypass(q))) | 287 | if (unlikely(blk_queue_bypass(q))) |
324 | return q->root_blkg; | 288 | return ERR_PTR(blk_queue_dying(q) ? -ENODEV : -EBUSY); |
325 | 289 | ||
326 | blkg = __blkg_lookup(blkcg, q, true); | 290 | blkg = __blkg_lookup(blkcg, q, true); |
327 | if (blkg) | 291 | if (blkg) |
@@ -329,58 +293,23 @@ struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | |||
329 | 293 | ||
330 | /* | 294 | /* |
331 | * Create blkgs walking down from blkcg_root to @blkcg, so that all | 295 | * Create blkgs walking down from blkcg_root to @blkcg, so that all |
332 | * non-root blkgs have access to their parents. Returns the closest | 296 | * non-root blkgs have access to their parents. |
333 | * blkg to the intended blkg should blkg_create() fail. | ||
334 | */ | 297 | */ |
335 | while (true) { | 298 | while (true) { |
336 | struct blkcg *pos = blkcg; | 299 | struct blkcg *pos = blkcg; |
337 | struct blkcg *parent = blkcg_parent(blkcg); | 300 | struct blkcg *parent = blkcg_parent(blkcg); |
338 | struct blkcg_gq *ret_blkg = q->root_blkg; | 301 | |
339 | 302 | while (parent && !__blkg_lookup(parent, q, false)) { | |
340 | while (parent) { | ||
341 | blkg = __blkg_lookup(parent, q, false); | ||
342 | if (blkg) { | ||
343 | /* remember closest blkg */ | ||
344 | ret_blkg = blkg; | ||
345 | break; | ||
346 | } | ||
347 | pos = parent; | 303 | pos = parent; |
348 | parent = blkcg_parent(parent); | 304 | parent = blkcg_parent(parent); |
349 | } | 305 | } |
350 | 306 | ||
351 | blkg = blkg_create(pos, q, NULL); | 307 | blkg = blkg_create(pos, q, NULL); |
352 | if (IS_ERR(blkg)) | 308 | if (pos == blkcg || IS_ERR(blkg)) |
353 | return ret_blkg; | ||
354 | if (pos == blkcg) | ||
355 | return blkg; | 309 | return blkg; |
356 | } | 310 | } |
357 | } | 311 | } |
358 | 312 | ||
359 | /** | ||
360 | * blkg_lookup_create - find or create a blkg | ||
361 | * @blkcg: target block cgroup | ||
362 | * @q: target request_queue | ||
363 | * | ||
364 | * This looks up or creates the blkg representing the unique pair | ||
365 | * of the blkcg and the request_queue. | ||
366 | */ | ||
367 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | ||
368 | struct request_queue *q) | ||
369 | { | ||
370 | struct blkcg_gq *blkg = blkg_lookup(blkcg, q); | ||
371 | unsigned long flags; | ||
372 | |||
373 | if (unlikely(!blkg)) { | ||
374 | spin_lock_irqsave(q->queue_lock, flags); | ||
375 | |||
376 | blkg = __blkg_lookup_create(blkcg, q); | ||
377 | |||
378 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
379 | } | ||
380 | |||
381 | return blkg; | ||
382 | } | ||
383 | |||
384 | static void blkg_destroy(struct blkcg_gq *blkg) | 313 | static void blkg_destroy(struct blkcg_gq *blkg) |
385 | { | 314 | { |
386 | struct blkcg *blkcg = blkg->blkcg; | 315 | struct blkcg *blkcg = blkg->blkcg; |
@@ -424,7 +353,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) | |||
424 | * Put the reference taken at the time of creation so that when all | 353 | * Put the reference taken at the time of creation so that when all |
425 | * queues are gone, group can be destroyed. | 354 | * queues are gone, group can be destroyed. |
426 | */ | 355 | */ |
427 | percpu_ref_kill(&blkg->refcnt); | 356 | blkg_put(blkg); |
428 | } | 357 | } |
429 | 358 | ||
430 | /** | 359 | /** |
@@ -452,6 +381,29 @@ static void blkg_destroy_all(struct request_queue *q) | |||
452 | } | 381 | } |
453 | 382 | ||
454 | /* | 383 | /* |
384 | * A group is RCU protected, but having an rcu lock does not mean that one | ||
385 | * can access all the fields of blkg and assume these are valid. For | ||
386 | * example, don't try to follow throtl_data and request queue links. | ||
387 | * | ||
388 | * Having a reference to blkg under an rcu allows accesses to only values | ||
389 | * local to groups like group stats and group rate limits. | ||
390 | */ | ||
391 | void __blkg_release_rcu(struct rcu_head *rcu_head) | ||
392 | { | ||
393 | struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head); | ||
394 | |||
395 | /* release the blkcg and parent blkg refs this blkg has been holding */ | ||
396 | css_put(&blkg->blkcg->css); | ||
397 | if (blkg->parent) | ||
398 | blkg_put(blkg->parent); | ||
399 | |||
400 | wb_congested_put(blkg->wb_congested); | ||
401 | |||
402 | blkg_free(blkg); | ||
403 | } | ||
404 | EXPORT_SYMBOL_GPL(__blkg_release_rcu); | ||
405 | |||
406 | /* | ||
455 | * The next function used by blk_queue_for_each_rl(). It's a bit tricky | 407 | * The next function used by blk_queue_for_each_rl(). It's a bit tricky |
456 | * because the root blkg uses @q->root_rl instead of its own rl. | 408 | * because the root blkg uses @q->root_rl instead of its own rl. |
457 | */ | 409 | */ |
@@ -1796,7 +1748,8 @@ void blkcg_maybe_throttle_current(void) | |||
1796 | blkg = blkg_lookup(blkcg, q); | 1748 | blkg = blkg_lookup(blkcg, q); |
1797 | if (!blkg) | 1749 | if (!blkg) |
1798 | goto out; | 1750 | goto out; |
1799 | if (!blkg_tryget(blkg)) | 1751 | blkg = blkg_try_get(blkg); |
1752 | if (!blkg) | ||
1800 | goto out; | 1753 | goto out; |
1801 | rcu_read_unlock(); | 1754 | rcu_read_unlock(); |
1802 | 1755 | ||
diff --git a/block/blk-core.c b/block/blk-core.c index bc6ea87d10e0..ce12515f9b9b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -785,6 +785,9 @@ void blk_cleanup_queue(struct request_queue *q) | |||
785 | * prevent that q->request_fn() gets invoked after draining finished. | 785 | * prevent that q->request_fn() gets invoked after draining finished. |
786 | */ | 786 | */ |
787 | blk_freeze_queue(q); | 787 | blk_freeze_queue(q); |
788 | |||
789 | rq_qos_exit(q); | ||
790 | |||
788 | spin_lock_irq(lock); | 791 | spin_lock_irq(lock); |
789 | queue_flag_set(QUEUE_FLAG_DEAD, q); | 792 | queue_flag_set(QUEUE_FLAG_DEAD, q); |
790 | spin_unlock_irq(lock); | 793 | spin_unlock_irq(lock); |
@@ -2432,7 +2435,6 @@ blk_qc_t generic_make_request(struct bio *bio) | |||
2432 | if (q) | 2435 | if (q) |
2433 | blk_queue_exit(q); | 2436 | blk_queue_exit(q); |
2434 | q = bio->bi_disk->queue; | 2437 | q = bio->bi_disk->queue; |
2435 | bio_reassociate_blkg(q, bio); | ||
2436 | flags = 0; | 2438 | flags = 0; |
2437 | if (bio->bi_opf & REQ_NOWAIT) | 2439 | if (bio->bi_opf & REQ_NOWAIT) |
2438 | flags = BLK_MQ_REQ_NOWAIT; | 2440 | flags = BLK_MQ_REQ_NOWAIT; |
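
Together with the blk-sysfs.c hunk below, this moves rq_qos teardown to a point where the queue is already frozen. A simplified sketch of the resulting order in queue cleanup (not the full blk_cleanup_queue(); example_cleanup_queue is hypothetical):

static void example_cleanup_queue(struct request_queue *q)
{
	blk_freeze_queue(q);	/* drain in-flight I/O, block new submissions */
	rq_qos_exit(q);		/* now safe: nothing can race with qos teardown */
	/* ... mark the queue dead and release the remaining resources ... */
}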
diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 28f80d227528..38c35c32aff2 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c | |||
@@ -482,12 +482,34 @@ static void blkcg_iolatency_throttle(struct rq_qos *rqos, struct bio *bio, | |||
482 | spinlock_t *lock) | 482 | spinlock_t *lock) |
483 | { | 483 | { |
484 | struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); | 484 | struct blk_iolatency *blkiolat = BLKIOLATENCY(rqos); |
485 | struct blkcg_gq *blkg = bio->bi_blkg; | 485 | struct blkcg *blkcg; |
486 | struct blkcg_gq *blkg; | ||
487 | struct request_queue *q = rqos->q; | ||
486 | bool issue_as_root = bio_issue_as_root_blkg(bio); | 488 | bool issue_as_root = bio_issue_as_root_blkg(bio); |
487 | 489 | ||
488 | if (!blk_iolatency_enabled(blkiolat)) | 490 | if (!blk_iolatency_enabled(blkiolat)) |
489 | return; | 491 | return; |
490 | 492 | ||
493 | rcu_read_lock(); | ||
494 | blkcg = bio_blkcg(bio); | ||
495 | bio_associate_blkcg(bio, &blkcg->css); | ||
496 | blkg = blkg_lookup(blkcg, q); | ||
497 | if (unlikely(!blkg)) { | ||
498 | if (!lock) | ||
499 | spin_lock_irq(q->queue_lock); | ||
500 | blkg = blkg_lookup_create(blkcg, q); | ||
501 | if (IS_ERR(blkg)) | ||
502 | blkg = NULL; | ||
503 | if (!lock) | ||
504 | spin_unlock_irq(q->queue_lock); | ||
505 | } | ||
506 | if (!blkg) | ||
507 | goto out; | ||
508 | |||
509 | bio_issue_init(&bio->bi_issue, bio_sectors(bio)); | ||
510 | bio_associate_blkg(bio, blkg); | ||
511 | out: | ||
512 | rcu_read_unlock(); | ||
491 | while (blkg && blkg->parent) { | 513 | while (blkg && blkg->parent) { |
492 | struct iolatency_grp *iolat = blkg_to_lat(blkg); | 514 | struct iolatency_grp *iolat = blkg_to_lat(blkg); |
493 | if (!iolat) { | 515 | if (!iolat) { |
@@ -708,7 +730,7 @@ static void blkiolatency_timer_fn(struct timer_list *t) | |||
708 | * We could be exiting, don't access the pd unless we have a | 730 | * We could be exiting, don't access the pd unless we have a |
709 | * ref on the blkg. | 731 | * ref on the blkg. |
710 | */ | 732 | */ |
711 | if (!blkg_tryget(blkg)) | 733 | if (!blkg_try_get(blkg)) |
712 | continue; | 734 | continue; |
713 | 735 | ||
714 | iolat = blkg_to_lat(blkg); | 736 | iolat = blkg_to_lat(blkg); |
diff --git a/block/blk-merge.c b/block/blk-merge.c index 42a46744c11b..6b5ad275ed56 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
@@ -714,6 +714,31 @@ static void blk_account_io_merge(struct request *req) | |||
714 | part_stat_unlock(); | 714 | part_stat_unlock(); |
715 | } | 715 | } |
716 | } | 716 | } |
717 | /* | ||
718 | * Two cases of handling DISCARD merge: | ||
719 | * If max_discard_segments > 1, the driver takes every bio | ||
720 | * as a range and sends them to the controller together. The | ||
721 | * ranges needn't be contiguous. | ||
722 | * Otherwise, the bios/requests are handled the same as | ||
723 | * others, which must be contiguous. | ||
724 | */ | ||
725 | static inline bool blk_discard_mergable(struct request *req) | ||
726 | { | ||
727 | if (req_op(req) == REQ_OP_DISCARD && | ||
728 | queue_max_discard_segments(req->q) > 1) | ||
729 | return true; | ||
730 | return false; | ||
731 | } | ||
732 | |||
733 | static enum elv_merge blk_try_req_merge(struct request *req, struct request *next) | ||
734 | { | ||
735 | if (blk_discard_mergable(req)) | ||
736 | return ELEVATOR_DISCARD_MERGE; | ||
737 | else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next)) | ||
738 | return ELEVATOR_BACK_MERGE; | ||
739 | |||
740 | return ELEVATOR_NO_MERGE; | ||
741 | } | ||
717 | 742 | ||
718 | /* | 743 | /* |
719 | * For non-mq, this has to be called with the request spinlock acquired. | 744 | * For non-mq, this has to be called with the request spinlock acquired. |
@@ -731,12 +756,6 @@ static struct request *attempt_merge(struct request_queue *q, | |||
731 | if (req_op(req) != req_op(next)) | 756 | if (req_op(req) != req_op(next)) |
732 | return NULL; | 757 | return NULL; |
733 | 758 | ||
734 | /* | ||
735 | * not contiguous | ||
736 | */ | ||
737 | if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) | ||
738 | return NULL; | ||
739 | |||
740 | if (rq_data_dir(req) != rq_data_dir(next) | 759 | if (rq_data_dir(req) != rq_data_dir(next) |
741 | || req->rq_disk != next->rq_disk | 760 | || req->rq_disk != next->rq_disk |
742 | || req_no_special_merge(next)) | 761 | || req_no_special_merge(next)) |
@@ -760,11 +779,19 @@ static struct request *attempt_merge(struct request_queue *q, | |||
760 | * counts here. Handle DISCARDs separately, as they | 779 | * counts here. Handle DISCARDs separately, as they |
761 | * have separate settings. | 780 | * have separate settings. |
762 | */ | 781 | */ |
763 | if (req_op(req) == REQ_OP_DISCARD) { | 782 | |
783 | switch (blk_try_req_merge(req, next)) { | ||
784 | case ELEVATOR_DISCARD_MERGE: | ||
764 | if (!req_attempt_discard_merge(q, req, next)) | 785 | if (!req_attempt_discard_merge(q, req, next)) |
765 | return NULL; | 786 | return NULL; |
766 | } else if (!ll_merge_requests_fn(q, req, next)) | 787 | break; |
788 | case ELEVATOR_BACK_MERGE: | ||
789 | if (!ll_merge_requests_fn(q, req, next)) | ||
790 | return NULL; | ||
791 | break; | ||
792 | default: | ||
767 | return NULL; | 793 | return NULL; |
794 | } | ||
768 | 795 | ||
769 | /* | 796 | /* |
770 | * If failfast settings disagree or any of the two is already | 797 | * If failfast settings disagree or any of the two is already |
@@ -888,8 +915,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) | |||
888 | 915 | ||
889 | enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) | 916 | enum elv_merge blk_try_merge(struct request *rq, struct bio *bio) |
890 | { | 917 | { |
891 | if (req_op(rq) == REQ_OP_DISCARD && | 918 | if (blk_discard_mergable(rq)) |
892 | queue_max_discard_segments(rq->q) > 1) | ||
893 | return ELEVATOR_DISCARD_MERGE; | 919 | return ELEVATOR_DISCARD_MERGE; |
894 | else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) | 920 | else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) |
895 | return ELEVATOR_BACK_MERGE; | 921 | return ELEVATOR_BACK_MERGE; |
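
The hunks above factor the DISCARD special case into blk_discard_mergable() and route request-request merging through blk_try_req_merge(). A self-contained sketch of the decision, mirroring the diff (example_try_req_merge is a hypothetical stand-in):

static enum elv_merge example_try_req_merge(struct request *req,
					    struct request *next)
{
	/* multi-range DISCARD: the ranges are sent to the controller
	 * together and need not be contiguous */
	if (req_op(req) == REQ_OP_DISCARD &&
	    queue_max_discard_segments(req->q) > 1)
		return ELEVATOR_DISCARD_MERGE;

	/* everything else still requires back-to-back sectors */
	if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
		return ELEVATOR_BACK_MERGE;

	return ELEVATOR_NO_MERGE;
}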
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 0641533597f1..844a454a7b3a 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -1007,8 +1007,6 @@ void blk_unregister_queue(struct gendisk *disk) | |||
1007 | kobject_del(&q->kobj); | 1007 | kobject_del(&q->kobj); |
1008 | blk_trace_remove_sysfs(disk_to_dev(disk)); | 1008 | blk_trace_remove_sysfs(disk_to_dev(disk)); |
1009 | 1009 | ||
1010 | rq_qos_exit(q); | ||
1011 | |||
1012 | mutex_lock(&q->sysfs_lock); | 1010 | mutex_lock(&q->sysfs_lock); |
1013 | if (q->request_fn || (q->mq_ops && q->elevator)) | 1011 | if (q->request_fn || (q->mq_ops && q->elevator)) |
1014 | elv_unregister_queue(q); | 1012 | elv_unregister_queue(q); |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 4bda70e8db48..db1a3a2ae006 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
@@ -2115,11 +2115,21 @@ static inline void throtl_update_latency_buckets(struct throtl_data *td) | |||
2115 | } | 2115 | } |
2116 | #endif | 2116 | #endif |
2117 | 2117 | ||
2118 | static void blk_throtl_assoc_bio(struct throtl_grp *tg, struct bio *bio) | ||
2119 | { | ||
2120 | #ifdef CONFIG_BLK_DEV_THROTTLING_LOW | ||
2121 | /* fallback to root_blkg if we fail to get a blkg ref */ | ||
2122 | if (bio->bi_css && (bio_associate_blkg(bio, tg_to_blkg(tg)) == -ENODEV)) | ||
2123 | bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg); | ||
2124 | bio_issue_init(&bio->bi_issue, bio_sectors(bio)); | ||
2125 | #endif | ||
2126 | } | ||
2127 | |||
2118 | bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, | 2128 | bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, |
2119 | struct bio *bio) | 2129 | struct bio *bio) |
2120 | { | 2130 | { |
2121 | struct throtl_qnode *qn = NULL; | 2131 | struct throtl_qnode *qn = NULL; |
2122 | struct throtl_grp *tg = blkg_to_tg(blkg); | 2132 | struct throtl_grp *tg = blkg_to_tg(blkg ?: q->root_blkg); |
2123 | struct throtl_service_queue *sq; | 2133 | struct throtl_service_queue *sq; |
2124 | bool rw = bio_data_dir(bio); | 2134 | bool rw = bio_data_dir(bio); |
2125 | bool throttled = false; | 2135 | bool throttled = false; |
@@ -2138,6 +2148,7 @@ bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg, | |||
2138 | if (unlikely(blk_queue_bypass(q))) | 2148 | if (unlikely(blk_queue_bypass(q))) |
2139 | goto out_unlock; | 2149 | goto out_unlock; |
2140 | 2150 | ||
2151 | blk_throtl_assoc_bio(tg, bio); | ||
2141 | blk_throtl_update_idletime(tg); | 2152 | blk_throtl_update_idletime(tg); |
2142 | 2153 | ||
2143 | sq = &tg->service_queue; | 2154 | sq = &tg->service_queue; |
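
blk_throtl_assoc_bio() above uses a fallback idiom worth spelling out: if the intended blkg is already being torn down, the bio is accounted to the queue's root blkg rather than left unassociated. A minimal sketch (example_assoc_or_root is hypothetical):

static void example_assoc_or_root(struct bio *bio, struct blkcg_gq *blkg)
{
	/* bio_associate_blkg() returns -ENODEV when blkg_try_get() fails,
	 * i.e. the group's refcount has already dropped to zero */
	if (bio_associate_blkg(bio, blkg) == -ENODEV)
		bio_associate_blkg(bio, bio->bi_disk->queue->root_blkg);
}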
diff --git a/block/bounce.c b/block/bounce.c index cf49fe02f65c..36869afc258c 100644 --- a/block/bounce.c +++ b/block/bounce.c | |||
@@ -276,9 +276,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask, | |||
276 | } | 276 | } |
277 | } | 277 | } |
278 | 278 | ||
279 | bio_clone_blkg_association(bio, bio_src); | 279 | bio_clone_blkcg_association(bio, bio_src); |
280 | |||
281 | blkcg_bio_issue_init(bio); | ||
282 | 280 | ||
283 | return bio; | 281 | return bio; |
284 | } | 282 | } |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 6a3d87dd3c1a..ed41aa978c4a 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -3759,7 +3759,7 @@ static void check_blkcg_changed(struct cfq_io_cq *cic, struct bio *bio) | |||
3759 | uint64_t serial_nr; | 3759 | uint64_t serial_nr; |
3760 | 3760 | ||
3761 | rcu_read_lock(); | 3761 | rcu_read_lock(); |
3762 | serial_nr = __bio_blkcg(bio)->css.serial_nr; | 3762 | serial_nr = bio_blkcg(bio)->css.serial_nr; |
3763 | rcu_read_unlock(); | 3763 | rcu_read_unlock(); |
3764 | 3764 | ||
3765 | /* | 3765 | /* |
@@ -3824,7 +3824,7 @@ cfq_get_queue(struct cfq_data *cfqd, bool is_sync, struct cfq_io_cq *cic, | |||
3824 | struct cfq_group *cfqg; | 3824 | struct cfq_group *cfqg; |
3825 | 3825 | ||
3826 | rcu_read_lock(); | 3826 | rcu_read_lock(); |
3827 | cfqg = cfq_lookup_cfqg(cfqd, __bio_blkcg(bio)); | 3827 | cfqg = cfq_lookup_cfqg(cfqd, bio_blkcg(bio)); |
3828 | if (!cfqg) { | 3828 | if (!cfqg) { |
3829 | cfqq = &cfqd->oom_cfqq; | 3829 | cfqq = &cfqd->oom_cfqq; |
3830 | goto out; | 3830 | goto out; |
diff --git a/drivers/block/brd.c b/drivers/block/brd.c index df8103dd40ac..c18586fccb6f 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c | |||
@@ -396,15 +396,14 @@ static struct brd_device *brd_alloc(int i) | |||
396 | disk->first_minor = i * max_part; | 396 | disk->first_minor = i * max_part; |
397 | disk->fops = &brd_fops; | 397 | disk->fops = &brd_fops; |
398 | disk->private_data = brd; | 398 | disk->private_data = brd; |
399 | disk->queue = brd->brd_queue; | ||
400 | disk->flags = GENHD_FL_EXT_DEVT; | 399 | disk->flags = GENHD_FL_EXT_DEVT; |
401 | sprintf(disk->disk_name, "ram%d", i); | 400 | sprintf(disk->disk_name, "ram%d", i); |
402 | set_capacity(disk, rd_size * 2); | 401 | set_capacity(disk, rd_size * 2); |
403 | disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; | 402 | brd->brd_queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO; |
404 | 403 | ||
405 | /* Tell the block layer that this is not a rotational device */ | 404 | /* Tell the block layer that this is not a rotational device */ |
406 | blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue); | 405 | blk_queue_flag_set(QUEUE_FLAG_NONROT, brd->brd_queue); |
407 | blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, disk->queue); | 406 | blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, brd->brd_queue); |
408 | 407 | ||
409 | return brd; | 408 | return brd; |
410 | 409 | ||
@@ -436,6 +435,7 @@ static struct brd_device *brd_init_one(int i, bool *new) | |||
436 | 435 | ||
437 | brd = brd_alloc(i); | 436 | brd = brd_alloc(i); |
438 | if (brd) { | 437 | if (brd) { |
438 | brd->brd_disk->queue = brd->brd_queue; | ||
439 | add_disk(brd->brd_disk); | 439 | add_disk(brd->brd_disk); |
440 | list_add_tail(&brd->brd_list, &brd_devices); | 440 | list_add_tail(&brd->brd_list, &brd_devices); |
441 | } | 441 | } |
@@ -503,8 +503,14 @@ static int __init brd_init(void) | |||
503 | 503 | ||
504 | /* point of no return */ | 504 | /* point of no return */ |
505 | 505 | ||
506 | list_for_each_entry(brd, &brd_devices, brd_list) | 506 | list_for_each_entry(brd, &brd_devices, brd_list) { |
507 | /* | ||
508 | * associate with the queue just before adding the disk | ||
509 | * to avoid messing up the failure path | ||
510 | */ | ||
511 | brd->brd_disk->queue = brd->brd_queue; | ||
507 | add_disk(brd->brd_disk); | 512 | add_disk(brd->brd_disk); |
513 | } | ||
508 | 514 | ||
509 | blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS, | 515 | blk_register_region(MKDEV(RAMDISK_MAJOR, 0), 1UL << MINORBITS, |
510 | THIS_MODULE, brd_probe, NULL, NULL); | 516 | THIS_MODULE, brd_probe, NULL, NULL); |
diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e6273ae85246..cb0cc8685076 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c | |||
@@ -77,7 +77,6 @@ | |||
77 | #include <linux/falloc.h> | 77 | #include <linux/falloc.h> |
78 | #include <linux/uio.h> | 78 | #include <linux/uio.h> |
79 | #include <linux/ioprio.h> | 79 | #include <linux/ioprio.h> |
80 | #include <linux/blk-cgroup.h> | ||
81 | 80 | ||
82 | #include "loop.h" | 81 | #include "loop.h" |
83 | 82 | ||
@@ -1760,8 +1759,8 @@ static blk_status_t loop_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
1760 | 1759 | ||
1761 | /* always use the first bio's css */ | 1760 | /* always use the first bio's css */ |
1762 | #ifdef CONFIG_BLK_CGROUP | 1761 | #ifdef CONFIG_BLK_CGROUP |
1763 | if (cmd->use_aio && rq->bio && rq->bio->bi_blkg) { | 1762 | if (cmd->use_aio && rq->bio && rq->bio->bi_css) { |
1764 | cmd->css = &bio_blkcg(rq->bio)->css; | 1763 | cmd->css = rq->bio->bi_css; |
1765 | css_get(cmd->css); | 1764 | css_get(cmd->css); |
1766 | } else | 1765 | } else |
1767 | #endif | 1766 | #endif |
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index dfc8de6ce525..a7daa8acbab3 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c | |||
@@ -1942,8 +1942,8 @@ static int exec_drive_taskfile(struct driver_data *dd, | |||
1942 | dev_warn(&dd->pdev->dev, | 1942 | dev_warn(&dd->pdev->dev, |
1943 | "data movement but " | 1943 | "data movement but " |
1944 | "sect_count is 0\n"); | 1944 | "sect_count is 0\n"); |
1945 |     err = -EINVAL; | 1945 | err = -EINVAL;
1946 |     goto abort; | 1946 | goto abort;
1947 | } | 1947 | } |
1948 | } | 1948 | } |
1949 | } | 1949 | } |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f3fb5bb8c82a..ac1cffd2a09b 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -542,7 +542,7 @@ static void raid0_handle_discard(struct mddev *mddev, struct bio *bio) | |||
542 | !discard_bio) | 542 | !discard_bio) |
543 | continue; | 543 | continue; |
544 | bio_chain(discard_bio, bio); | 544 | bio_chain(discard_bio, bio); |
545 | bio_clone_blkg_association(discard_bio, bio); | 545 | bio_clone_blkcg_association(discard_bio, bio); |
546 | if (mddev->gendisk) | 546 | if (mddev->gendisk) |
547 | trace_block_bio_remap(bdev_get_queue(rdev->bdev), | 547 | trace_block_bio_remap(bdev_get_queue(rdev->bdev), |
548 | discard_bio, disk_devt(mddev->gendisk), | 548 | discard_bio, disk_devt(mddev->gendisk), |
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index e52b9d3c0bd6..0b70c8bab045 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c | |||
@@ -1704,7 +1704,6 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, | |||
1704 | op->fcp_req.rspaddr = &op->rsp_iu; | 1704 | op->fcp_req.rspaddr = &op->rsp_iu; |
1705 | op->fcp_req.rsplen = sizeof(op->rsp_iu); | 1705 | op->fcp_req.rsplen = sizeof(op->rsp_iu); |
1706 | op->fcp_req.done = nvme_fc_fcpio_done; | 1706 | op->fcp_req.done = nvme_fc_fcpio_done; |
1707 | op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; | ||
1708 | op->ctrl = ctrl; | 1707 | op->ctrl = ctrl; |
1709 | op->queue = queue; | 1708 | op->queue = queue; |
1710 | op->rq = rq; | 1709 | op->rq = rq; |
@@ -1752,6 +1751,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, | |||
1752 | if (res) | 1751 | if (res) |
1753 | return res; | 1752 | return res; |
1754 | op->op.fcp_req.first_sgl = &op->sgl[0]; | 1753 | op->op.fcp_req.first_sgl = &op->sgl[0]; |
1754 | op->op.fcp_req.private = &op->priv[0]; | ||
1755 | return res; | 1755 | return res; |
1756 | } | 1756 | } |
1757 | 1757 | ||
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f30031945ee4..c33bb201b884 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c | |||
@@ -1663,6 +1663,9 @@ static void nvme_map_cmb(struct nvme_dev *dev) | |||
1663 | struct pci_dev *pdev = to_pci_dev(dev->dev); | 1663 | struct pci_dev *pdev = to_pci_dev(dev->dev); |
1664 | int bar; | 1664 | int bar; |
1665 | 1665 | ||
1666 | if (dev->cmb_size) | ||
1667 | return; | ||
1668 | |||
1666 | dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); | 1669 | dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); |
1667 | if (!dev->cmbsz) | 1670 | if (!dev->cmbsz) |
1668 | return; | 1671 | return; |
@@ -2147,7 +2150,6 @@ static void nvme_pci_disable(struct nvme_dev *dev) | |||
2147 | { | 2150 | { |
2148 | struct pci_dev *pdev = to_pci_dev(dev->dev); | 2151 | struct pci_dev *pdev = to_pci_dev(dev->dev); |
2149 | 2152 | ||
2150 | nvme_release_cmb(dev); | ||
2151 | pci_free_irq_vectors(pdev); | 2153 | pci_free_irq_vectors(pdev); |
2152 | 2154 | ||
2153 | if (pci_is_enabled(pdev)) { | 2155 | if (pci_is_enabled(pdev)) { |
@@ -2595,6 +2597,7 @@ static void nvme_remove(struct pci_dev *pdev) | |||
2595 | nvme_stop_ctrl(&dev->ctrl); | 2597 | nvme_stop_ctrl(&dev->ctrl); |
2596 | nvme_remove_namespaces(&dev->ctrl); | 2598 | nvme_remove_namespaces(&dev->ctrl); |
2597 | nvme_dev_disable(dev, true); | 2599 | nvme_dev_disable(dev, true); |
2600 | nvme_release_cmb(dev); | ||
2598 | nvme_free_host_mem(dev); | 2601 | nvme_free_host_mem(dev); |
2599 | nvme_dev_remove_admin(dev); | 2602 | nvme_dev_remove_admin(dev); |
2600 | nvme_free_queues(dev, 0); | 2603 | nvme_free_queues(dev, 0); |
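
The two pci.c hunks make CMB handling idempotent across suspend/resume: mapping bails out if already done, and the p2p resource is released only at device removal. A hedged sketch of the guard (example_map_cmb is hypothetical; the real setup reads CMBSZ/CMBLOC and registers the p2pdma resource):

static void example_map_cmb(struct nvme_dev *dev)
{
	/* already mapped on a previous probe or resume; registering the
	 * same p2p resource twice would conflict */
	if (dev->cmb_size)
		return;

	/* ... read CMBSZ/CMBLOC, map the BAR, register p2pdma once ... */
}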
diff --git a/fs/buffer.c b/fs/buffer.c index d60d61e8ed7d..1286c2b95498 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -3060,6 +3060,11 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, | |||
3060 | */ | 3060 | */ |
3061 | bio = bio_alloc(GFP_NOIO, 1); | 3061 | bio = bio_alloc(GFP_NOIO, 1); |
3062 | 3062 | ||
3063 | if (wbc) { | ||
3064 | wbc_init_bio(wbc, bio); | ||
3065 | wbc_account_io(wbc, bh->b_page, bh->b_size); | ||
3066 | } | ||
3067 | |||
3063 | bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 3068 | bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
3064 | bio_set_dev(bio, bh->b_bdev); | 3069 | bio_set_dev(bio, bh->b_bdev); |
3065 | bio->bi_write_hint = write_hint; | 3070 | bio->bi_write_hint = write_hint; |
@@ -3079,11 +3084,6 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh, | |||
3079 | op_flags |= REQ_PRIO; | 3084 | op_flags |= REQ_PRIO; |
3080 | bio_set_op_attrs(bio, op, op_flags); | 3085 | bio_set_op_attrs(bio, op, op_flags); |
3081 | 3086 | ||
3082 | if (wbc) { | ||
3083 | wbc_init_bio(wbc, bio); | ||
3084 | wbc_account_io(wbc, bh->b_page, bh->b_size); | ||
3085 | } | ||
3086 | |||
3087 | submit_bio(bio); | 3087 | submit_bio(bio); |
3088 | return 0; | 3088 | return 0; |
3089 | } | 3089 | } |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 2aa62d58d8dd..db7590178dfc 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -374,13 +374,13 @@ static int io_submit_init_bio(struct ext4_io_submit *io, | |||
374 | bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); | 374 | bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); |
375 | if (!bio) | 375 | if (!bio) |
376 | return -ENOMEM; | 376 | return -ENOMEM; |
377 | wbc_init_bio(io->io_wbc, bio); | ||
377 | bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 378 | bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
378 | bio_set_dev(bio, bh->b_bdev); | 379 | bio_set_dev(bio, bh->b_bdev); |
379 | bio->bi_end_io = ext4_end_bio; | 380 | bio->bi_end_io = ext4_end_bio; |
380 | bio->bi_private = ext4_get_io_end(io->io_end); | 381 | bio->bi_private = ext4_get_io_end(io->io_end); |
381 | io->io_bio = bio; | 382 | io->io_bio = bio; |
382 | io->io_next_block = bh->b_blocknr; | 383 | io->io_next_block = bh->b_blocknr; |
383 | wbc_init_bio(io->io_wbc, bio); | ||
384 | return 0; | 384 | return 0; |
385 | } | 385 | } |
386 | 386 | ||
diff --git a/include/linux/bio.h b/include/linux/bio.h index b47c7f716731..056fb627edb3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
@@ -503,31 +503,23 @@ do { \ | |||
503 | disk_devt((bio)->bi_disk) | 503 | disk_devt((bio)->bi_disk) |
504 | 504 | ||
505 | #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) | 505 | #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) |
506 | int bio_associate_blkg_from_page(struct bio *bio, struct page *page); | 506 | int bio_associate_blkcg_from_page(struct bio *bio, struct page *page); |
507 | #else | 507 | #else |
508 | static inline int bio_associate_blkg_from_page(struct bio *bio, | 508 | static inline int bio_associate_blkcg_from_page(struct bio *bio, |
509 | struct page *page) { return 0; } | 509 | struct page *page) { return 0; } |
510 | #endif | 510 | #endif |
511 | 511 | ||
512 | #ifdef CONFIG_BLK_CGROUP | 512 | #ifdef CONFIG_BLK_CGROUP |
513 | int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); | ||
513 | int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); | 514 | int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); |
514 | int bio_associate_blkg_from_css(struct bio *bio, | ||
515 | struct cgroup_subsys_state *css); | ||
516 | int bio_associate_create_blkg(struct request_queue *q, struct bio *bio); | ||
517 | int bio_reassociate_blkg(struct request_queue *q, struct bio *bio); | ||
518 | void bio_disassociate_task(struct bio *bio); | 515 | void bio_disassociate_task(struct bio *bio); |
519 | void bio_clone_blkg_association(struct bio *dst, struct bio *src); | 516 | void bio_clone_blkcg_association(struct bio *dst, struct bio *src); |
520 | #else /* CONFIG_BLK_CGROUP */ | 517 | #else /* CONFIG_BLK_CGROUP */ |
521 | static inline int bio_associate_blkg_from_css(struct bio *bio, | 518 | static inline int bio_associate_blkcg(struct bio *bio, |
522 | struct cgroup_subsys_state *css) | 519 | struct cgroup_subsys_state *blkcg_css) { return 0; } |
523 | { return 0; } | ||
524 | static inline int bio_associate_create_blkg(struct request_queue *q, | ||
525 | struct bio *bio) { return 0; } | ||
526 | static inline int bio_reassociate_blkg(struct request_queue *q, struct bio *bio) | ||
527 | { return 0; } | ||
528 | static inline void bio_disassociate_task(struct bio *bio) { } | 520 | static inline void bio_disassociate_task(struct bio *bio) { } |
529 | static inline void bio_clone_blkg_association(struct bio *dst, | 521 | static inline void bio_clone_blkcg_association(struct bio *dst, |
530 | struct bio *src) { } | 522 | struct bio *src) { } |
531 | #endif /* CONFIG_BLK_CGROUP */ | 523 | #endif /* CONFIG_BLK_CGROUP */ |
532 | 524 | ||
533 | #ifdef CONFIG_HIGHMEM | 525 | #ifdef CONFIG_HIGHMEM |
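
Because the !CONFIG_BLK_CGROUP stubs above are no-ops returning 0, callers can associate unconditionally with no ifdefs. A tiny hedged example (example_submit is hypothetical):

static void example_submit(struct bio *bio, struct cgroup_subsys_state *css)
{
	bio_associate_blkcg(bio, css);	/* compiles to a no-op without blk-cgroup */
	submit_bio(bio);
}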
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1e76ceebeb5d..6d766a19f2bb 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h | |||
@@ -126,7 +126,7 @@ struct blkcg_gq { | |||
126 | struct request_list rl; | 126 | struct request_list rl; |
127 | 127 | ||
128 | /* reference count */ | 128 | /* reference count */ |
129 | struct percpu_ref refcnt; | 129 | atomic_t refcnt; |
130 | 130 | ||
131 | /* is this blkg online? protected by both blkcg and q locks */ | 131 | /* is this blkg online? protected by both blkcg and q locks */ |
132 | bool online; | 132 | bool online; |
@@ -184,8 +184,6 @@ extern struct cgroup_subsys_state * const blkcg_root_css; | |||
184 | 184 | ||
185 | struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, | 185 | struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, |
186 | struct request_queue *q, bool update_hint); | 186 | struct request_queue *q, bool update_hint); |
187 | struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, | ||
188 | struct request_queue *q); | ||
189 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, | 187 | struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, |
190 | struct request_queue *q); | 188 | struct request_queue *q); |
191 | int blkcg_init_queue(struct request_queue *q); | 189 | int blkcg_init_queue(struct request_queue *q); |
@@ -232,59 +230,22 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, | |||
232 | char *input, struct blkg_conf_ctx *ctx); | 230 | char *input, struct blkg_conf_ctx *ctx); |
233 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); | 231 | void blkg_conf_finish(struct blkg_conf_ctx *ctx); |
234 | 232 | ||
235 | /** | ||
236 | * blkcg_css - find the current css | ||
237 | * | ||
238 | * Find the css associated with either the kthread or the current task. | ||
239 | * This may return a dying css, so it is up to the caller to use tryget logic | ||
240 | * to confirm it is alive and well. | ||
241 | */ | ||
242 | static inline struct cgroup_subsys_state *blkcg_css(void) | ||
243 | { | ||
244 | struct cgroup_subsys_state *css; | ||
245 | |||
246 | css = kthread_blkcg(); | ||
247 | if (css) | ||
248 | return css; | ||
249 | return task_css(current, io_cgrp_id); | ||
250 | } | ||
251 | 233 | ||
252 | static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) | 234 | static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) |
253 | { | 235 | { |
254 | return css ? container_of(css, struct blkcg, css) : NULL; | 236 | return css ? container_of(css, struct blkcg, css) : NULL; |
255 | } | 237 | } |
256 | 238 | ||
257 | /** | ||
258 | * __bio_blkcg - internal version of bio_blkcg for bfq and cfq | ||
259 | * | ||
260 | * DO NOT USE. | ||
261 | * There is a flaw using this version of the function. In particular, this was | ||
262 | * used in a broken paradigm where association was called on the given css. It | ||
263 | * is possible though that the returned css from task_css() is in the process | ||
264 | * of dying due to migration of the current task. So it is improper to assume | ||
265 | * *_get() is going to succeed. Both BFQ and CFQ rely on this logic and will | ||
266 | * take additional work to handle more gracefully. | ||
267 | */ | ||
268 | static inline struct blkcg *__bio_blkcg(struct bio *bio) | ||
269 | { | ||
270 | if (bio && bio->bi_blkg) | ||
271 | return bio->bi_blkg->blkcg; | ||
272 | return css_to_blkcg(blkcg_css()); | ||
273 | } | ||
274 | |||
275 | /** | ||
276 | * bio_blkcg - grab the blkcg associated with a bio | ||
277 | * @bio: target bio | ||
278 | * | ||
279 | * This returns the blkcg associated with a bio, NULL if not associated. | ||
280 | * Callers are expected to either handle NULL or know association has been | ||
281 | * done prior to calling this. | ||
282 | */ | ||
283 | static inline struct blkcg *bio_blkcg(struct bio *bio) | 239 | static inline struct blkcg *bio_blkcg(struct bio *bio) |
284 | { | 240 | { |
285 | if (bio && bio->bi_blkg) | 241 | struct cgroup_subsys_state *css; |
286 | return bio->bi_blkg->blkcg; | 242 | |
287 | return NULL; | 243 | if (bio && bio->bi_css) |
244 | return css_to_blkcg(bio->bi_css); | ||
245 | css = kthread_blkcg(); | ||
246 | if (css) | ||
247 | return css_to_blkcg(css); | ||
248 | return css_to_blkcg(task_css(current, io_cgrp_id)); | ||
288 | } | 249 | } |
289 | 250 | ||
290 | static inline bool blk_cgroup_congested(void) | 251 | static inline bool blk_cgroup_congested(void) |
@@ -490,35 +451,26 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) | |||
490 | */ | 451 | */ |
491 | static inline void blkg_get(struct blkcg_gq *blkg) | 452 | static inline void blkg_get(struct blkcg_gq *blkg) |
492 | { | 453 | { |
493 | percpu_ref_get(&blkg->refcnt); | 454 | WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); |
455 | atomic_inc(&blkg->refcnt); | ||
494 | } | 456 | } |
495 | 457 | ||
496 | /** | 458 | /** |
497 | * blkg_tryget - try and get a blkg reference | 459 | * blkg_try_get - try and get a blkg reference |
498 | * @blkg: blkg to get | 460 | * @blkg: blkg to get |
499 | * | 461 | * |
500 | * This is for use when doing an RCU lookup of the blkg. We may be in the midst | 462 | * This is for use when doing an RCU lookup of the blkg. We may be in the midst |
501 | * of freeing this blkg, so we can only use it if the refcnt is not zero. | 463 | * of freeing this blkg, so we can only use it if the refcnt is not zero. |
502 | */ | 464 | */ |
503 | static inline bool blkg_tryget(struct blkcg_gq *blkg) | 465 | static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) |
504 | { | 466 | { |
505 | return percpu_ref_tryget(&blkg->refcnt); | 467 | if (atomic_inc_not_zero(&blkg->refcnt)) |
468 | return blkg; | ||
469 | return NULL; | ||
506 | } | 470 | } |
507 | 471 | ||
508 | /** | ||
510 | * blkg_tryget_closest - try and get a blkg ref on the closest blkg | ||
510 | * @blkg: blkg to get | ||
511 | * | ||
512 | * This walks up the blkg tree to find the closest non-dying blkg and returns | ||
513 | * the blkg it actually took the reference on, which may not be the one passed in. | ||
514 | */ | ||
515 | static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) | ||
516 | { | ||
517 | while (!percpu_ref_tryget(&blkg->refcnt)) | ||
518 | blkg = blkg->parent; | ||
519 | 472 | ||
520 | return blkg; | 473 | void __blkg_release_rcu(struct rcu_head *rcu); |
521 | } | ||
522 | 474 | ||
523 | /** | 475 | /** |
524 | * blkg_put - put a blkg reference | 476 | * blkg_put - put a blkg reference |
@@ -526,7 +478,9 @@ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) | |||
526 | */ | 478 | */ |
527 | static inline void blkg_put(struct blkcg_gq *blkg) | 479 | static inline void blkg_put(struct blkcg_gq *blkg) |
528 | { | 480 | { |
529 | percpu_ref_put(&blkg->refcnt); | 481 | WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); |
482 | if (atomic_dec_and_test(&blkg->refcnt)) | ||
483 | call_rcu(&blkg->rcu_head, __blkg_release_rcu); | ||
530 | } | 484 | } |
531 | 485 | ||
532 | /** | 486 | /** |
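The revert trades the percpu_ref back for a plain atomic_t, and blkg_try_get() hinges on atomic_inc_not_zero(): a reference is taken only while the count is still positive, so an RCU reader can never revive a blkg whose last reference is already gone. The WARN_ON_ONCE() calls in blkg_get()/blkg_put() assert the complementary rule that plain get/put callers must already hold a reference. A self-contained C11 model of the tryget/put pattern follows; the RCU free is reduced to a printf and the names are illustrative.

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    struct obj { atomic_int refcnt; };

    /* Model of blkg_try_get(): succeed only while refcnt > 0, the same
     * compare-and-swap loop atomic_inc_not_zero() performs. */
    static bool try_get(struct obj *o)
    {
        int old = atomic_load(&o->refcnt);

        while (old != 0) {
            if (atomic_compare_exchange_weak(&o->refcnt, &old, old + 1))
                return true;     /* reference taken */
        }
        return false;            /* already on its way to being freed */
    }

    static void put(struct obj *o)
    {
        if (atomic_fetch_sub(&o->refcnt, 1) == 1)
            printf("last ref dropped: call_rcu() would run here\n");
    }

    int main(void)
    {
        struct obj o;

        atomic_init(&o.refcnt, 1);
        printf("try_get live: %d\n", try_get(&o));  /* 1 */
        put(&o);
        put(&o);                                    /* hits zero, "frees" */
        printf("try_get dead: %d\n", try_get(&o));  /* 0 */
        return 0;
    }

The removed blkg_tryget_closest() walked up the parent chain on a failed tryget; the restored scheme instead lets callers such as blk_get_rl() fall back to the root explicitly.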
@@ -579,36 +533,25 @@ static inline struct request_list *blk_get_rl(struct request_queue *q, | |||
579 | 533 | ||
580 | rcu_read_lock(); | 534 | rcu_read_lock(); |
581 | 535 | ||
582 | if (bio && bio->bi_blkg) { | 536 | blkcg = bio_blkcg(bio); |
583 | blkcg = bio->bi_blkg->blkcg; | ||
584 | if (blkcg == &blkcg_root) | ||
585 | goto rl_use_root; | ||
586 | |||
587 | blkg_get(bio->bi_blkg); | ||
588 | rcu_read_unlock(); | ||
589 | return &bio->bi_blkg->rl; | ||
590 | } | ||
591 | 537 | ||
592 | blkcg = css_to_blkcg(blkcg_css()); | 538 | /* bypass blkg lookup and use @q->root_rl directly for root */ |
593 | if (blkcg == &blkcg_root) | 539 | if (blkcg == &blkcg_root) |
594 | goto rl_use_root; | 540 | goto root_rl; |
595 | 541 | ||
542 | /* | ||
543 | * Try to use blkg->rl. blkg lookup may fail under memory pressure | ||
544 | * or if either the blkcg or queue is going away. Fall back to | ||
545 | * root_rl in such cases. | ||
546 | */ | ||
596 | blkg = blkg_lookup(blkcg, q); | 547 | blkg = blkg_lookup(blkcg, q); |
597 | if (unlikely(!blkg)) | 548 | if (unlikely(!blkg)) |
598 | blkg = __blkg_lookup_create(blkcg, q); | 549 | goto root_rl; |
599 | |||
600 | if (blkg->blkcg == &blkcg_root || !blkg_tryget(blkg)) | ||
601 | goto rl_use_root; | ||
602 | 550 | ||
551 | blkg_get(blkg); | ||
603 | rcu_read_unlock(); | 552 | rcu_read_unlock(); |
604 | return &blkg->rl; | 553 | return &blkg->rl; |
605 | 554 | root_rl: | |
606 | /* | ||
607 | * Each blkg has its own request_list, however, the root blkcg | ||
608 | * uses the request_queue's root_rl. This is to avoid most | ||
609 | * overhead for the root blkcg. | ||
610 | */ | ||
611 | rl_use_root: | ||
612 | rcu_read_unlock(); | 555 | rcu_read_unlock(); |
613 | return &q->root_rl; | 556 | return &q->root_rl; |
614 | } | 557 | } |
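After the revert, blk_get_rl() has three outcomes under rcu_read_lock(): the root blkcg bypasses per-blkg lists and uses q->root_rl, a failed blkg lookup also falls back to root_rl (creation is no longer attempted from this path), and only a successful lookup takes a blkg reference and returns blkg->rl. A toy decision table capturing that flow; the names are hypothetical.

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative model of the restored blk_get_rl() decision flow. */
    enum rl_source { RL_ROOT, RL_BLKG };

    static enum rl_source pick_request_list(bool blkcg_is_root, bool blkg_found)
    {
        if (blkcg_is_root)
            return RL_ROOT;   /* root bypasses the per-blkg list entirely */
        if (!blkg_found)
            return RL_ROOT;   /* lookup can fail under memory pressure */
        return RL_BLKG;       /* blkg_get() and use blkg->rl */
    }

    int main(void)
    {
        static const struct { bool root, found; const char *what; } cases[] = {
            { true,  true,  "root cgroup     " },
            { false, false, "lookup failed   " },
            { false, true,  "lookup succeeded" },
        };

        for (int i = 0; i < 3; i++)
            printf("%s -> %s\n", cases[i].what,
                   pick_request_list(cases[i].root, cases[i].found) == RL_ROOT
                       ? "q->root_rl" : "blkg->rl");
        return 0;
    }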
@@ -854,26 +797,32 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg | |||
854 | struct bio *bio) { return false; } | 797 | struct bio *bio) { return false; } |
855 | #endif | 798 | #endif |
856 | 799 | ||
857 | |||
858 | static inline void blkcg_bio_issue_init(struct bio *bio) | ||
859 | { | ||
860 | bio_issue_init(&bio->bi_issue, bio_sectors(bio)); | ||
861 | } | ||
862 | |||
863 | static inline bool blkcg_bio_issue_check(struct request_queue *q, | 800 | static inline bool blkcg_bio_issue_check(struct request_queue *q, |
864 | struct bio *bio) | 801 | struct bio *bio) |
865 | { | 802 | { |
803 | struct blkcg *blkcg; | ||
866 | struct blkcg_gq *blkg; | 804 | struct blkcg_gq *blkg; |
867 | bool throtl = false; | 805 | bool throtl = false; |
868 | 806 | ||
869 | rcu_read_lock(); | 807 | rcu_read_lock(); |
808 | blkcg = bio_blkcg(bio); | ||
809 | |||
810 | /* associate blkcg if bio hasn't attached one */ | ||
811 | bio_associate_blkcg(bio, &blkcg->css); | ||
870 | 812 | ||
871 | bio_associate_create_blkg(q, bio); | 813 | blkg = blkg_lookup(blkcg, q); |
872 | blkg = bio->bi_blkg; | 814 | if (unlikely(!blkg)) { |
815 | spin_lock_irq(q->queue_lock); | ||
816 | blkg = blkg_lookup_create(blkcg, q); | ||
817 | if (IS_ERR(blkg)) | ||
818 | blkg = NULL; | ||
819 | spin_unlock_irq(q->queue_lock); | ||
820 | } | ||
873 | 821 | ||
874 | throtl = blk_throtl_bio(q, blkg, bio); | 822 | throtl = blk_throtl_bio(q, blkg, bio); |
875 | 823 | ||
876 | if (!throtl) { | 824 | if (!throtl) { |
825 | blkg = blkg ?: q->root_blkg; | ||
877 | /* | 826 | /* |
878 | * If the bio is flagged with BIO_QUEUE_ENTERED it means this | 827 | * If the bio is flagged with BIO_QUEUE_ENTERED it means this |
879 | * is a split bio and we would have already accounted for the | 828 | * is a split bio and we would have already accounted for the |
@@ -885,8 +834,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, | |||
885 | blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); | 834 | blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); |
886 | } | 835 | } |
887 | 836 | ||
888 | blkcg_bio_issue_init(bio); | ||
889 | |||
890 | rcu_read_unlock(); | 837 | rcu_read_unlock(); |
891 | return !throtl; | 838 | return !throtl; |
892 | } | 839 | } |
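The reverted blkcg_bio_issue_check() performs the association inline: resolve the blkcg, attach it to the bio, look up the blkg (creating it under queue_lock on a miss, which may still fail), run throttling, and then account stats, substituting q->root_blkg when no specific blkg exists so statistics are never dropped. A compact model of that fallback accounting; all names are stand-ins.

    #include <stdbool.h>
    #include <stdio.h>

    /* Stand-ins for the kernel objects (illustrative only). */
    struct blkg { const char *name; long stat_ios; };

    static struct blkg root_blkg = { "root", 0 };

    static bool issue_check(struct blkg *looked_up, bool throttled)
    {
        if (!throttled) {
            /* blkg = blkg ?: q->root_blkg in the real code */
            struct blkg *acct = looked_up ? looked_up : &root_blkg;

            acct->stat_ios++;
        }
        return !throttled;   /* false: the bio was held by the throttler */
    }

    int main(void)
    {
        struct blkg grp = { "test-group", 0 };

        issue_check(&grp, false);  /* accounted to the group */
        issue_check(NULL, false);  /* lookup failed: accounted to root */
        printf("grp=%ld root=%ld\n", grp.stat_ios, root_blkg.stat_ios);
        return 0;
    }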
@@ -983,7 +930,6 @@ static inline int blkcg_activate_policy(struct request_queue *q, | |||
983 | static inline void blkcg_deactivate_policy(struct request_queue *q, | 930 | static inline void blkcg_deactivate_policy(struct request_queue *q, |
984 | const struct blkcg_policy *pol) { } | 931 | const struct blkcg_policy *pol) { } |
985 | 932 | ||
986 | static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } | ||
987 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } | 933 | static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } |
988 | 934 | ||
989 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, | 935 | static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, |
@@ -999,7 +945,6 @@ static inline void blk_put_rl(struct request_list *rl) { } | |||
999 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } | 945 | static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } |
1000 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } | 946 | static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } |
1001 | 947 | ||
1002 | static inline void blkcg_bio_issue_init(struct bio *bio) { } | ||
1003 | static inline bool blkcg_bio_issue_check(struct request_queue *q, | 948 | static inline bool blkcg_bio_issue_check(struct request_queue *q, |
1004 | struct bio *bio) { return true; } | 949 | struct bio *bio) { return true; } |
1005 | 950 | ||
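The !CONFIG_BLK_CGROUP stubs above follow the usual header idiom: every real inline has a no-op twin with the same signature, so call sites compile unchanged either way, and the blkcg_bio_issue_check() stub's unconditional true means "always issue". A minimal model of the idiom; the CONFIG macro and names are invented for the demo.

    #include <stdio.h>

    /* Flip this define to model a build with the feature enabled. */
    /* #define CONFIG_DEMO_FEATURE 1 */

    #ifdef CONFIG_DEMO_FEATURE
    static inline int feature_check(int v) { return v % 2 == 0; }
    #else
    static inline int feature_check(int v) { (void)v; return 1; } /* allow all */
    #endif

    int main(void)
    {
        /* No #ifdef at the call site: the stub keeps this valid. */
        printf("feature_check(21) = %d\n", feature_check(21));
        return 0;
    }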
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 093a818c5b68..1dcf652ba0aa 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -178,6 +178,7 @@ struct bio { | |||
178 | * release. Read comment on top of bio_associate_current(). | 178 | * release. Read comment on top of bio_associate_current(). |
179 | */ | 179 | */ |
180 | struct io_context *bi_ioc; | 180 | struct io_context *bi_ioc; |
181 | struct cgroup_subsys_state *bi_css; | ||
181 | struct blkcg_gq *bi_blkg; | 182 | struct blkcg_gq *bi_blkg; |
182 | struct bio_issue bi_issue; | 183 | struct bio_issue bi_issue; |
183 | #endif | 184 | #endif |
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9968332cceed..9d12757a65b0 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h | |||
@@ -93,8 +93,6 @@ extern struct css_set init_css_set; | |||
93 | 93 | ||
94 | bool css_has_online_children(struct cgroup_subsys_state *css); | 94 | bool css_has_online_children(struct cgroup_subsys_state *css); |
95 | struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); | 95 | struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); |
96 | struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, | ||
97 | struct cgroup_subsys *ss); | ||
98 | struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, | 96 | struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, |
99 | struct cgroup_subsys *ss); | 97 | struct cgroup_subsys *ss); |
100 | struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, | 98 | struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, |
diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 738a0c24874f..fdfd04e348f6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h | |||
@@ -246,8 +246,7 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, | |||
246 | * | 246 | * |
247 | * @bio is a part of the writeback in progress controlled by @wbc. Perform | 247 | * @bio is a part of the writeback in progress controlled by @wbc. Perform |
248 | * writeback specific initialization. This is used to apply the cgroup | 248 | * writeback specific initialization. This is used to apply the cgroup |
249 | * writeback context. Must be called after the bio has been associated with | 249 | * writeback context. |
250 | * a device. | ||
251 | */ | 250 | */ |
252 | static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) | 251 | static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) |
253 | { | 252 | { |
@@ -258,7 +257,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) | |||
258 | * regular writeback instead of writing things out itself. | 257 | * regular writeback instead of writing things out itself. |
259 | */ | 258 | */ |
260 | if (wbc->wb) | 259 | if (wbc->wb) |
261 | bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); | 260 | bio_associate_blkcg(bio, wbc->wb->blkcg_css); |
262 | } | 261 | } |
263 | 262 | ||
264 | #else /* CONFIG_CGROUP_WRITEBACK */ | 263 | #else /* CONFIG_CGROUP_WRITEBACK */ |
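With bio_associate_blkcg() restored, wbc_init_bio() tags the bio with the writeback css only when cgroup writeback is actually in effect (wbc->wb set), and the former ordering requirement against device association disappears because no blkg lookup happens here anymore. A small sketch of the conditional association, with stand-in types.

    #include <stddef.h>
    #include <stdio.h>

    /* Stand-ins, not the kernel structures. */
    struct css    { const char *name; };
    struct bdi_wb { struct css *blkcg_css; };
    struct wbc_t  { struct bdi_wb *wb; };
    struct bio_t  { struct css *bi_css; };

    static void wbc_init_bio_sketch(struct wbc_t *wbc, struct bio_t *bio)
    {
        /* pageout()-style callers may pass a wbc with no cgroup context */
        if (wbc->wb)
            bio->bi_css = wbc->wb->blkcg_css;  /* bio_associate_blkcg() */
    }

    int main(void)
    {
        struct css wb_css = { "memcg-writeback" };
        struct bdi_wb wb = { &wb_css };
        struct wbc_t with = { &wb }, without = { NULL };
        struct bio_t a = { NULL }, b = { NULL };

        wbc_init_bio_sketch(&with, &a);
        wbc_init_bio_sketch(&without, &b);
        printf("a: %s, b: %s\n", a.bi_css ? a.bi_css->name : "(none)",
               b.bi_css ? b.bi_css->name : "(none)");
        return 0;
    }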
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 8b79318810ad..6aaf5dd5383b 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c | |||
@@ -493,7 +493,7 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, | |||
493 | } | 493 | } |
494 | 494 | ||
495 | /** | 495 | /** |
496 | * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss | 496 | * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem |
497 | * @cgrp: the cgroup of interest | 497 | * @cgrp: the cgroup of interest |
498 | * @ss: the subsystem of interest (%NULL returns @cgrp->self) | 498 | * @ss: the subsystem of interest (%NULL returns @cgrp->self) |
499 | * | 499 | * |
@@ -502,8 +502,8 @@ static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp, | |||
502 | * enabled. If @ss is associated with the hierarchy @cgrp is on, this | 502 | * enabled. If @ss is associated with the hierarchy @cgrp is on, this |
503 | * function is guaranteed to return non-NULL css. | 503 | * function is guaranteed to return non-NULL css. |
504 | */ | 504 | */ |
505 | static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, | 505 | static struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, |
506 | struct cgroup_subsys *ss) | 506 | struct cgroup_subsys *ss) |
507 | { | 507 | { |
508 | lockdep_assert_held(&cgroup_mutex); | 508 | lockdep_assert_held(&cgroup_mutex); |
509 | 509 | ||
@@ -524,35 +524,6 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp, | |||
524 | } | 524 | } |
525 | 525 | ||
526 | /** | 526 | /** |
527 | * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem | ||
528 | * @cgrp: the cgroup of interest | ||
529 | * @ss: the subsystem of interest | ||
530 | * | ||
531 | * Find and get the effective css of @cgrp for @ss. The effective css is | ||
532 | * defined as the matching css of the nearest ancestor including self which | ||
533 | * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on, | ||
534 | * the root css is returned, so this function always returns a valid css. | ||
535 | * | ||
536 | * The returned css is not guaranteed to be online, and therefore it is the | ||
537 | * caller's responsibility to tryget a reference for it. | ||
538 | */ | ||
539 | struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp, | ||
540 | struct cgroup_subsys *ss) | ||
541 | { | ||
542 | struct cgroup_subsys_state *css; | ||
543 | |||
544 | do { | ||
545 | css = cgroup_css(cgrp, ss); | ||
546 | |||
547 | if (css) | ||
548 | return css; | ||
549 | cgrp = cgroup_parent(cgrp); | ||
550 | } while (cgrp); | ||
551 | |||
552 | return init_css_set.subsys[ss->id]; | ||
553 | } | ||
554 | |||
555 | /** | ||
556 | * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem | 527 | * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem |
557 | * @cgrp: the cgroup of interest | 528 | * @cgrp: the cgroup of interest |
558 | * @ss: the subsystem of interest | 529 | * @ss: the subsystem of interest |
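The removed cgroup_e_css() differs from the mask-based variant kept above: instead of consulting the enabled-subsystem mask under cgroup_mutex, it walks the ancestor chain until some cgroup provides a css and falls back to the init css set, so it always returns a valid (though possibly offline) css. A standalone sketch of that ancestor walk, with an illustrative struct layout.

    #include <stddef.h>
    #include <stdio.h>

    /* Minimal model of the removed walk; layout and names illustrative. */
    struct cg {
        const char *name;
        struct cg  *parent;
        int         has_css;   /* does this cgroup enable the subsystem? */
    };

    static struct cg *effective_css(struct cg *cgrp)
    {
        static struct cg init_fallback = { "init_css_set", NULL, 1 };

        /* walk toward the root until an ancestor provides a css */
        for (; cgrp; cgrp = cgrp->parent)
            if (cgrp->has_css)
                return cgrp;
        return &init_fallback; /* never NULL, mirroring the removed code */
    }

    int main(void)
    {
        struct cg root = { "root", NULL,  1 };
        struct cg mid  = { "mid",  &root, 0 };
        struct cg leaf = { "leaf", &mid,  0 };

        printf("effective css of leaf: %s\n", effective_css(&leaf)->name);
        return 0;
    }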
@@ -634,11 +605,10 @@ EXPORT_SYMBOL_GPL(of_css); | |||
634 | * | 605 | * |
635 | * Should be called under cgroup_[tree_]mutex. | 606 | * Should be called under cgroup_[tree_]mutex. |
636 | */ | 607 | */ |
637 | #define for_each_e_css(css, ssid, cgrp) \ | 608 | #define for_each_e_css(css, ssid, cgrp) \ |
638 | for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ | 609 | for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \ |
639 | if (!((css) = cgroup_e_css_by_mask(cgrp, \ | 610 | if (!((css) = cgroup_e_css(cgrp, cgroup_subsys[(ssid)]))) \ |
640 | cgroup_subsys[(ssid)]))) \ | 611 | ; \ |
641 | ; \ | ||
642 | else | 612 | else |
643 | 613 | ||
644 | /** | 614 | /** |
@@ -1037,7 +1007,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset, | |||
1037 | * @ss is in this hierarchy, so we want the | 1007 | * @ss is in this hierarchy, so we want the |
1038 | * effective css from @cgrp. | 1008 | * effective css from @cgrp. |
1039 | */ | 1009 | */ |
1040 | template[i] = cgroup_e_css_by_mask(cgrp, ss); | 1010 | template[i] = cgroup_e_css(cgrp, ss); |
1041 | } else { | 1011 | } else { |
1042 | /* | 1012 | /* |
1043 | * @ss is not in this hierarchy, so we don't want | 1013 | * @ss is not in this hierarchy, so we don't want |
@@ -3054,7 +3024,7 @@ static int cgroup_apply_control(struct cgroup *cgrp) | |||
3054 | return ret; | 3024 | return ret; |
3055 | 3025 | ||
3056 | /* | 3026 | /* |
3057 | * At this point, cgroup_e_css_by_mask() results reflect the new csses | 3027 | * At this point, cgroup_e_css() results reflect the new csses |
3058 | * making the following cgroup_update_dfl_csses() properly update | 3028 | * making the following cgroup_update_dfl_csses() properly update |
3059 | * css associations of all tasks in the subtree. | 3029 | * css associations of all tasks in the subtree. |
3060 | */ | 3030 | */ |
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index fac0ddf8a8e2..2868d85f1fb1 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -764,9 +764,9 @@ blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio) | |||
764 | if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) | 764 | if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP)) |
765 | return NULL; | 765 | return NULL; |
766 | 766 | ||
767 | if (!bio->bi_blkg) | 767 | if (!bio->bi_css) |
768 | return NULL; | 768 | return NULL; |
769 | return cgroup_get_kernfs_id(bio_blkcg(bio)->css.cgroup); | 769 | return cgroup_get_kernfs_id(bio->bi_css->cgroup); |
770 | } | 770 | } |
771 | #else | 771 | #else |
772 | static union kernfs_node_id * | 772 | static union kernfs_node_id * |
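With bi_css back on struct bio, blk_trace_bio_get_cgid() reads the cgroup's kernfs id straight off the bio's css instead of dereferencing the blkg. The shape is guard-then-map: bail out unless the cgroup tracing option is on and the bio carries an association. A toy version of that lookup; names are invented.

    #include <stddef.h>
    #include <stdio.h>

    /* Illustrative stand-in for the css and its kernfs id. */
    struct css_demo { unsigned long kernfs_id; };

    static const unsigned long *bio_cgid(int cgroup_tracing_on,
                                         const struct css_demo *bi_css)
    {
        if (!cgroup_tracing_on)
            return NULL;    /* TRACE_BLK_OPT_CGROUP equivalent is off */
        if (!bi_css)
            return NULL;    /* bio was never associated */
        return &bi_css->kernfs_id;
    }

    int main(void)
    {
        struct css_demo css = { 0xabcd };
        const unsigned long *id = bio_cgid(1, &css);

        printf("cgid: %lx\n", id ? *id : 0UL);
        printf("unassociated: %s\n", bio_cgid(1, NULL) ? "id" : "NULL");
        return 0;
    }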
diff --git a/mm/page_io.c b/mm/page_io.c index 27b835728442..d4d1c89bcddd 100644 --- a/mm/page_io.c +++ b/mm/page_io.c | |||
@@ -339,7 +339,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, | |||
339 | goto out; | 339 | goto out; |
340 | } | 340 | } |
341 | bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); | 341 | bio->bi_opf = REQ_OP_WRITE | REQ_SWAP | wbc_to_write_flags(wbc); |
342 | bio_associate_blkg_from_page(bio, page); | 342 | bio_associate_blkcg_from_page(bio, page); |
343 | count_swpout_vm_event(page); | 343 | count_swpout_vm_event(page); |
344 | set_page_writeback(page); | 344 | set_page_writeback(page); |
345 | unlock_page(page); | 345 | unlock_page(page); |