author     Linus Torvalds <torvalds@linux-foundation.org>   2018-12-28 16:19:59 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-12-28 16:19:59 -0500
commit     0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch)
tree       2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /block/bio.c
parent     b12a9124eeb71d766a3e3eb594ebbb3fefc66902 (diff)
parent     00203ba40d40d7f33857416adfb18adaf0e40123 (diff)
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
"This is the main pull request for block/storage for 4.21.
Larger than usual, it was a busy round with lots of goodies queued up.
Most notable is the removal of the old IO stack, which has been a long
time coming. No new features have gone in for a while; everything coming
in this week has been fixes for things that were previously merged.
This contains:
- Use atomic counters instead of semaphores for mtip32xx (Arnd)
- Cleanup of the mtip32xx request setup (Christoph)
- Fix for circular locking dependency in loop (Jan, Tetsuo)
- bcache (Coly, Guoju, Shenghui)
* Optimizations for writeback caching
* Various fixes and improvements
- nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith)
* host and target support for NVMe over TCP
* Error log page support
* Support for separate read/write/poll queues
* Much improved polling
* discard OOM fallback
* Tracepoint improvements
- lightnvm (Hans, Hua, Igor, Matias, Javier)
* Igor added packed metadata to pblk. Now drives without metadata
per LBA can be used as well.
* Fix from Geert for an uninitialized value on chunk metadata reads.
* Fixes from Hans and Javier to pblk recovery and write path.
* Fix from Hua Su for a race condition in the pblk recovery
code.
* Scan optimization added to pblk recovery from Zhoujie.
* Small geometry cleanup from me.
- Conversion of the last few drivers that used the legacy path to
blk-mq (me)
- Removal of legacy IO path in SCSI (me, Christoph)
- Removal of legacy IO stack and schedulers (me)
- Support for much better polling, now without interrupts at all.
blk-mq adds support for multiple queue maps, which enables us to
have a map per type. This in turn enables nvme to have separate
completion queues for polling, which can then be interrupt-less.
Also means we're ready for async polled IO, which is hopefully
coming in the next release (a sketch of the queue-map idea follows
this message).
- Killing of (now) unused block exports (Christoph)
- Unification of the blk-rq-qos and blk-wbt wait handling (Josef)
- Support for zoned testing with null_blk (Masato)
- sx8 conversion to per-host tag sets (Christoph)
- IO priority improvements (Damien)
- mq-deadline zoned fix (Damien)
- Ref count blkcg series (Dennis)
- Lots of blk-mq improvements and speedups (me)
- sbitmap scalability improvements (me)
- Make core inflight IO accounting per-cpu (Mikulas)
- Export timeout setting in sysfs (Weiping)
- Cleanup the direct issue path (Jianchao)
- Export blk-wbt internals in block debugfs for easier debugging
(Ming)
- Lots of other fixes and improvements"
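The queue-map sketch referenced in the polling bullet above: a minimal
sketch of a driver's .map_queues callback under the new per-type scheme.
The mydrv_* names and queue counts are hypothetical; struct
blk_mq_queue_map, the HCTX_TYPE_* indices, and the per-map
blk_mq_map_queues() helper are interfaces this series introduces, but
treat the wiring as an illustration rather than a buildable driver.

/* Hypothetical driver state: how many regular and poll queues exist. */
static unsigned int mydrv_nr_io_queues;
static unsigned int mydrv_nr_poll_queues;

static int mydrv_map_queues(struct blk_mq_tag_set *set)
{
	struct blk_mq_queue_map *def = &set->map[HCTX_TYPE_DEFAULT];
	struct blk_mq_queue_map *poll = &set->map[HCTX_TYPE_POLL];

	/* Regular queues come first; poll queues live past them. */
	def->nr_queues = mydrv_nr_io_queues;
	def->queue_offset = 0;
	blk_mq_map_queues(def);

	/* Poll queues get their own map, so their completions can be
	 * reaped by polling instead of by interrupt. */
	poll->nr_queues = mydrv_nr_poll_queues;
	poll->queue_offset = def->nr_queues;
	blk_mq_map_queues(poll);

	return 0;
}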
* tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits)
kyber: use sbitmap add_wait_queue/list_del wait helpers
sbitmap: add helpers for add/del wait queue handling
block: save irq state in blkg_lookup_create()
dm: don't reuse bio for flushes
nvme-pci: trace SQ status on completions
nvme-rdma: implement polling queue map
nvme-fabrics: allow user to pass in nr_poll_queues
nvme-fabrics: allow nvmf_connect_io_queue to poll
nvme-core: optionally poll sync commands
block: make request_to_qc_t public
nvme-tcp: fix spelling mistake "attepmpt" -> "attempt"
nvme-tcp: fix endianess annotations
nvmet-tcp: fix endianess annotations
nvme-pci: refactor nvme_poll_irqdisable to make sparse happy
nvme-pci: only set nr_maps to 2 if poll queues are supported
nvmet: use a macro for default error location
nvmet: fix comparison of a u16 with -1
blk-mq: enable IO poll if .nr_queues of type poll > 0
blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
blk-mq: skip zero-queue maps in blk_mq_map_swqueue
...
Diffstat (limited to 'block/bio.c')
-rw-r--r--	block/bio.c	| 202
1 file changed, 125 insertions(+), 77 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 4d86e90654b2..8281bfcbc265 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -244,7 +244,7 @@ fallback:
 
 void bio_uninit(struct bio *bio)
 {
-	bio_disassociate_task(bio);
+	bio_disassociate_blkg(bio);
 }
 EXPORT_SYMBOL(bio_uninit);
 
@@ -571,14 +571,13 @@ void bio_put(struct bio *bio)
 }
 EXPORT_SYMBOL(bio_put);
 
-inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
+int bio_phys_segments(struct request_queue *q, struct bio *bio)
 {
 	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
 		blk_recount_segments(q, bio);
 
 	return bio->bi_phys_segments;
 }
-EXPORT_SYMBOL(bio_phys_segments);
 
 /**
  * __bio_clone_fast - clone a bio that shares the original bio's biovec
@@ -610,7 +609,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
 
-	bio_clone_blkcg_association(bio, bio_src);
+	bio_clone_blkg_association(bio, bio_src);
+	blkcg_bio_issue_init(bio);
 }
 EXPORT_SYMBOL(__bio_clone_fast);
 
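A side effect worth noting: since bio_clone_blkg_association() now runs
inside __bio_clone_fast(), a stacking driver gets the cgroup association
of its clones for free. A sketch of the caller's view, assuming a
hypothetical mydrv_bio_set; bio_clone_fast() and submit_bio() are
existing interfaces:

struct bio *clone;

clone = bio_clone_fast(bio, GFP_NOIO, &mydrv_bio_set);
if (clone) {
	/* clone->bi_blkg now holds its own reference to the source's
	 * blkg, taken inside __bio_clone_fast(), and
	 * blkcg_bio_issue_init() has re-stamped the clone's issue time. */
	submit_bio(clone);
}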
@@ -901,7 +901,6 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
 
 static void submit_bio_wait_endio(struct bio *bio)
 {
@@ -1592,7 +1591,6 @@ void bio_set_pages_dirty(struct bio *bio)
 		set_page_dirty_lock(bvec->bv_page);
 	}
 }
-EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
 
 static void bio_release_pages(struct bio *bio)
 {
@@ -1662,17 +1660,33 @@ defer:
 	spin_unlock_irqrestore(&bio_dirty_lock, flags);
 	schedule_work(&bio_dirty_work);
 }
-EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
+
+void update_io_ticks(struct hd_struct *part, unsigned long now)
+{
+	unsigned long stamp;
+again:
+	stamp = READ_ONCE(part->stamp);
+	if (unlikely(stamp != now)) {
+		if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
+			__part_stat_add(part, io_ticks, 1);
+		}
+	}
+	if (part->partno) {
+		part = &part_to_disk(part)->part0;
+		goto again;
+	}
+}
 
 void generic_start_io_acct(struct request_queue *q, int op,
 			   unsigned long sectors, struct hd_struct *part)
 {
 	const int sgrp = op_stat_group(op);
-	int cpu = part_stat_lock();
 
-	part_round_stats(q, cpu, part);
-	part_stat_inc(cpu, part, ios[sgrp]);
-	part_stat_add(cpu, part, sectors[sgrp], sectors);
+	part_stat_lock();
+
+	update_io_ticks(part, jiffies);
+	part_stat_inc(part, ios[sgrp]);
+	part_stat_add(part, sectors[sgrp], sectors);
 	part_inc_in_flight(q, part, op_is_write(op));
 
 	part_stat_unlock();
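The new update_io_ticks() replaces the old part_round_stats() sampling:
io_ticks advances at most once per jiffy, credited by whichever CPU wins
a cmpxchg on the partition's stamp, and a tick on a partition is then
replayed against the whole-disk part0. A self-contained userspace model
of that logic, with C11 atomics standing in for READ_ONCE()/cmpxchg()
and a plain counter standing in for the percpu io_ticks stat:

#include <stdatomic.h>
#include <stdio.h>

struct part_model {
	_Atomic unsigned long stamp;	/* last jiffy a tick was accounted */
	unsigned long io_ticks;		/* busy-time counter (percpu in the kernel) */
	int partno;			/* 0 means the whole disk */
	struct part_model *disk_part0;	/* stand-in for part_to_disk()->part0 */
};

static void update_io_ticks_model(struct part_model *part, unsigned long now)
{
	unsigned long stamp;
again:
	stamp = atomic_load_explicit(&part->stamp, memory_order_relaxed);
	if (stamp != now &&
	    atomic_compare_exchange_strong(&part->stamp, &stamp, now))
		part->io_ticks++;	/* only the cmpxchg winner accounts this jiffy */
	if (part->partno) {		/* a partition tick also counts for part0 */
		part = part->disk_part0;
		goto again;
	}
}

int main(void)
{
	struct part_model disk = { .partno = 0 };
	struct part_model p1 = { .partno = 1, .disk_part0 = &disk };

	update_io_ticks_model(&p1, 100);	/* first IO in jiffy 100: +1 tick */
	update_io_ticks_model(&p1, 100);	/* same jiffy: no extra tick */
	update_io_ticks_model(&p1, 101);	/* new jiffy: +1 tick */
	printf("p1 io_ticks=%lu disk io_ticks=%lu\n", p1.io_ticks, disk.io_ticks);
	return 0;
}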
@@ -1682,12 +1696,15 @@ EXPORT_SYMBOL(generic_start_io_acct);
 void generic_end_io_acct(struct request_queue *q, int req_op,
 			 struct hd_struct *part, unsigned long start_time)
 {
-	unsigned long duration = jiffies - start_time;
+	unsigned long now = jiffies;
+	unsigned long duration = now - start_time;
 	const int sgrp = op_stat_group(req_op);
-	int cpu = part_stat_lock();
 
-	part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
-	part_round_stats(q, cpu, part);
+	part_stat_lock();
+
+	update_io_ticks(part, now);
+	part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
+	part_stat_add(part, time_in_queue, duration);
 	part_dec_in_flight(q, part, op_is_write(req_op));
 
 	part_stat_unlock();
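For bio-based drivers the accounting pair keeps its call pattern; only
the internals changed. A hedged sketch of a make_request-style driver
using it, where the mydrv_* names and the synchronous completion are
hypothetical and only the generic_*_io_acct() signatures come from the
hunks above:

static blk_qc_t mydrv_make_request(struct request_queue *q, struct bio *bio)
{
	unsigned long start_time = jiffies;

	/* Accounts ios/sectors, bumps in-flight, and now stamps io_ticks. */
	generic_start_io_acct(q, bio_op(bio), bio_sectors(bio),
			      &bio->bi_disk->part0);

	mydrv_do_io(bio);	/* hypothetical: perform the transfer */

	/* Accounts nsecs/time_in_queue and drops the in-flight count. */
	generic_end_io_acct(q, bio_op(bio), &bio->bi_disk->part0, start_time);
	bio_endio(bio);
	return BLK_QC_T_NONE;
}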
@@ -1957,102 +1974,133 @@ EXPORT_SYMBOL(bioset_init_from_src);
 
 #ifdef CONFIG_BLK_CGROUP
 
-#ifdef CONFIG_MEMCG
 /**
- * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
+ * bio_disassociate_blkg - puts back the blkg reference if associated
  * @bio: target bio
- * @page: the page to lookup the blkcg from
  *
- * Associate @bio with the blkcg from @page's owning memcg. This works like
- * every other associate function wrt references.
+ * Helper to disassociate the blkg from @bio if a blkg is associated.
  */
-int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
+void bio_disassociate_blkg(struct bio *bio)
 {
-	struct cgroup_subsys_state *blkcg_css;
-
-	if (unlikely(bio->bi_css))
-		return -EBUSY;
-	if (!page->mem_cgroup)
-		return 0;
-	blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
-				     &io_cgrp_subsys);
-	bio->bi_css = blkcg_css;
-	return 0;
+	if (bio->bi_blkg) {
+		blkg_put(bio->bi_blkg);
+		bio->bi_blkg = NULL;
+	}
 }
-#endif /* CONFIG_MEMCG */
+EXPORT_SYMBOL_GPL(bio_disassociate_blkg);
 
 /**
- * bio_associate_blkcg - associate a bio with the specified blkcg
+ * __bio_associate_blkg - associate a bio with a blkg
  * @bio: target bio
- * @blkcg_css: css of the blkcg to associate
+ * @blkg: the blkg to associate
  *
- * Associate @bio with the blkcg specified by @blkcg_css. Block layer will
- * treat @bio as if it were issued by a task which belongs to the blkcg.
+ * This tries to associate @bio with the specified @blkg. Association failure
+ * is handled by walking up the blkg tree. Therefore, the blkg associated can
+ * be anything between @blkg and the root_blkg. This situation only happens
+ * when a cgroup is dying and then the remaining bios will spill to the closest
+ * alive blkg.
  *
- * This function takes an extra reference of @blkcg_css which will be put
- * when @bio is released. The caller must own @bio and is responsible for
- * synchronizing calls to this function.
+ * A reference will be taken on the @blkg and will be released when @bio is
+ * freed.
  */
-int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
+static void __bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
 {
-	if (unlikely(bio->bi_css))
-		return -EBUSY;
-	css_get(blkcg_css);
-	bio->bi_css = blkcg_css;
-	return 0;
+	bio_disassociate_blkg(bio);
+
+	bio->bi_blkg = blkg_tryget_closest(blkg);
 }
-EXPORT_SYMBOL_GPL(bio_associate_blkcg);
 
 /**
- * bio_associate_blkg - associate a bio with the specified blkg
+ * bio_associate_blkg_from_css - associate a bio with a specified css
  * @bio: target bio
- * @blkg: the blkg to associate
+ * @css: target css
  *
- * Associate @bio with the blkg specified by @blkg. This is the queue specific
- * blkcg information associated with the @bio, a reference will be taken on the
- * @blkg and will be freed when the bio is freed.
+ * Associate @bio with the blkg found by combining the css's blkg and the
+ * request_queue of the @bio. This falls back to the queue's root_blkg if
+ * the association fails with the css.
  */
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
+void bio_associate_blkg_from_css(struct bio *bio,
+				 struct cgroup_subsys_state *css)
 {
-	if (unlikely(bio->bi_blkg))
-		return -EBUSY;
-	if (!blkg_try_get(blkg))
-		return -ENODEV;
-	bio->bi_blkg = blkg;
-	return 0;
+	struct request_queue *q = bio->bi_disk->queue;
+	struct blkcg_gq *blkg;
+
+	rcu_read_lock();
+
+	if (!css || !css->parent)
+		blkg = q->root_blkg;
+	else
+		blkg = blkg_lookup_create(css_to_blkcg(css), q);
+
+	__bio_associate_blkg(bio, blkg);
+
+	rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
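The fallback described in the comment above is implemented by
blkg_tryget_closest(): if the target blkg belongs to a dying cgroup, the
reference lands on the closest alive ancestor instead. A small userspace
model of that walk, with illustrative stand-in types rather than the
kernel's blkcg_gq and percpu refcounts:

struct blkg_model {
	struct blkg_model *parent;	/* NULL at the root blkg */
	int refcnt;			/* 0 models a dead cgroup */
};

static struct blkg_model *tryget_closest_model(struct blkg_model *blkg)
{
	/* Walk toward the root until a reference can be taken, so bios
	 * of a dying cgroup spill to the closest alive ancestor. */
	while (blkg && blkg->refcnt == 0)
		blkg = blkg->parent;
	if (blkg)
		blkg->refcnt++;
	return blkg;
}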
 
+#ifdef CONFIG_MEMCG
 /**
- * bio_disassociate_task - undo bio_associate_current()
+ * bio_associate_blkg_from_page - associate a bio with the page's blkg
  * @bio: target bio
+ * @page: the page to lookup the blkcg from
+ *
+ * Associate @bio with the blkg from @page's owning memcg and the respective
+ * request_queue. If cgroup_e_css returns %NULL, fall back to the queue's
+ * root_blkg.
  */
-void bio_disassociate_task(struct bio *bio)
+void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
 {
-	if (bio->bi_ioc) {
-		put_io_context(bio->bi_ioc);
-		bio->bi_ioc = NULL;
-	}
-	if (bio->bi_css) {
-		css_put(bio->bi_css);
-		bio->bi_css = NULL;
-	}
-	if (bio->bi_blkg) {
-		blkg_put(bio->bi_blkg);
-		bio->bi_blkg = NULL;
-	}
+	struct cgroup_subsys_state *css;
+
+	if (!page->mem_cgroup)
+		return;
+
+	rcu_read_lock();
+
+	css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
+	bio_associate_blkg_from_css(bio, css);
+
+	rcu_read_unlock();
+}
+#endif /* CONFIG_MEMCG */
+
+/**
+ * bio_associate_blkg - associate a bio with a blkg
+ * @bio: target bio
+ *
+ * Associate @bio with the blkg found from the bio's css and request_queue.
+ * If one is not found, bio_lookup_blkg() creates the blkg.  If a blkg is
+ * already associated, the css is reused and association redone as the
+ * request_queue may have changed.
+ */
+void bio_associate_blkg(struct bio *bio)
+{
+	struct cgroup_subsys_state *css;
+
+	rcu_read_lock();
+
+	if (bio->bi_blkg)
+		css = &bio_blkcg(bio)->css;
+	else
+		css = blkcg_css();
+
+	bio_associate_blkg_from_css(bio, css);
+
+	rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(bio_associate_blkg);
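bio_associate_blkg() is rarely called by hand. Elsewhere in this series
(the Dennis Zhou ref-count work) bio_set_dev() is made to invoke it, so
a bio picks up a blkg as soon as it is pointed at a device, and
re-targeting redoes the association against the new queue while reusing
the css. A sketch of the resulting caller's view, assuming bio and the
two block_devices already exist:

/* Submission path: pointing the bio at a device associates it with
 * the current task's css and bdev's request_queue. */
bio_set_dev(bio, bdev);

/* A stacking driver retargeting the bio re-runs the association:
 * the css is kept, the blkg is looked up on the new queue. */
bio_set_dev(bio, other_bdev);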
 
 /**
- * bio_clone_blkcg_association - clone blkcg association from src to dst bio
+ * bio_clone_blkg_association - clone blkg association from src to dst bio
  * @dst: destination bio
  * @src: source bio
  */
-void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
+void bio_clone_blkg_association(struct bio *dst, struct bio *src)
 {
-	if (src->bi_css)
-		WARN_ON(bio_associate_blkcg(dst, src->bi_css));
+	if (src->bi_blkg)
+		__bio_associate_blkg(dst, src->bi_blkg);
 }
-EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
+EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
 #endif /* CONFIG_BLK_CGROUP */
 
 static void __init biovec_init_slabs(void)