author    Linus Torvalds <torvalds@linux-foundation.org>  2018-12-28 16:19:59 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-12-28 16:19:59 -0500
commit    0e9da3fbf7d81f0f913b491c8de1ba7883d4f217 (patch)
tree      2b3d25e3be60bf4ee40b4690c7bb9d6fa499ae69 /block/bio.c
parent    b12a9124eeb71d766a3e3eb594ebbb3fefc66902 (diff)
parent    00203ba40d40d7f33857416adfb18adaf0e40123 (diff)
Merge tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block
Pull block updates from Jens Axboe:
 "This is the main pull request for block/storage for 4.21.

  Larger than usual, it was a busy round with lots of goodies queued
  up. Most notable is the removal of the old IO stack, which has been a
  long time coming. No new features for a while; everything coming in
  this week has been fixes for things that were previously merged.

  This contains:

   - Use atomic counters instead of semaphores for mtip32xx (Arnd)

   - Cleanup of the mtip32xx request setup (Christoph)

   - Fix for circular locking dependency in loop (Jan, Tetsuo)

   - bcache (Coly, Guoju, Shenghui)
      * Optimizations for writeback caching
      * Various fixes and improvements

   - nvme (Chaitanya, Christoph, Sagi, Jay, me, Keith)
      * host and target support for NVMe over TCP
      * Error log page support
      * Support for separate read/write/poll queues
      * Much improved polling
      * discard OOM fallback
      * Tracepoint improvements

   - lightnvm (Hans, Hua, Igor, Matias, Javier)
      * Igor added packed metadata to pblk. Now drives without metadata
        per LBA can be used as well.
      * Fix from Geert on uninitialized value on chunk metadata reads.
      * Fixes from Hans and Javier to pblk recovery and write path.
      * Fix from Hua Su for a race condition in the pblk recovery code.
      * Scan optimization added to pblk recovery from Zhoujie.
      * Small geometry cleanup from me.

   - Conversion of the last few drivers that used the legacy path to
     blk-mq (me)

   - Removal of legacy IO path in SCSI (me, Christoph)

   - Removal of legacy IO stack and schedulers (me)

   - Support for much better polling, now without interrupts at all.
     blk-mq adds support for multiple queue maps, which enables us to
     have a map per type. This in turn enables nvme to have separate
     completion queues for polling, which can then be interrupt-less.
     It also means we're ready for async polled IO, which is hopefully
     coming in the next release. (A sketch of the queue-map setup
     follows the shortlog below.)

   - Killing of (now) unused block exports (Christoph)

   - Unification of the blk-rq-qos and blk-wbt wait handling (Josef)

   - Support for zoned testing with null_blk (Masato)

   - sx8 conversion to per-host tag sets (Christoph)

   - IO priority improvements (Damien)

   - mq-deadline zoned fix (Damien)

   - Ref count blkcg series (Dennis)

   - Lots of blk-mq improvements and speedups (me)

   - sbitmap scalability improvements (me)

   - Make core inflight IO accounting per-cpu (Mikulas)

   - Export timeout setting in sysfs (Weiping)

   - Cleanup the direct issue path (Jianchao)

   - Export blk-wbt internals in block debugfs for easier debugging
     (Ming)

   - Lots of other fixes and improvements"

* tag 'for-4.21/block-20181221' of git://git.kernel.dk/linux-block: (364 commits)
  kyber: use sbitmap add_wait_queue/list_del wait helpers
  sbitmap: add helpers for add/del wait queue handling
  block: save irq state in blkg_lookup_create()
  dm: don't reuse bio for flushes
  nvme-pci: trace SQ status on completions
  nvme-rdma: implement polling queue map
  nvme-fabrics: allow user to pass in nr_poll_queues
  nvme-fabrics: allow nvmf_connect_io_queue to poll
  nvme-core: optionally poll sync commands
  block: make request_to_qc_t public
  nvme-tcp: fix spelling mistake "attepmpt" -> "attempt"
  nvme-tcp: fix endianess annotations
  nvmet-tcp: fix endianess annotations
  nvme-pci: refactor nvme_poll_irqdisable to make sparse happy
  nvme-pci: only set nr_maps to 2 if poll queues are supported
  nvmet: use a macro for default error location
  nvmet: fix comparison of a u16 with -1
  blk-mq: enable IO poll if .nr_queues of type poll > 0
  blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight()
  blk-mq: skip zero-queue maps in blk_mq_map_swqueue
  ...
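The queue-map item above is the hook that makes interrupt-less polling possible. As a rough sketch only (not code from this merge; "mydrv", nr_io_queues and nr_poll_queues are hypothetical driver state), a blk-mq driver on this kernel might populate a dedicated poll map like this:

  #include <linux/blk-mq.h>

  /* Hypothetical driver: regular I/O goes to nr_io_queues IRQ-driven
   * hardware queues; polled I/O goes to nr_poll_queues queues that
   * never raise an interrupt and are reaped via ->poll() instead. */
  static unsigned int nr_io_queues, nr_poll_queues;  /* set at probe */

  static int mydrv_map_queues(struct blk_mq_tag_set *set)
  {
          struct blk_mq_queue_map *map = &set->map[HCTX_TYPE_DEFAULT];

          map->nr_queues = nr_io_queues;
          map->queue_offset = 0;
          blk_mq_map_queues(map);         /* default CPU-to-queue spread */

          map = &set->map[HCTX_TYPE_POLL];
          map->nr_queues = nr_poll_queues;
          map->queue_offset = nr_io_queues; /* poll queues follow IRQ ones */
          blk_mq_map_queues(map);

          return 0;
  }

At tag-set allocation the driver would then raise set->nr_maps so the poll map index is covered and wire up set->ops->map_queues = mydrv_map_queues; nvme-pci follows the same shape in this series.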
Diffstat (limited to 'block/bio.c')
-rw-r--r--  block/bio.c  202
1 file changed, 125 insertions(+), 77 deletions(-)
diff --git a/block/bio.c b/block/bio.c
index 4d86e90654b2..8281bfcbc265 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -244,7 +244,7 @@ fallback:
 
 void bio_uninit(struct bio *bio)
 {
-	bio_disassociate_task(bio);
+	bio_disassociate_blkg(bio);
 }
 EXPORT_SYMBOL(bio_uninit);
 
@@ -571,14 +571,13 @@ void bio_put(struct bio *bio)
 }
 EXPORT_SYMBOL(bio_put);
 
-inline int bio_phys_segments(struct request_queue *q, struct bio *bio)
+int bio_phys_segments(struct request_queue *q, struct bio *bio)
 {
 	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
 		blk_recount_segments(q, bio);
 
 	return bio->bi_phys_segments;
 }
-EXPORT_SYMBOL(bio_phys_segments);
 
 /**
  * __bio_clone_fast - clone a bio that shares the original bio's biovec
@@ -610,7 +609,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
 	bio->bi_iter = bio_src->bi_iter;
 	bio->bi_io_vec = bio_src->bi_io_vec;
 
-	bio_clone_blkcg_association(bio, bio_src);
+	bio_clone_blkg_association(bio, bio_src);
+	blkcg_bio_issue_init(bio);
 }
 EXPORT_SYMBOL(__bio_clone_fast);
 
@@ -901,7 +901,6 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages);
 
 static void submit_bio_wait_endio(struct bio *bio)
 {
@@ -1592,7 +1591,6 @@ void bio_set_pages_dirty(struct bio *bio)
 		set_page_dirty_lock(bvec->bv_page);
 	}
 }
-EXPORT_SYMBOL_GPL(bio_set_pages_dirty);
 
 static void bio_release_pages(struct bio *bio)
 {
@@ -1662,17 +1660,33 @@ defer:
 	spin_unlock_irqrestore(&bio_dirty_lock, flags);
 	schedule_work(&bio_dirty_work);
 }
-EXPORT_SYMBOL_GPL(bio_check_pages_dirty);
+
+void update_io_ticks(struct hd_struct *part, unsigned long now)
+{
+	unsigned long stamp;
+again:
+	stamp = READ_ONCE(part->stamp);
+	if (unlikely(stamp != now)) {
+		if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
+			__part_stat_add(part, io_ticks, 1);
+		}
+	}
+	if (part->partno) {
+		part = &part_to_disk(part)->part0;
+		goto again;
+	}
+}
 
 void generic_start_io_acct(struct request_queue *q, int op,
 			   unsigned long sectors, struct hd_struct *part)
 {
 	const int sgrp = op_stat_group(op);
-	int cpu = part_stat_lock();
 
-	part_round_stats(q, cpu, part);
-	part_stat_inc(cpu, part, ios[sgrp]);
-	part_stat_add(cpu, part, sectors[sgrp], sectors);
+	part_stat_lock();
+
+	update_io_ticks(part, jiffies);
+	part_stat_inc(part, ios[sgrp]);
+	part_stat_add(part, sectors[sgrp], sectors);
 	part_inc_in_flight(q, part, op_is_write(op));
 
 	part_stat_unlock();
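The new update_io_ticks() replaces the old part_round_stats() sampling: every submitter races a cmpxchg() on the partition's stamp, only the winner credits one jiffy to io_ticks, and the loop then repeats the same dance on part0 so the whole-disk counter is bumped too. io_ticks thus becomes an approximation, advancing at most once per jiffy however many CPUs have I/O in flight, in exchange for dropping the per-cpu rounding work. A minimal userspace sketch of the same lockless pattern, using C11 atomics in place of the kernel's READ_ONCE()/cmpxchg() (the tick loop is assumed context, not code from this diff):

  #include <stdatomic.h>
  #include <stdio.h>

  static _Atomic unsigned long stamp;     /* last tick anyone accounted */
  static _Atomic unsigned long io_ticks;  /* ticks with at least one I/O */

  static void update_io_ticks(unsigned long now)
  {
          unsigned long old = atomic_load(&stamp);

          /* Only the caller that successfully advances the stamp adds a
           * tick; concurrent losers see stamp == now and do nothing. */
          if (old != now && atomic_compare_exchange_strong(&stamp, &old, now))
                  atomic_fetch_add(&io_ticks, 1);
  }

  int main(void)
  {
          for (unsigned long now = 1; now <= 5; now++) {
                  update_io_ticks(now);   /* first submitter in the tick wins */
                  update_io_ticks(now);   /* second one is a no-op */
          }
          printf("io_ticks = %lu\n", atomic_load(&io_ticks)); /* prints 5 */
          return 0;
  }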
@@ -1682,12 +1696,15 @@ EXPORT_SYMBOL(generic_start_io_acct);
 void generic_end_io_acct(struct request_queue *q, int req_op,
 			 struct hd_struct *part, unsigned long start_time)
 {
-	unsigned long duration = jiffies - start_time;
+	unsigned long now = jiffies;
+	unsigned long duration = now - start_time;
 	const int sgrp = op_stat_group(req_op);
-	int cpu = part_stat_lock();
 
-	part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration));
-	part_round_stats(q, cpu, part);
+	part_stat_lock();
+
+	update_io_ticks(part, now);
+	part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
+	part_stat_add(part, time_in_queue, duration);
 	part_dec_in_flight(q, part, op_is_write(req_op));
 
 	part_stat_unlock();
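For context on how a driver consumes this pair (a hedged sketch, not code from this diff; "mydrv" and its disk are hypothetical): the caller only remembers the start jiffies, while the helpers take part_stat_lock() and drive update_io_ticks() internally.

  #include <linux/bio.h>
  #include <linux/blkdev.h>
  #include <linux/genhd.h>

  static struct gendisk *mydrv_disk;    /* hypothetical, set up at probe */
  static void mydrv_do_io(struct bio *bio); /* the actual transfer, elided */

  static blk_qc_t mydrv_make_request(struct request_queue *q, struct bio *bio)
  {
          unsigned long start = jiffies;
          struct hd_struct *part = &mydrv_disk->part0;

          generic_start_io_acct(q, bio_op(bio), bio_sectors(bio), part);

          mydrv_do_io(bio);

          generic_end_io_acct(q, bio_op(bio), part, start);
          bio_endio(bio);
          return BLK_QC_T_NONE;
  }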
@@ -1957,102 +1974,133 @@ EXPORT_SYMBOL(bioset_init_from_src);
 
 #ifdef CONFIG_BLK_CGROUP
 
-#ifdef CONFIG_MEMCG
-/**
- * bio_associate_blkcg_from_page - associate a bio with the page's blkcg
- * @bio: target bio
- * @page: the page to lookup the blkcg from
- *
- * Associate @bio with the blkcg from @page's owning memcg. This works like
- * every other associate function wrt references.
- */
-int bio_associate_blkcg_from_page(struct bio *bio, struct page *page)
-{
-	struct cgroup_subsys_state *blkcg_css;
-
-	if (unlikely(bio->bi_css))
-		return -EBUSY;
-	if (!page->mem_cgroup)
-		return 0;
-	blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup,
-				     &io_cgrp_subsys);
-	bio->bi_css = blkcg_css;
-	return 0;
-}
-#endif /* CONFIG_MEMCG */
-
-/**
- * bio_associate_blkcg - associate a bio with the specified blkcg
- * @bio: target bio
- * @blkcg_css: css of the blkcg to associate
- *
- * Associate @bio with the blkcg specified by @blkcg_css. Block layer will
- * treat @bio as if it were issued by a task which belongs to the blkcg.
- *
- * This function takes an extra reference of @blkcg_css which will be put
- * when @bio is released. The caller must own @bio and is responsible for
- * synchronizing calls to this function.
- */
-int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css)
-{
-	if (unlikely(bio->bi_css))
-		return -EBUSY;
-	css_get(blkcg_css);
-	bio->bi_css = blkcg_css;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(bio_associate_blkcg);
-
-/**
- * bio_associate_blkg - associate a bio with the specified blkg
- * @bio: target bio
- * @blkg: the blkg to associate
- *
- * Associate @bio with the blkg specified by @blkg. This is the queue specific
- * blkcg information associated with the @bio, a reference will be taken on the
- * @blkg and will be freed when the bio is freed.
- */
-int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
-{
-	if (unlikely(bio->bi_blkg))
-		return -EBUSY;
-	if (!blkg_try_get(blkg))
-		return -ENODEV;
-	bio->bi_blkg = blkg;
-	return 0;
-}
-
-/**
- * bio_disassociate_task - undo bio_associate_current()
- * @bio: target bio
- */
-void bio_disassociate_task(struct bio *bio)
-{
-	if (bio->bi_ioc) {
-		put_io_context(bio->bi_ioc);
-		bio->bi_ioc = NULL;
-	}
-	if (bio->bi_css) {
-		css_put(bio->bi_css);
-		bio->bi_css = NULL;
-	}
-	if (bio->bi_blkg) {
-		blkg_put(bio->bi_blkg);
-		bio->bi_blkg = NULL;
-	}
-}
-
-/**
- * bio_clone_blkcg_association - clone blkcg association from src to dst bio
- * @dst: destination bio
- * @src: source bio
- */
-void bio_clone_blkcg_association(struct bio *dst, struct bio *src)
-{
-	if (src->bi_css)
-		WARN_ON(bio_associate_blkcg(dst, src->bi_css));
-}
-EXPORT_SYMBOL_GPL(bio_clone_blkcg_association);
+/**
+ * bio_disassociate_blkg - puts back the blkg reference if associated
+ * @bio: target bio
+ *
+ * Helper to disassociate the blkg from @bio if a blkg is associated.
+ */
+void bio_disassociate_blkg(struct bio *bio)
+{
+	if (bio->bi_blkg) {
+		blkg_put(bio->bi_blkg);
+		bio->bi_blkg = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(bio_disassociate_blkg);
+
+/**
+ * __bio_associate_blkg - associate a bio with the a blkg
+ * @bio: target bio
+ * @blkg: the blkg to associate
+ *
+ * This tries to associate @bio with the specified @blkg. Association failure
+ * is handled by walking up the blkg tree. Therefore, the blkg associated can
+ * be anything between @blkg and the root_blkg. This situation only happens
+ * when a cgroup is dying and then the remaining bios will spill to the closest
+ * alive blkg.
+ *
+ * A reference will be taken on the @blkg and will be released when @bio is
+ * freed.
+ */
+static void __bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg)
+{
+	bio_disassociate_blkg(bio);
+
+	bio->bi_blkg = blkg_tryget_closest(blkg);
+}
+
+/**
+ * bio_associate_blkg_from_css - associate a bio with a specified css
+ * @bio: target bio
+ * @css: target css
+ *
+ * Associate @bio with the blkg found by combining the css's blkg and the
+ * request_queue of the @bio. This falls back to the queue's root_blkg if
+ * the association fails with the css.
+ */
+void bio_associate_blkg_from_css(struct bio *bio,
+				 struct cgroup_subsys_state *css)
+{
+	struct request_queue *q = bio->bi_disk->queue;
+	struct blkcg_gq *blkg;
+
+	rcu_read_lock();
+
+	if (!css || !css->parent)
+		blkg = q->root_blkg;
+	else
+		blkg = blkg_lookup_create(css_to_blkcg(css), q);
+
+	__bio_associate_blkg(bio, blkg);
+
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(bio_associate_blkg_from_css);
+
+#ifdef CONFIG_MEMCG
+/**
+ * bio_associate_blkg_from_page - associate a bio with the page's blkg
+ * @bio: target bio
+ * @page: the page to lookup the blkcg from
+ *
+ * Associate @bio with the blkg from @page's owning memcg and the respective
+ * request_queue. If cgroup_e_css returns %NULL, fall back to the queue's
+ * root_blkg.
+ */
+void bio_associate_blkg_from_page(struct bio *bio, struct page *page)
+{
+	struct cgroup_subsys_state *css;
+
+	if (!page->mem_cgroup)
+		return;
+
+	rcu_read_lock();
+
+	css = cgroup_e_css(page->mem_cgroup->css.cgroup, &io_cgrp_subsys);
+	bio_associate_blkg_from_css(bio, css);
+
+	rcu_read_unlock();
+}
+#endif /* CONFIG_MEMCG */
+
+/**
+ * bio_associate_blkg - associate a bio with a blkg
+ * @bio: target bio
+ *
+ * Associate @bio with the blkg found from the bio's css and request_queue.
+ * If one is not found, bio_lookup_blkg() creates the blkg. If a blkg is
+ * already associated, the css is reused and association redone as the
+ * request_queue may have changed.
+ */
+void bio_associate_blkg(struct bio *bio)
+{
+	struct cgroup_subsys_state *css;
+
+	rcu_read_lock();
+
+	if (bio->bi_blkg)
+		css = &bio_blkcg(bio)->css;
+	else
+		css = blkcg_css();
+
+	bio_associate_blkg_from_css(bio, css);
+
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(bio_associate_blkg);
+
+/**
+ * bio_clone_blkg_association - clone blkg association from src to dst bio
+ * @dst: destination bio
+ * @src: source bio
+ */
+void bio_clone_blkg_association(struct bio *dst, struct bio *src)
+{
+	if (src->bi_blkg)
+		__bio_associate_blkg(dst, src->bi_blkg);
+}
+EXPORT_SYMBOL_GPL(bio_clone_blkg_association);
 #endif /* CONFIG_BLK_CGROUP */
 
 static void __init biovec_init_slabs(void)
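A closing note on what the blkg rework buys a stacking driver (a hedged sketch, not code from this diff; mydrv_bs is a hypothetical bio_set initialised elsewhere with bioset_init()): __bio_clone_fast() now copies the blkg association via bio_clone_blkg_association(), so a clone holds its own blkg reference, charged to the originating cgroup, and that reference is dropped automatically by bio_uninit() -> bio_disassociate_blkg() when the clone is freed.

  #include <linux/bio.h>
  #include <linux/blkdev.h>

  static struct bio_set mydrv_bs;       /* hypothetical per-driver bio_set */

  static void mydrv_remap_and_submit(struct bio *bio, sector_t offset)
  {
          struct bio *clone = bio_clone_fast(bio, GFP_NOIO, &mydrv_bs);

          if (!clone)
                  return;               /* error handling elided */

          /* clone->bi_blkg was taken by bio_clone_blkg_association();
           * no manual css/blkg juggling is needed any more. */
          clone->bi_iter.bi_sector += offset;
          generic_make_request(clone);
  }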