author     Linus Torvalds <torvalds@linux-foundation.org>    2015-09-02 16:10:25 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2015-09-02 16:10:25 -0400
commit     1081230b748de8f03f37f80c53dfa89feda9b8de
tree       7238d60e01f0843bad8f03b5d84e4220fbba5e76  /drivers/md/dm.c
parent     df910390e2db07a76c87f258475f6c96253cee6c
parent     2ca495ac27d245513c11fed70591b1838250e240
Merge branch 'for-4.3/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:
 "This first core part of the block IO changes contains:

  - Cleanup of the bio IO error signaling from Christoph.  We used to
    rely on the uptodate bit and passing around of an error, now we
    store the error in the bio itself.

  - Improvement of the above from myself, by shrinking the bio size
    down again to fit in two cachelines on x86-64.

  - Revert of the max_hw_sectors cap removal from a revision again,
    from Jeff Moyer.  This caused performance regressions in various
    tests.  Reinstate the limit, bump it to a more reasonable size
    instead.

  - Make /sys/block/<dev>/queue/discard_max_bytes writeable, by me.
    Most devices have huge trim limits, which can cause nasty latencies
    when deleting files.  Enable the admin to configure the size down.
    We will look into having a more sane default instead of UINT_MAX
    sectors.

  - Improvement of the SG gaps logic from Keith Busch.

  - Enable the block core to handle arbitrarily sized bios, which
    enables a nice simplification of bio_add_page() (which is an IO hot
    path).  From Kent.

  - Improvements to the partition io stats accounting, making it
    faster.  From Ming Lei.

  - Also from Ming Lei, a basic fixup for overflow of the sysfs pending
    file in blk-mq, as well as a fix for a blk-mq timeout race
    condition.

  - Ming Lin has been carrying Kent's above mentioned patches forward
    for a while, and testing them.  Ming also did a few fixes around
    that.

  - Sasha Levin found and fixed a use-after-free problem introduced by
    the bio->bi_error changes from Christoph.

  - Small blk cgroup cleanup from Viresh Kumar"

* 'for-4.3/core' of git://git.kernel.dk/linux-block: (26 commits)
  blk: Fix bio_io_vec index when checking bvec gaps
  block: Replace SG_GAPS with new queue limits mask
  block: bump BLK_DEF_MAX_SECTORS to 2560
  Revert "block: remove artifical max_hw_sectors cap"
  blk-mq: fix race between timeout and freeing request
  blk-mq: fix buffer overflow when reading sysfs file of 'pending'
  Documentation: update notes in biovecs about arbitrarily sized bios
  block: remove bio_get_nr_vecs()
  fs: use helper bio_add_page() instead of open coding on bi_io_vec
  block: kill merge_bvec_fn() completely
  md/raid5: get rid of bio_fits_rdev()
  md/raid5: split bio for chunk_aligned_read
  block: remove split code in blkdev_issue_{discard,write_same}
  btrfs: remove bio splitting and merge_bvec_fn() calls
  bcache: remove driver private bio splitting code
  block: simplify bio_add_page()
  block: make generic_make_request handle arbitrarily sized bios
  blk-cgroup: Drop unlikely before IS_ERR(_OR_NULL)
  block: don't access bio->bi_error after bio_put()
  block: shrink struct bio down to 2 cache lines again
  ...
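The bi_error cleanup described above changes the completion convention for every bio-based driver: bio_endio() loses its error argument and the error is stored in the bio itself before completion; the dm.c hunks below are one instance of that conversion. As a minimal sketch of the old and new conventions, assuming the 4.3-era <linux/bio.h> interfaces (the helper name is hypothetical, not from this merge):

	#include <linux/bio.h>

	/* Hypothetical helper, for illustration only. */
	static void my_complete_bio(struct bio *bio, int err)
	{
		/*
		 * Pre-4.3 convention: the error was an argument to bio_endio():
		 *	bio_endio(bio, err);
		 * 4.3 convention: store the error in the bio, then complete it.
		 */
		bio->bi_error = err;
		bio_endio(bio);
	}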
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--    drivers/md/dm.c    137
1 file changed, 11 insertions, 126 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 0d7ab20c58df..6ffc01bb85f2 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -124,9 +124,8 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_FREEING 3
 #define DMF_DELETING 4
 #define DMF_NOFLUSH_SUSPENDING 5
-#define DMF_MERGE_IS_OPTIONAL 6
-#define DMF_DEFERRED_REMOVE 7
-#define DMF_SUSPENDED_INTERNALLY 8
+#define DMF_DEFERRED_REMOVE 6
+#define DMF_SUSPENDED_INTERNALLY 7
 
 /*
  * A dummy definition to make RCU happy.
@@ -944,7 +943,8 @@ static void dec_pending(struct dm_io *io, int error)
 		} else {
 			/* done with normal IO or empty flush */
 			trace_block_bio_complete(md->queue, bio, io_error);
-			bio_endio(bio, io_error);
+			bio->bi_error = io_error;
+			bio_endio(bio);
 		}
 	}
 }
@@ -957,17 +957,15 @@ static void disable_write_same(struct mapped_device *md)
 	limits->max_write_same_sectors = 0;
 }
 
-static void clone_endio(struct bio *bio, int error)
+static void clone_endio(struct bio *bio)
 {
+	int error = bio->bi_error;
 	int r = error;
 	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
 	struct dm_io *io = tio->io;
 	struct mapped_device *md = tio->io->md;
 	dm_endio_fn endio = tio->ti->type->end_io;
 
-	if (!bio_flagged(bio, BIO_UPTODATE) && !error)
-		error = -EIO;
-
 	if (endio) {
 		r = endio(tio->ti, bio, error);
 		if (r < 0 || r == DM_ENDIO_REQUEUE)
@@ -996,7 +994,7 @@ static void clone_endio(struct bio *bio, int error)
 /*
  * Partial completion handling for request-based dm
  */
-static void end_clone_bio(struct bio *clone, int error)
+static void end_clone_bio(struct bio *clone)
 {
 	struct dm_rq_clone_bio_info *info =
 		container_of(clone, struct dm_rq_clone_bio_info, clone);
@@ -1013,13 +1011,13 @@ static void end_clone_bio(struct bio *clone, int error)
 		 * the remainder.
 		 */
 		return;
-	else if (error) {
+	else if (bio->bi_error) {
 		/*
 		 * Don't notice the error to the upper layer yet.
 		 * The error handling decision is made by the target driver,
 		 * when the request is completed.
 		 */
-		tio->error = error;
+		tio->error = bio->bi_error;
 		return;
 	}
 
@@ -1722,60 +1720,6 @@ static void __split_and_process_bio(struct mapped_device *md,
  * CRUD END
  *---------------------------------------------------------------*/
 
-static int dm_merge_bvec(struct request_queue *q,
-			 struct bvec_merge_data *bvm,
-			 struct bio_vec *biovec)
-{
-	struct mapped_device *md = q->queuedata;
-	struct dm_table *map = dm_get_live_table_fast(md);
-	struct dm_target *ti;
-	sector_t max_sectors;
-	int max_size = 0;
-
-	if (unlikely(!map))
-		goto out;
-
-	ti = dm_table_find_target(map, bvm->bi_sector);
-	if (!dm_target_is_valid(ti))
-		goto out;
-
-	/*
-	 * Find maximum amount of I/O that won't need splitting
-	 */
-	max_sectors = min(max_io_len(bvm->bi_sector, ti),
-			  (sector_t) BIO_MAX_SECTORS);
-	max_size = (max_sectors << SECTOR_SHIFT) - bvm->bi_size;
-	if (max_size < 0)
-		max_size = 0;
-
-	/*
-	 * merge_bvec_fn() returns number of bytes
-	 * it can accept at this offset
-	 * max is precomputed maximal io size
-	 */
-	if (max_size && ti->type->merge)
-		max_size = ti->type->merge(ti, bvm, biovec, max_size);
-	/*
-	 * If the target doesn't support merge method and some of the devices
-	 * provided their merge_bvec method (we know this by looking at
-	 * queue_max_hw_sectors), then we can't allow bios with multiple vector
-	 * entries.  So always set max_size to 0, and the code below allows
-	 * just one page.
-	 */
-	else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9)
-		max_size = 0;
-
-out:
-	dm_put_live_table_fast(md);
-	/*
-	 * Always allow an entire first page
-	 */
-	if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
-		max_size = biovec->bv_len;
-
-	return max_size;
-}
-
 /*
  * The request function that just remaps the bio built up by
  * dm_merge_bvec.
@@ -1789,6 +1733,8 @@ static void dm_make_request(struct request_queue *q, struct bio *bio)
 
 	map = dm_get_live_table(md, &srcu_idx);
 
+	blk_queue_split(q, &bio, q->bio_split);
+
 	generic_start_io_acct(rw, bio_sectors(bio), &dm_disk(md)->part0);
 
 	/* if we're suspended, we have to queue this io for later */
@@ -2496,59 +2442,6 @@ static void __set_size(struct mapped_device *md, sector_t size)
 }
 
 /*
- * Return 1 if the queue has a compulsory merge_bvec_fn function.
- *
- * If this function returns 0, then the device is either a non-dm
- * device without a merge_bvec_fn, or it is a dm device that is
- * able to split any bios it receives that are too big.
- */
-int dm_queue_merge_is_compulsory(struct request_queue *q)
-{
-	struct mapped_device *dev_md;
-
-	if (!q->merge_bvec_fn)
-		return 0;
-
-	if (q->make_request_fn == dm_make_request) {
-		dev_md = q->queuedata;
-		if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
-			return 0;
-	}
-
-	return 1;
-}
-
-static int dm_device_merge_is_compulsory(struct dm_target *ti,
-					 struct dm_dev *dev, sector_t start,
-					 sector_t len, void *data)
-{
-	struct block_device *bdev = dev->bdev;
-	struct request_queue *q = bdev_get_queue(bdev);
-
-	return dm_queue_merge_is_compulsory(q);
-}
-
-/*
- * Return 1 if it is acceptable to ignore merge_bvec_fn based
- * on the properties of the underlying devices.
- */
-static int dm_table_merge_is_optional(struct dm_table *table)
-{
-	unsigned i = 0;
-	struct dm_target *ti;
-
-	while (i < dm_table_get_num_targets(table)) {
-		ti = dm_table_get_target(table, i++);
-
-		if (ti->type->iterate_devices &&
-		    ti->type->iterate_devices(ti, dm_device_merge_is_compulsory, NULL))
-			return 0;
-	}
-
-	return 1;
-}
-
-/*
  * Returns old map, which caller must destroy.
  */
 static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
@@ -2557,7 +2450,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 	struct dm_table *old_map;
 	struct request_queue *q = md->queue;
 	sector_t size;
-	int merge_is_optional;
 
 	size = dm_table_get_size(t);
 
@@ -2583,17 +2475,11 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 
 	__bind_mempools(md, t);
 
-	merge_is_optional = dm_table_merge_is_optional(t);
-
 	old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
 	rcu_assign_pointer(md->map, t);
 	md->immutable_target_type = dm_table_get_immutable_target_type(t);
 
 	dm_table_set_restrictions(t, q, limits);
-	if (merge_is_optional)
-		set_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
-	else
-		clear_bit(DMF_MERGE_IS_OPTIONAL, &md->flags);
 	if (old_map)
 		dm_sync_table(md);
 
@@ -2874,7 +2760,6 @@ int dm_setup_md_queue(struct mapped_device *md)
 	case DM_TYPE_BIO_BASED:
 		dm_init_old_md_queue(md);
 		blk_queue_make_request(md->queue, dm_make_request);
-		blk_queue_merge_bvec(md->queue, dm_merge_bvec);
 		break;
 	}
 
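With dm_merge_bvec(), dm_queue_merge_is_compulsory() and the DMF_MERGE_IS_OPTIONAL flag removed, the hunks above leave bio splitting to the block core: dm_make_request() simply calls blk_queue_split() before remapping. A minimal sketch of that pattern for a hypothetical bio-based driver, assuming the 4.3-era blk_queue_split() signature used in the diff (driver and function names are illustrative, not from this commit):

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	/* Hypothetical bio-based driver; names are illustrative only. */
	static void nullmap_make_request(struct request_queue *q, struct bio *bio)
	{
		/*
		 * Let the core split the bio against this queue's limits;
		 * bio may point at a new, smaller bio afterwards.
		 */
		blk_queue_split(q, &bio, q->bio_split);

		/*
		 * A real driver would remap bio->bi_iter.bi_sector and
		 * resubmit; here the bio is simply completed under the
		 * new bi_error convention.
		 */
		bio->bi_error = 0;
		bio_endio(bio);
	}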