56 files changed, 2088 insertions, 986 deletions
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
index 4f676838da06..bcdfdb9a9277 100644
--- a/Documentation/DocBook/filesystems.tmpl
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -62,7 +62,7 @@ | |||
62 | !Efs/mpage.c | 62 | !Efs/mpage.c |
63 | !Efs/namei.c | 63 | !Efs/namei.c |
64 | !Efs/buffer.c | 64 | !Efs/buffer.c |
65 | !Efs/bio.c | 65 | !Eblock/bio.c |
66 | !Efs/seq_file.c | 66 | !Efs/seq_file.c |
67 | !Efs/filesystems.c | 67 | !Efs/filesystems.c |
68 | !Efs/fs-writeback.c | 68 | !Efs/fs-writeback.c |
diff --git a/block/Makefile b/block/Makefile
index 20645e88fb57..a2ce6ac935ec 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -2,13 +2,15 @@ | |||
2 | # Makefile for the kernel block layer | 2 | # Makefile for the kernel block layer |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | 5 | obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ |
6 | blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ | 6 | blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ |
7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ | 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ |
8 | blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \ | 8 | blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \ |
9 | blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ | 9 | blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ |
10 | genhd.o scsi_ioctl.o partition-generic.o partitions/ | 10 | genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ |
11 | partitions/ | ||
11 | 12 | ||
13 | obj-$(CONFIG_BOUNCE) += bounce.o | ||
12 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | 14 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o |
13 | obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o | 15 | obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o |
14 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o | 16 | obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o |
@@ -20,3 +22,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o | |||
20 | obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o | 22 | obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o |
21 | obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o | 23 | obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o |
22 | obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o | 24 | obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o |
25 | obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o | ||
diff --git a/fs/bio-integrity.c b/block/bio-integrity.c
index 1c2ce0c87711..9e241063a616 100644
--- a/fs/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -617,7 +617,7 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size) | |||
617 | if (!bs->bio_integrity_pool) | 617 | if (!bs->bio_integrity_pool) |
618 | return -1; | 618 | return -1; |
619 | 619 | ||
620 | bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size); | 620 | bs->bvec_integrity_pool = biovec_create_pool(pool_size); |
621 | if (!bs->bvec_integrity_pool) { | 621 | if (!bs->bvec_integrity_pool) { |
622 | mempool_destroy(bs->bio_integrity_pool); | 622 | mempool_destroy(bs->bio_integrity_pool); |
623 | return -1; | 623 | return -1; |
diff --git a/fs/bio.c b/block/bio.c
index 6f0362b77806..96d28eee8a1e 100644
--- a/fs/bio.c
+++ b/block/bio.c
@@ -305,6 +305,8 @@ static void bio_chain_endio(struct bio *bio, int error) | |||
305 | 305 | ||
306 | /** | 306 | /** |
307 | * bio_chain - chain bio completions | 307 | * bio_chain - chain bio completions |
308 | * @bio: the target bio | ||
309 | * @parent: the @bio's parent bio | ||
308 | * | 310 | * |
309 | * The caller won't have a bi_end_io called when @bio completes - instead, | 311 | * The caller won't have a bi_end_io called when @bio completes - instead, |
310 | * @parent's bi_end_io won't be called until both @parent and @bio have | 312 | * @parent's bi_end_io won't be called until both @parent and @bio have |
@@ -1011,8 +1013,7 @@ static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, | |||
1011 | bio->bi_private = bmd; | 1013 | bio->bi_private = bmd; |
1012 | } | 1014 | } |
1013 | 1015 | ||
1014 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, | 1016 | static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count, |
1015 | unsigned int iov_count, | ||
1016 | gfp_t gfp_mask) | 1017 | gfp_t gfp_mask) |
1017 | { | 1018 | { |
1018 | if (iov_count > UIO_MAXIOV) | 1019 | if (iov_count > UIO_MAXIOV) |
@@ -1154,7 +1155,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, | |||
1154 | if (offset) | 1155 | if (offset) |
1155 | nr_pages++; | 1156 | nr_pages++; |
1156 | 1157 | ||
1157 | bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); | 1158 | bmd = bio_alloc_map_data(iov_count, gfp_mask); |
1158 | if (!bmd) | 1159 | if (!bmd) |
1159 | return ERR_PTR(-ENOMEM); | 1160 | return ERR_PTR(-ENOMEM); |
1160 | 1161 | ||
@@ -1859,7 +1860,7 @@ EXPORT_SYMBOL_GPL(bio_trim); | |||
1859 | * create memory pools for biovec's in a bio_set. | 1860 | * create memory pools for biovec's in a bio_set. |
1860 | * use the global biovec slabs created for general use. | 1861 | * use the global biovec slabs created for general use. |
1861 | */ | 1862 | */ |
1862 | mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries) | 1863 | mempool_t *biovec_create_pool(int pool_entries) |
1863 | { | 1864 | { |
1864 | struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; | 1865 | struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX; |
1865 | 1866 | ||
@@ -1922,7 +1923,7 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad) | |||
1922 | if (!bs->bio_pool) | 1923 | if (!bs->bio_pool) |
1923 | goto bad; | 1924 | goto bad; |
1924 | 1925 | ||
1925 | bs->bvec_pool = biovec_create_pool(bs, pool_size); | 1926 | bs->bvec_pool = biovec_create_pool(pool_size); |
1926 | if (!bs->bvec_pool) | 1927 | if (!bs->bvec_pool) |
1927 | goto bad; | 1928 | goto bad; |
1928 | 1929 | ||
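The added kernel-doc above supplies bio_chain()'s missing @bio/@parent descriptions. For context, a minimal usage sketch (not part of this patch; the helper name is hypothetical, and bio_split()/generic_make_request() are assumed to be available as in kernels of this era) showing why chaining matters when splitting a bio:

#include <linux/bio.h>

/* Hypothetical helper: issue the first 'sectors' separately, chain the rest. */
static void submit_split(struct bio *bio, unsigned int sectors, struct bio_set *bs)
{
	struct bio *split = bio_split(bio, sectors, GFP_NOIO, bs);

	bio_chain(split, bio);		/* bio's bi_end_io now also waits for split */
	generic_make_request(split);	/* front half */
	generic_make_request(bio);	/* remainder; bio_split() already advanced it */
}

Because of the chain, the original completion callback runs exactly once, after both halves finish.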
diff --git a/block/blk-core.c b/block/blk-core.c
index a0e3096c4bb5..40d654861c33 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -146,8 +146,8 @@ void blk_dump_rq_flags(struct request *rq, char *msg) | |||
146 | printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", | 146 | printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", |
147 | (unsigned long long)blk_rq_pos(rq), | 147 | (unsigned long long)blk_rq_pos(rq), |
148 | blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); | 148 | blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); |
149 | printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", | 149 | printk(KERN_INFO " bio %p, biotail %p, len %u\n", |
150 | rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); | 150 | rq->bio, rq->biotail, blk_rq_bytes(rq)); |
151 | 151 | ||
152 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { | 152 | if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { |
153 | printk(KERN_INFO " cdb: "); | 153 | printk(KERN_INFO " cdb: "); |
@@ -251,8 +251,10 @@ void blk_sync_queue(struct request_queue *q) | |||
251 | struct blk_mq_hw_ctx *hctx; | 251 | struct blk_mq_hw_ctx *hctx; |
252 | int i; | 252 | int i; |
253 | 253 | ||
254 | queue_for_each_hw_ctx(q, hctx, i) | 254 | queue_for_each_hw_ctx(q, hctx, i) { |
255 | cancel_delayed_work_sync(&hctx->delayed_work); | 255 | cancel_delayed_work_sync(&hctx->run_work); |
256 | cancel_delayed_work_sync(&hctx->delay_work); | ||
257 | } | ||
256 | } else { | 258 | } else { |
257 | cancel_delayed_work_sync(&q->delay_work); | 259 | cancel_delayed_work_sync(&q->delay_work); |
258 | } | 260 | } |
@@ -574,12 +576,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
574 | if (!q) | 576 | if (!q) |
575 | return NULL; | 577 | return NULL; |
576 | 578 | ||
577 | if (percpu_counter_init(&q->mq_usage_counter, 0)) | ||
578 | goto fail_q; | ||
579 | |||
580 | q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); | 579 | q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); |
581 | if (q->id < 0) | 580 | if (q->id < 0) |
582 | goto fail_c; | 581 | goto fail_q; |
583 | 582 | ||
584 | q->backing_dev_info.ra_pages = | 583 | q->backing_dev_info.ra_pages = |
585 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 584 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
@@ -637,8 +636,6 @@ fail_bdi: | |||
637 | bdi_destroy(&q->backing_dev_info); | 636 | bdi_destroy(&q->backing_dev_info); |
638 | fail_id: | 637 | fail_id: |
639 | ida_simple_remove(&blk_queue_ida, q->id); | 638 | ida_simple_remove(&blk_queue_ida, q->id); |
640 | fail_c: | ||
641 | percpu_counter_destroy(&q->mq_usage_counter); | ||
642 | fail_q: | 639 | fail_q: |
643 | kmem_cache_free(blk_requestq_cachep, q); | 640 | kmem_cache_free(blk_requestq_cachep, q); |
644 | return NULL; | 641 | return NULL; |
@@ -846,6 +843,47 @@ static void freed_request(struct request_list *rl, unsigned int flags) | |||
846 | __freed_request(rl, sync ^ 1); | 843 | __freed_request(rl, sync ^ 1); |
847 | } | 844 | } |
848 | 845 | ||
846 | int blk_update_nr_requests(struct request_queue *q, unsigned int nr) | ||
847 | { | ||
848 | struct request_list *rl; | ||
849 | |||
850 | spin_lock_irq(q->queue_lock); | ||
851 | q->nr_requests = nr; | ||
852 | blk_queue_congestion_threshold(q); | ||
853 | |||
854 | /* congestion isn't cgroup aware and follows root blkcg for now */ | ||
855 | rl = &q->root_rl; | ||
856 | |||
857 | if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) | ||
858 | blk_set_queue_congested(q, BLK_RW_SYNC); | ||
859 | else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) | ||
860 | blk_clear_queue_congested(q, BLK_RW_SYNC); | ||
861 | |||
862 | if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) | ||
863 | blk_set_queue_congested(q, BLK_RW_ASYNC); | ||
864 | else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) | ||
865 | blk_clear_queue_congested(q, BLK_RW_ASYNC); | ||
866 | |||
867 | blk_queue_for_each_rl(rl, q) { | ||
868 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { | ||
869 | blk_set_rl_full(rl, BLK_RW_SYNC); | ||
870 | } else { | ||
871 | blk_clear_rl_full(rl, BLK_RW_SYNC); | ||
872 | wake_up(&rl->wait[BLK_RW_SYNC]); | ||
873 | } | ||
874 | |||
875 | if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { | ||
876 | blk_set_rl_full(rl, BLK_RW_ASYNC); | ||
877 | } else { | ||
878 | blk_clear_rl_full(rl, BLK_RW_ASYNC); | ||
879 | wake_up(&rl->wait[BLK_RW_ASYNC]); | ||
880 | } | ||
881 | } | ||
882 | |||
883 | spin_unlock_irq(q->queue_lock); | ||
884 | return 0; | ||
885 | } | ||
886 | |||
849 | /* | 887 | /* |
850 | * Determine if elevator data should be initialized when allocating the | 888 | * Determine if elevator data should be initialized when allocating the |
851 | * request associated with @bio. | 889 | * request associated with @bio. |
@@ -1135,7 +1173,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, | |||
1135 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 1173 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
1136 | { | 1174 | { |
1137 | if (q->mq_ops) | 1175 | if (q->mq_ops) |
1138 | return blk_mq_alloc_request(q, rw, gfp_mask); | 1176 | return blk_mq_alloc_request(q, rw, gfp_mask, false); |
1139 | else | 1177 | else |
1140 | return blk_old_get_request(q, rw, gfp_mask); | 1178 | return blk_old_get_request(q, rw, gfp_mask); |
1141 | } | 1179 | } |
@@ -1231,12 +1269,15 @@ static void add_acct_request(struct request_queue *q, struct request *rq, | |||
1231 | static void part_round_stats_single(int cpu, struct hd_struct *part, | 1269 | static void part_round_stats_single(int cpu, struct hd_struct *part, |
1232 | unsigned long now) | 1270 | unsigned long now) |
1233 | { | 1271 | { |
1272 | int inflight; | ||
1273 | |||
1234 | if (now == part->stamp) | 1274 | if (now == part->stamp) |
1235 | return; | 1275 | return; |
1236 | 1276 | ||
1237 | if (part_in_flight(part)) { | 1277 | inflight = part_in_flight(part); |
1278 | if (inflight) { | ||
1238 | __part_stat_add(cpu, part, time_in_queue, | 1279 | __part_stat_add(cpu, part, time_in_queue, |
1239 | part_in_flight(part) * (now - part->stamp)); | 1280 | inflight * (now - part->stamp)); |
1240 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); | 1281 | __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); |
1241 | } | 1282 | } |
1242 | part->stamp = now; | 1283 | part->stamp = now; |
@@ -1360,7 +1401,6 @@ void blk_add_request_payload(struct request *rq, struct page *page, | |||
1360 | 1401 | ||
1361 | rq->__data_len = rq->resid_len = len; | 1402 | rq->__data_len = rq->resid_len = len; |
1362 | rq->nr_phys_segments = 1; | 1403 | rq->nr_phys_segments = 1; |
1363 | rq->buffer = bio_data(bio); | ||
1364 | } | 1404 | } |
1365 | EXPORT_SYMBOL_GPL(blk_add_request_payload); | 1405 | EXPORT_SYMBOL_GPL(blk_add_request_payload); |
1366 | 1406 | ||
@@ -1402,12 +1442,6 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, | |||
1402 | bio->bi_next = req->bio; | 1442 | bio->bi_next = req->bio; |
1403 | req->bio = bio; | 1443 | req->bio = bio; |
1404 | 1444 | ||
1405 | /* | ||
1406 | * may not be valid. if the low level driver said | ||
1407 | * it didn't need a bounce buffer then it better | ||
1408 | * not touch req->buffer either... | ||
1409 | */ | ||
1410 | req->buffer = bio_data(bio); | ||
1411 | req->__sector = bio->bi_iter.bi_sector; | 1445 | req->__sector = bio->bi_iter.bi_sector; |
1412 | req->__data_len += bio->bi_iter.bi_size; | 1446 | req->__data_len += bio->bi_iter.bi_size; |
1413 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | 1447 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
@@ -1432,6 +1466,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, | |||
1432 | * added on the elevator at this point. In addition, we don't have | 1466 | * added on the elevator at this point. In addition, we don't have |
1433 | * reliable access to the elevator outside queue lock. Only check basic | 1467 | * reliable access to the elevator outside queue lock. Only check basic |
1434 | * merging parameters without querying the elevator. | 1468 | * merging parameters without querying the elevator. |
1469 | * | ||
1470 | * Caller must ensure !blk_queue_nomerges(q) beforehand. | ||
1435 | */ | 1471 | */ |
1436 | bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, | 1472 | bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, |
1437 | unsigned int *request_count) | 1473 | unsigned int *request_count) |
@@ -1441,9 +1477,6 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, | |||
1441 | bool ret = false; | 1477 | bool ret = false; |
1442 | struct list_head *plug_list; | 1478 | struct list_head *plug_list; |
1443 | 1479 | ||
1444 | if (blk_queue_nomerges(q)) | ||
1445 | goto out; | ||
1446 | |||
1447 | plug = current->plug; | 1480 | plug = current->plug; |
1448 | if (!plug) | 1481 | if (!plug) |
1449 | goto out; | 1482 | goto out; |
@@ -1522,7 +1555,8 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) | |||
1522 | * Check if we can merge with the plugged list before grabbing | 1555 | * Check if we can merge with the plugged list before grabbing |
1523 | * any locks. | 1556 | * any locks. |
1524 | */ | 1557 | */ |
1525 | if (blk_attempt_plug_merge(q, bio, &request_count)) | 1558 | if (!blk_queue_nomerges(q) && |
1559 | blk_attempt_plug_merge(q, bio, &request_count)) | ||
1526 | return; | 1560 | return; |
1527 | 1561 | ||
1528 | spin_lock_irq(q->queue_lock); | 1562 | spin_lock_irq(q->queue_lock); |
@@ -1654,7 +1688,7 @@ static int __init fail_make_request_debugfs(void) | |||
1654 | struct dentry *dir = fault_create_debugfs_attr("fail_make_request", | 1688 | struct dentry *dir = fault_create_debugfs_attr("fail_make_request", |
1655 | NULL, &fail_make_request); | 1689 | NULL, &fail_make_request); |
1656 | 1690 | ||
1657 | return IS_ERR(dir) ? PTR_ERR(dir) : 0; | 1691 | return PTR_ERR_OR_ZERO(dir); |
1658 | } | 1692 | } |
1659 | 1693 | ||
1660 | late_initcall(fail_make_request_debugfs); | 1694 | late_initcall(fail_make_request_debugfs); |
@@ -2434,7 +2468,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) | |||
2434 | } | 2468 | } |
2435 | 2469 | ||
2436 | req->__data_len -= total_bytes; | 2470 | req->__data_len -= total_bytes; |
2437 | req->buffer = bio_data(req->bio); | ||
2438 | 2471 | ||
2439 | /* update sector only for requests with clear definition of sector */ | 2472 | /* update sector only for requests with clear definition of sector */ |
2440 | if (req->cmd_type == REQ_TYPE_FS) | 2473 | if (req->cmd_type == REQ_TYPE_FS) |
@@ -2503,7 +2536,7 @@ EXPORT_SYMBOL_GPL(blk_unprep_request); | |||
2503 | /* | 2536 | /* |
2504 | * queue lock must be held | 2537 | * queue lock must be held |
2505 | */ | 2538 | */ |
2506 | static void blk_finish_request(struct request *req, int error) | 2539 | void blk_finish_request(struct request *req, int error) |
2507 | { | 2540 | { |
2508 | if (blk_rq_tagged(req)) | 2541 | if (blk_rq_tagged(req)) |
2509 | blk_queue_end_tag(req->q, req); | 2542 | blk_queue_end_tag(req->q, req); |
@@ -2529,6 +2562,7 @@ static void blk_finish_request(struct request *req, int error) | |||
2529 | __blk_put_request(req->q, req); | 2562 | __blk_put_request(req->q, req); |
2530 | } | 2563 | } |
2531 | } | 2564 | } |
2565 | EXPORT_SYMBOL(blk_finish_request); | ||
2532 | 2566 | ||
2533 | /** | 2567 | /** |
2534 | * blk_end_bidi_request - Complete a bidi request | 2568 | * blk_end_bidi_request - Complete a bidi request |
@@ -2752,10 +2786,9 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, | |||
2752 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ | 2786 | /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ |
2753 | rq->cmd_flags |= bio->bi_rw & REQ_WRITE; | 2787 | rq->cmd_flags |= bio->bi_rw & REQ_WRITE; |
2754 | 2788 | ||
2755 | if (bio_has_data(bio)) { | 2789 | if (bio_has_data(bio)) |
2756 | rq->nr_phys_segments = bio_phys_segments(q, bio); | 2790 | rq->nr_phys_segments = bio_phys_segments(q, bio); |
2757 | rq->buffer = bio_data(bio); | 2791 | |
2758 | } | ||
2759 | rq->__data_len = bio->bi_iter.bi_size; | 2792 | rq->__data_len = bio->bi_iter.bi_size; |
2760 | rq->bio = rq->biotail = bio; | 2793 | rq->bio = rq->biotail = bio; |
2761 | 2794 | ||
@@ -2831,7 +2864,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); | |||
2831 | 2864 | ||
2832 | /* | 2865 | /* |
2833 | * Copy attributes of the original request to the clone request. | 2866 | * Copy attributes of the original request to the clone request. |
2834 | * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. | 2867 | * The actual data parts (e.g. ->cmd, ->sense) are not copied. |
2835 | */ | 2868 | */ |
2836 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) | 2869 | static void __blk_rq_prep_clone(struct request *dst, struct request *src) |
2837 | { | 2870 | { |
@@ -2857,7 +2890,7 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src) | |||
2857 | * | 2890 | * |
2858 | * Description: | 2891 | * Description: |
2859 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. | 2892 | * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. |
2860 | * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) | 2893 | * The actual data parts of @rq_src (e.g. ->cmd, ->sense) |
2861 | * are not copied, and copying such parts is the caller's responsibility. | 2894 | * are not copied, and copying such parts is the caller's responsibility. |
2862 | * Also, pages which the original bios are pointing to are not copied | 2895 | * Also, pages which the original bios are pointing to are not copied |
2863 | * and the cloned bios just point same pages. | 2896 | * and the cloned bios just point same pages. |
@@ -2904,20 +2937,25 @@ free_and_out: | |||
2904 | } | 2937 | } |
2905 | EXPORT_SYMBOL_GPL(blk_rq_prep_clone); | 2938 | EXPORT_SYMBOL_GPL(blk_rq_prep_clone); |
2906 | 2939 | ||
2907 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) | 2940 | int kblockd_schedule_work(struct work_struct *work) |
2908 | { | 2941 | { |
2909 | return queue_work(kblockd_workqueue, work); | 2942 | return queue_work(kblockd_workqueue, work); |
2910 | } | 2943 | } |
2911 | EXPORT_SYMBOL(kblockd_schedule_work); | 2944 | EXPORT_SYMBOL(kblockd_schedule_work); |
2912 | 2945 | ||
2913 | int kblockd_schedule_delayed_work(struct request_queue *q, | 2946 | int kblockd_schedule_delayed_work(struct delayed_work *dwork, |
2914 | struct delayed_work *dwork, unsigned long delay) | 2947 | unsigned long delay) |
2915 | { | 2948 | { |
2916 | return queue_delayed_work(kblockd_workqueue, dwork, delay); | 2949 | return queue_delayed_work(kblockd_workqueue, dwork, delay); |
2917 | } | 2950 | } |
2918 | EXPORT_SYMBOL(kblockd_schedule_delayed_work); | 2951 | EXPORT_SYMBOL(kblockd_schedule_delayed_work); |
2919 | 2952 | ||
2920 | #define PLUG_MAGIC 0x91827364 | 2953 | int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, |
2954 | unsigned long delay) | ||
2955 | { | ||
2956 | return queue_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); | ||
2957 | } | ||
2958 | EXPORT_SYMBOL(kblockd_schedule_delayed_work_on); | ||
2921 | 2959 | ||
2922 | /** | 2960 | /** |
2923 | * blk_start_plug - initialize blk_plug and track it inside the task_struct | 2961 | * blk_start_plug - initialize blk_plug and track it inside the task_struct |
@@ -2937,7 +2975,6 @@ void blk_start_plug(struct blk_plug *plug) | |||
2937 | { | 2975 | { |
2938 | struct task_struct *tsk = current; | 2976 | struct task_struct *tsk = current; |
2939 | 2977 | ||
2940 | plug->magic = PLUG_MAGIC; | ||
2941 | INIT_LIST_HEAD(&plug->list); | 2978 | INIT_LIST_HEAD(&plug->list); |
2942 | INIT_LIST_HEAD(&plug->mq_list); | 2979 | INIT_LIST_HEAD(&plug->mq_list); |
2943 | INIT_LIST_HEAD(&plug->cb_list); | 2980 | INIT_LIST_HEAD(&plug->cb_list); |
@@ -3034,8 +3071,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |||
3034 | LIST_HEAD(list); | 3071 | LIST_HEAD(list); |
3035 | unsigned int depth; | 3072 | unsigned int depth; |
3036 | 3073 | ||
3037 | BUG_ON(plug->magic != PLUG_MAGIC); | ||
3038 | |||
3039 | flush_plug_callbacks(plug, from_schedule); | 3074 | flush_plug_callbacks(plug, from_schedule); |
3040 | 3075 | ||
3041 | if (!list_empty(&plug->mq_list)) | 3076 | if (!list_empty(&plug->mq_list)) |
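The kblockd helpers above drop their unused request_queue argument and gain a CPU-pinned variant. A minimal sketch of the new calling convention (hypothetical driver-side context struct and callback; only the kblockd_* call is from this patch):

#include <linux/blkdev.h>
#include <linux/workqueue.h>

struct my_ctx {				/* hypothetical per-queue context */
	struct delayed_work run_work;
};

static void my_run_fn(struct work_struct *work)
{
	struct my_ctx *ctx = container_of(work, struct my_ctx, run_work.work);

	/* ... restart processing for ctx ... */
}

static void my_kick(struct my_ctx *ctx, int cpu)
{
	/* INIT_DELAYED_WORK(&ctx->run_work, my_run_fn) is assumed done at setup */
	kblockd_schedule_delayed_work_on(cpu, &ctx->run_work, msecs_to_jiffies(3));
}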
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 43e6b4755e9a..ff87c664b7df 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -130,21 +130,13 @@ static void blk_flush_restore_request(struct request *rq) | |||
130 | blk_clear_rq_complete(rq); | 130 | blk_clear_rq_complete(rq); |
131 | } | 131 | } |
132 | 132 | ||
133 | static void mq_flush_run(struct work_struct *work) | ||
134 | { | ||
135 | struct request *rq; | ||
136 | |||
137 | rq = container_of(work, struct request, mq_flush_work); | ||
138 | |||
139 | memset(&rq->csd, 0, sizeof(rq->csd)); | ||
140 | blk_mq_insert_request(rq, false, true, false); | ||
141 | } | ||
142 | |||
143 | static bool blk_flush_queue_rq(struct request *rq, bool add_front) | 133 | static bool blk_flush_queue_rq(struct request *rq, bool add_front) |
144 | { | 134 | { |
145 | if (rq->q->mq_ops) { | 135 | if (rq->q->mq_ops) { |
146 | INIT_WORK(&rq->mq_flush_work, mq_flush_run); | 136 | struct request_queue *q = rq->q; |
147 | kblockd_schedule_work(rq->q, &rq->mq_flush_work); | 137 | |
138 | blk_mq_add_to_requeue_list(rq, add_front); | ||
139 | blk_mq_kick_requeue_list(q); | ||
148 | return false; | 140 | return false; |
149 | } else { | 141 | } else { |
150 | if (add_front) | 142 | if (add_front) |
@@ -231,8 +223,10 @@ static void flush_end_io(struct request *flush_rq, int error) | |||
231 | struct request *rq, *n; | 223 | struct request *rq, *n; |
232 | unsigned long flags = 0; | 224 | unsigned long flags = 0; |
233 | 225 | ||
234 | if (q->mq_ops) | 226 | if (q->mq_ops) { |
235 | spin_lock_irqsave(&q->mq_flush_lock, flags); | 227 | spin_lock_irqsave(&q->mq_flush_lock, flags); |
228 | q->flush_rq->cmd_flags = 0; | ||
229 | } | ||
236 | 230 | ||
237 | running = &q->flush_queue[q->flush_running_idx]; | 231 | running = &q->flush_queue[q->flush_running_idx]; |
238 | BUG_ON(q->flush_pending_idx == q->flush_running_idx); | 232 | BUG_ON(q->flush_pending_idx == q->flush_running_idx); |
@@ -306,23 +300,9 @@ static bool blk_kick_flush(struct request_queue *q) | |||
306 | */ | 300 | */ |
307 | q->flush_pending_idx ^= 1; | 301 | q->flush_pending_idx ^= 1; |
308 | 302 | ||
309 | if (q->mq_ops) { | 303 | blk_rq_init(q, q->flush_rq); |
310 | struct blk_mq_ctx *ctx = first_rq->mq_ctx; | 304 | if (q->mq_ops) |
311 | struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); | 305 | blk_mq_clone_flush_request(q->flush_rq, first_rq); |
312 | |||
313 | blk_mq_rq_init(hctx, q->flush_rq); | ||
314 | q->flush_rq->mq_ctx = ctx; | ||
315 | |||
316 | /* | ||
317 | * Reuse the tag value from the fist waiting request, | ||
318 | * with blk-mq the tag is generated during request | ||
319 | * allocation and drivers can rely on it being inside | ||
320 | * the range they asked for. | ||
321 | */ | ||
322 | q->flush_rq->tag = first_rq->tag; | ||
323 | } else { | ||
324 | blk_rq_init(q, q->flush_rq); | ||
325 | } | ||
326 | 306 | ||
327 | q->flush_rq->cmd_type = REQ_TYPE_FS; | 307 | q->flush_rq->cmd_type = REQ_TYPE_FS; |
328 | q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; | 308 | q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; |
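With the hunk above, blk-mq flush sequencing goes through the shared requeue list rather than a private work item. The same pair of calls is the pattern a blk-mq driver would use to push a request back for a later retry (illustrative sketch; the wrapper function is hypothetical and the pairing with blk_mq_requeue_request() is an assumption based on contemporaneous callers):

static void my_defer_rq(struct request *rq)
{
	blk_mq_requeue_request(rq);		/* unwind it from the in-flight path */
	blk_mq_kick_requeue_list(rq->q);	/* schedule the requeue work */
}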
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
index c11d24e379e2..d828b44a404b 100644
--- a/block/blk-iopoll.c
+++ b/block/blk-iopoll.c
@@ -64,12 +64,12 @@ EXPORT_SYMBOL(__blk_iopoll_complete); | |||
64 | * iopoll handler will not be invoked again before blk_iopoll_sched_prep() | 64 | * iopoll handler will not be invoked again before blk_iopoll_sched_prep() |
65 | * is called. | 65 | * is called. |
66 | **/ | 66 | **/ |
67 | void blk_iopoll_complete(struct blk_iopoll *iopoll) | 67 | void blk_iopoll_complete(struct blk_iopoll *iop) |
68 | { | 68 | { |
69 | unsigned long flags; | 69 | unsigned long flags; |
70 | 70 | ||
71 | local_irq_save(flags); | 71 | local_irq_save(flags); |
72 | __blk_iopoll_complete(iopoll); | 72 | __blk_iopoll_complete(iop); |
73 | local_irq_restore(flags); | 73 | local_irq_restore(flags); |
74 | } | 74 | } |
75 | EXPORT_SYMBOL(blk_iopoll_complete); | 75 | EXPORT_SYMBOL(blk_iopoll_complete); |
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 97a733cf3d5f..8411be3c19d3 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -226,8 +226,8 @@ EXPORT_SYMBOL(blkdev_issue_write_same); | |||
226 | * Generate and issue number of bios with zerofiled pages. | 226 | * Generate and issue number of bios with zerofiled pages. |
227 | */ | 227 | */ |
228 | 228 | ||
229 | int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 229 | static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
230 | sector_t nr_sects, gfp_t gfp_mask) | 230 | sector_t nr_sects, gfp_t gfp_mask) |
231 | { | 231 | { |
232 | int ret; | 232 | int ret; |
233 | struct bio *bio; | 233 | struct bio *bio; |
diff --git a/block/blk-map.c b/block/blk-map.c
index f7b22bc21518..f890d4345b0c 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -155,7 +155,6 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq, | |||
155 | if (!bio_flagged(bio, BIO_USER_MAPPED)) | 155 | if (!bio_flagged(bio, BIO_USER_MAPPED)) |
156 | rq->cmd_flags |= REQ_COPY_USER; | 156 | rq->cmd_flags |= REQ_COPY_USER; |
157 | 157 | ||
158 | rq->buffer = NULL; | ||
159 | return 0; | 158 | return 0; |
160 | unmap_rq: | 159 | unmap_rq: |
161 | blk_rq_unmap_user(bio); | 160 | blk_rq_unmap_user(bio); |
@@ -238,7 +237,6 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, | |||
238 | blk_queue_bounce(q, &bio); | 237 | blk_queue_bounce(q, &bio); |
239 | bio_get(bio); | 238 | bio_get(bio); |
240 | blk_rq_bio_prep(q, rq, bio); | 239 | blk_rq_bio_prep(q, rq, bio); |
241 | rq->buffer = NULL; | ||
242 | return 0; | 240 | return 0; |
243 | } | 241 | } |
244 | EXPORT_SYMBOL(blk_rq_map_user_iov); | 242 | EXPORT_SYMBOL(blk_rq_map_user_iov); |
@@ -325,7 +323,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, | |||
325 | } | 323 | } |
326 | 324 | ||
327 | blk_queue_bounce(q, &rq->bio); | 325 | blk_queue_bounce(q, &rq->bio); |
328 | rq->buffer = NULL; | ||
329 | return 0; | 326 | return 0; |
330 | } | 327 | } |
331 | EXPORT_SYMBOL(blk_rq_map_kern); | 328 | EXPORT_SYMBOL(blk_rq_map_kern); |
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 6c583f9c5b65..b3bf0df0f4c2 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -13,7 +13,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, | |||
13 | struct bio *bio) | 13 | struct bio *bio) |
14 | { | 14 | { |
15 | struct bio_vec bv, bvprv = { NULL }; | 15 | struct bio_vec bv, bvprv = { NULL }; |
16 | int cluster, high, highprv = 1; | 16 | int cluster, high, highprv = 1, no_sg_merge; |
17 | unsigned int seg_size, nr_phys_segs; | 17 | unsigned int seg_size, nr_phys_segs; |
18 | struct bio *fbio, *bbio; | 18 | struct bio *fbio, *bbio; |
19 | struct bvec_iter iter; | 19 | struct bvec_iter iter; |
@@ -35,12 +35,21 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, | |||
35 | cluster = blk_queue_cluster(q); | 35 | cluster = blk_queue_cluster(q); |
36 | seg_size = 0; | 36 | seg_size = 0; |
37 | nr_phys_segs = 0; | 37 | nr_phys_segs = 0; |
38 | no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); | ||
39 | high = 0; | ||
38 | for_each_bio(bio) { | 40 | for_each_bio(bio) { |
39 | bio_for_each_segment(bv, bio, iter) { | 41 | bio_for_each_segment(bv, bio, iter) { |
40 | /* | 42 | /* |
43 | * If SG merging is disabled, each bio vector is | ||
44 | * a segment | ||
45 | */ | ||
46 | if (no_sg_merge) | ||
47 | goto new_segment; | ||
48 | |||
49 | /* | ||
41 | * the trick here is making sure that a high page is | 50 | * the trick here is making sure that a high page is |
42 | * never considered part of another segment, since that | 51 | * never considered part of another segment, since |
43 | * might change with the bounce page. | 52 | * that might change with the bounce page. |
44 | */ | 53 | */ |
45 | high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q); | 54 | high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q); |
46 | if (!high && !highprv && cluster) { | 55 | if (!high && !highprv && cluster) { |
@@ -84,11 +93,16 @@ void blk_recalc_rq_segments(struct request *rq) | |||
84 | 93 | ||
85 | void blk_recount_segments(struct request_queue *q, struct bio *bio) | 94 | void blk_recount_segments(struct request_queue *q, struct bio *bio) |
86 | { | 95 | { |
87 | struct bio *nxt = bio->bi_next; | 96 | if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags)) |
97 | bio->bi_phys_segments = bio->bi_vcnt; | ||
98 | else { | ||
99 | struct bio *nxt = bio->bi_next; | ||
100 | |||
101 | bio->bi_next = NULL; | ||
102 | bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio); | ||
103 | bio->bi_next = nxt; | ||
104 | } | ||
88 | 105 | ||
89 | bio->bi_next = NULL; | ||
90 | bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio); | ||
91 | bio->bi_next = nxt; | ||
92 | bio->bi_flags |= (1 << BIO_SEG_VALID); | 106 | bio->bi_flags |= (1 << BIO_SEG_VALID); |
93 | } | 107 | } |
94 | EXPORT_SYMBOL(blk_recount_segments); | 108 | EXPORT_SYMBOL(blk_recount_segments); |
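The QUEUE_FLAG_NO_SG_MERGE path above counts every bio_vec as its own physical segment and skips the merge bookkeeping. In this series the flag is meant to be set by blk-mq at queue init; a sketch of opting in by hand (assuming the standard queue_flag helper from blkdev.h):

/* during queue setup, before any I/O is submitted */
queue_flag_set_unlocked(QUEUE_FLAG_NO_SG_MERGE, q);

With the flag set, blk_recount_segments() above reduces to bi_phys_segments = bi_vcnt.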
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index 136ef8643bba..bb3ed488f7b5 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -1,3 +1,8 @@ | |||
1 | /* | ||
2 | * CPU notifier helper code for blk-mq | ||
3 | * | ||
4 | * Copyright (C) 2013-2014 Jens Axboe | ||
5 | */ | ||
1 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
2 | #include <linux/module.h> | 7 | #include <linux/module.h> |
3 | #include <linux/init.h> | 8 | #include <linux/init.h> |
@@ -18,14 +23,18 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self, | |||
18 | { | 23 | { |
19 | unsigned int cpu = (unsigned long) hcpu; | 24 | unsigned int cpu = (unsigned long) hcpu; |
20 | struct blk_mq_cpu_notifier *notify; | 25 | struct blk_mq_cpu_notifier *notify; |
26 | int ret = NOTIFY_OK; | ||
21 | 27 | ||
22 | raw_spin_lock(&blk_mq_cpu_notify_lock); | 28 | raw_spin_lock(&blk_mq_cpu_notify_lock); |
23 | 29 | ||
24 | list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) | 30 | list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) { |
25 | notify->notify(notify->data, action, cpu); | 31 | ret = notify->notify(notify->data, action, cpu); |
32 | if (ret != NOTIFY_OK) | ||
33 | break; | ||
34 | } | ||
26 | 35 | ||
27 | raw_spin_unlock(&blk_mq_cpu_notify_lock); | 36 | raw_spin_unlock(&blk_mq_cpu_notify_lock); |
28 | return NOTIFY_OK; | 37 | return ret; |
29 | } | 38 | } |
30 | 39 | ||
31 | void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) | 40 | void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) |
@@ -45,7 +54,7 @@ void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier) | |||
45 | } | 54 | } |
46 | 55 | ||
47 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, | 56 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, |
48 | void (*fn)(void *, unsigned long, unsigned int), | 57 | int (*fn)(void *, unsigned long, unsigned int), |
49 | void *data) | 58 | void *data) |
50 | { | 59 | { |
51 | notifier->notify = fn; | 60 | notifier->notify = fn; |
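Because the notifier callback now returns an int (see the function-pointer change just above), an individual notifier can stop the chain. A conforming callback would look roughly like this (the body is a hypothetical example, not taken from this patch):

#include <linux/cpu.h>
#include <linux/notifier.h>

static int my_cpu_notify(void *data, unsigned long action, unsigned int cpu)
{
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		/* migrate per-cpu state owned by 'data' off 'cpu' */
	}
	return NOTIFY_OK;	/* any other value stops the remaining notifiers */
}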
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 097921329619..1065d7c65fa1 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -1,3 +1,8 @@ | |||
1 | /* | ||
2 | * CPU <-> hardware queue mapping helpers | ||
3 | * | ||
4 | * Copyright (C) 2013-2014 Jens Axboe | ||
5 | */ | ||
1 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
2 | #include <linux/threads.h> | 7 | #include <linux/threads.h> |
3 | #include <linux/module.h> | 8 | #include <linux/module.h> |
@@ -80,19 +85,35 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues) | |||
80 | return 0; | 85 | return 0; |
81 | } | 86 | } |
82 | 87 | ||
83 | unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg) | 88 | unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set) |
84 | { | 89 | { |
85 | unsigned int *map; | 90 | unsigned int *map; |
86 | 91 | ||
87 | /* If cpus are offline, map them to first hctx */ | 92 | /* If cpus are offline, map them to first hctx */ |
88 | map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL, | 93 | map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL, |
89 | reg->numa_node); | 94 | set->numa_node); |
90 | if (!map) | 95 | if (!map) |
91 | return NULL; | 96 | return NULL; |
92 | 97 | ||
93 | if (!blk_mq_update_queue_map(map, reg->nr_hw_queues)) | 98 | if (!blk_mq_update_queue_map(map, set->nr_hw_queues)) |
94 | return map; | 99 | return map; |
95 | 100 | ||
96 | kfree(map); | 101 | kfree(map); |
97 | return NULL; | 102 | return NULL; |
98 | } | 103 | } |
104 | |||
105 | /* | ||
106 | * We have no quick way of doing reverse lookups. This is only used at | ||
107 | * queue init time, so runtime isn't important. | ||
108 | */ | ||
109 | int blk_mq_hw_queue_to_node(unsigned int *mq_map, unsigned int index) | ||
110 | { | ||
111 | int i; | ||
112 | |||
113 | for_each_possible_cpu(i) { | ||
114 | if (index == mq_map[i]) | ||
115 | return cpu_to_node(i); | ||
116 | } | ||
117 | |||
118 | return NUMA_NO_NODE; | ||
119 | } | ||
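blk_mq_hw_queue_to_node() above is a linear reverse lookup of the cpu-to-hw-queue map; it only runs at queue init, so speed is irrelevant. Typical intended use is NUMA-local allocation of a hardware context (sketch; the wrapper function is hypothetical):

#include <linux/blk-mq.h>
#include <linux/slab.h>

static struct blk_mq_hw_ctx *alloc_hctx_near_map(struct request_queue *q,
						 unsigned int i)
{
	int node = blk_mq_hw_queue_to_node(q->mq_map, i);

	/* NUMA_NO_NODE simply falls back to a local allocation */
	return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, node);
}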
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index b0ba264b0522..ed5217867555 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -203,59 +203,24 @@ static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx, | |||
203 | return ret; | 203 | return ret; |
204 | } | 204 | } |
205 | 205 | ||
206 | static ssize_t blk_mq_hw_sysfs_ipi_show(struct blk_mq_hw_ctx *hctx, char *page) | 206 | static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) |
207 | { | ||
208 | ssize_t ret; | ||
209 | |||
210 | spin_lock(&hctx->lock); | ||
211 | ret = sprintf(page, "%u\n", !!(hctx->flags & BLK_MQ_F_SHOULD_IPI)); | ||
212 | spin_unlock(&hctx->lock); | ||
213 | |||
214 | return ret; | ||
215 | } | ||
216 | |||
217 | static ssize_t blk_mq_hw_sysfs_ipi_store(struct blk_mq_hw_ctx *hctx, | ||
218 | const char *page, size_t len) | ||
219 | { | 207 | { |
220 | struct blk_mq_ctx *ctx; | 208 | return blk_mq_tag_sysfs_show(hctx->tags, page); |
221 | unsigned long ret; | ||
222 | unsigned int i; | ||
223 | |||
224 | if (kstrtoul(page, 10, &ret)) { | ||
225 | pr_err("blk-mq-sysfs: invalid input '%s'\n", page); | ||
226 | return -EINVAL; | ||
227 | } | ||
228 | |||
229 | spin_lock(&hctx->lock); | ||
230 | if (ret) | ||
231 | hctx->flags |= BLK_MQ_F_SHOULD_IPI; | ||
232 | else | ||
233 | hctx->flags &= ~BLK_MQ_F_SHOULD_IPI; | ||
234 | spin_unlock(&hctx->lock); | ||
235 | |||
236 | hctx_for_each_ctx(hctx, ctx, i) | ||
237 | ctx->ipi_redirect = !!ret; | ||
238 | |||
239 | return len; | ||
240 | } | 209 | } |
241 | 210 | ||
242 | static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) | 211 | static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page) |
243 | { | 212 | { |
244 | return blk_mq_tag_sysfs_show(hctx->tags, page); | 213 | return sprintf(page, "%u\n", atomic_read(&hctx->nr_active)); |
245 | } | 214 | } |
246 | 215 | ||
247 | static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) | 216 | static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) |
248 | { | 217 | { |
249 | unsigned int i, queue_num, first = 1; | 218 | unsigned int i, first = 1; |
250 | ssize_t ret = 0; | 219 | ssize_t ret = 0; |
251 | 220 | ||
252 | blk_mq_disable_hotplug(); | 221 | blk_mq_disable_hotplug(); |
253 | 222 | ||
254 | for_each_online_cpu(i) { | 223 | for_each_cpu(i, hctx->cpumask) { |
255 | queue_num = hctx->queue->mq_map[i]; | ||
256 | if (queue_num != hctx->queue_num) | ||
257 | continue; | ||
258 | |||
259 | if (first) | 224 | if (first) |
260 | ret += sprintf(ret + page, "%u", i); | 225 | ret += sprintf(ret + page, "%u", i); |
261 | else | 226 | else |
@@ -307,15 +272,14 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = { | |||
307 | .attr = {.name = "dispatched", .mode = S_IRUGO }, | 272 | .attr = {.name = "dispatched", .mode = S_IRUGO }, |
308 | .show = blk_mq_hw_sysfs_dispatched_show, | 273 | .show = blk_mq_hw_sysfs_dispatched_show, |
309 | }; | 274 | }; |
275 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = { | ||
276 | .attr = {.name = "active", .mode = S_IRUGO }, | ||
277 | .show = blk_mq_hw_sysfs_active_show, | ||
278 | }; | ||
310 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = { | 279 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = { |
311 | .attr = {.name = "pending", .mode = S_IRUGO }, | 280 | .attr = {.name = "pending", .mode = S_IRUGO }, |
312 | .show = blk_mq_hw_sysfs_rq_list_show, | 281 | .show = blk_mq_hw_sysfs_rq_list_show, |
313 | }; | 282 | }; |
314 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_ipi = { | ||
315 | .attr = {.name = "ipi_redirect", .mode = S_IRUGO | S_IWUSR}, | ||
316 | .show = blk_mq_hw_sysfs_ipi_show, | ||
317 | .store = blk_mq_hw_sysfs_ipi_store, | ||
318 | }; | ||
319 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = { | 283 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = { |
320 | .attr = {.name = "tags", .mode = S_IRUGO }, | 284 | .attr = {.name = "tags", .mode = S_IRUGO }, |
321 | .show = blk_mq_hw_sysfs_tags_show, | 285 | .show = blk_mq_hw_sysfs_tags_show, |
@@ -330,9 +294,9 @@ static struct attribute *default_hw_ctx_attrs[] = { | |||
330 | &blk_mq_hw_sysfs_run.attr, | 294 | &blk_mq_hw_sysfs_run.attr, |
331 | &blk_mq_hw_sysfs_dispatched.attr, | 295 | &blk_mq_hw_sysfs_dispatched.attr, |
332 | &blk_mq_hw_sysfs_pending.attr, | 296 | &blk_mq_hw_sysfs_pending.attr, |
333 | &blk_mq_hw_sysfs_ipi.attr, | ||
334 | &blk_mq_hw_sysfs_tags.attr, | 297 | &blk_mq_hw_sysfs_tags.attr, |
335 | &blk_mq_hw_sysfs_cpus.attr, | 298 | &blk_mq_hw_sysfs_cpus.attr, |
299 | &blk_mq_hw_sysfs_active.attr, | ||
336 | NULL, | 300 | NULL, |
337 | }; | 301 | }; |
338 | 302 | ||
@@ -363,6 +327,42 @@ static struct kobj_type blk_mq_hw_ktype = { | |||
363 | .release = blk_mq_sysfs_release, | 327 | .release = blk_mq_sysfs_release, |
364 | }; | 328 | }; |
365 | 329 | ||
330 | static void blk_mq_unregister_hctx(struct blk_mq_hw_ctx *hctx) | ||
331 | { | ||
332 | struct blk_mq_ctx *ctx; | ||
333 | int i; | ||
334 | |||
335 | if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP)) | ||
336 | return; | ||
337 | |||
338 | hctx_for_each_ctx(hctx, ctx, i) | ||
339 | kobject_del(&ctx->kobj); | ||
340 | |||
341 | kobject_del(&hctx->kobj); | ||
342 | } | ||
343 | |||
344 | static int blk_mq_register_hctx(struct blk_mq_hw_ctx *hctx) | ||
345 | { | ||
346 | struct request_queue *q = hctx->queue; | ||
347 | struct blk_mq_ctx *ctx; | ||
348 | int i, ret; | ||
349 | |||
350 | if (!hctx->nr_ctx || !(hctx->flags & BLK_MQ_F_SYSFS_UP)) | ||
351 | return 0; | ||
352 | |||
353 | ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", hctx->queue_num); | ||
354 | if (ret) | ||
355 | return ret; | ||
356 | |||
357 | hctx_for_each_ctx(hctx, ctx, i) { | ||
358 | ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu); | ||
359 | if (ret) | ||
360 | break; | ||
361 | } | ||
362 | |||
363 | return ret; | ||
364 | } | ||
365 | |||
366 | void blk_mq_unregister_disk(struct gendisk *disk) | 366 | void blk_mq_unregister_disk(struct gendisk *disk) |
367 | { | 367 | { |
368 | struct request_queue *q = disk->queue; | 368 | struct request_queue *q = disk->queue; |
@@ -371,11 +371,11 @@ void blk_mq_unregister_disk(struct gendisk *disk) | |||
371 | int i, j; | 371 | int i, j; |
372 | 372 | ||
373 | queue_for_each_hw_ctx(q, hctx, i) { | 373 | queue_for_each_hw_ctx(q, hctx, i) { |
374 | hctx_for_each_ctx(hctx, ctx, j) { | 374 | blk_mq_unregister_hctx(hctx); |
375 | kobject_del(&ctx->kobj); | 375 | |
376 | hctx_for_each_ctx(hctx, ctx, j) | ||
376 | kobject_put(&ctx->kobj); | 377 | kobject_put(&ctx->kobj); |
377 | } | 378 | |
378 | kobject_del(&hctx->kobj); | ||
379 | kobject_put(&hctx->kobj); | 379 | kobject_put(&hctx->kobj); |
380 | } | 380 | } |
381 | 381 | ||
@@ -386,15 +386,30 @@ void blk_mq_unregister_disk(struct gendisk *disk) | |||
386 | kobject_put(&disk_to_dev(disk)->kobj); | 386 | kobject_put(&disk_to_dev(disk)->kobj); |
387 | } | 387 | } |
388 | 388 | ||
389 | static void blk_mq_sysfs_init(struct request_queue *q) | ||
390 | { | ||
391 | struct blk_mq_hw_ctx *hctx; | ||
392 | struct blk_mq_ctx *ctx; | ||
393 | int i, j; | ||
394 | |||
395 | kobject_init(&q->mq_kobj, &blk_mq_ktype); | ||
396 | |||
397 | queue_for_each_hw_ctx(q, hctx, i) { | ||
398 | kobject_init(&hctx->kobj, &blk_mq_hw_ktype); | ||
399 | |||
400 | hctx_for_each_ctx(hctx, ctx, j) | ||
401 | kobject_init(&ctx->kobj, &blk_mq_ctx_ktype); | ||
402 | } | ||
403 | } | ||
404 | |||
389 | int blk_mq_register_disk(struct gendisk *disk) | 405 | int blk_mq_register_disk(struct gendisk *disk) |
390 | { | 406 | { |
391 | struct device *dev = disk_to_dev(disk); | 407 | struct device *dev = disk_to_dev(disk); |
392 | struct request_queue *q = disk->queue; | 408 | struct request_queue *q = disk->queue; |
393 | struct blk_mq_hw_ctx *hctx; | 409 | struct blk_mq_hw_ctx *hctx; |
394 | struct blk_mq_ctx *ctx; | 410 | int ret, i; |
395 | int ret, i, j; | ||
396 | 411 | ||
397 | kobject_init(&q->mq_kobj, &blk_mq_ktype); | 412 | blk_mq_sysfs_init(q); |
398 | 413 | ||
399 | ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); | 414 | ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); |
400 | if (ret < 0) | 415 | if (ret < 0) |
@@ -403,20 +418,10 @@ int blk_mq_register_disk(struct gendisk *disk) | |||
403 | kobject_uevent(&q->mq_kobj, KOBJ_ADD); | 418 | kobject_uevent(&q->mq_kobj, KOBJ_ADD); |
404 | 419 | ||
405 | queue_for_each_hw_ctx(q, hctx, i) { | 420 | queue_for_each_hw_ctx(q, hctx, i) { |
406 | kobject_init(&hctx->kobj, &blk_mq_hw_ktype); | 421 | hctx->flags |= BLK_MQ_F_SYSFS_UP; |
407 | ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", i); | 422 | ret = blk_mq_register_hctx(hctx); |
408 | if (ret) | 423 | if (ret) |
409 | break; | 424 | break; |
410 | |||
411 | if (!hctx->nr_ctx) | ||
412 | continue; | ||
413 | |||
414 | hctx_for_each_ctx(hctx, ctx, j) { | ||
415 | kobject_init(&ctx->kobj, &blk_mq_ctx_ktype); | ||
416 | ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu); | ||
417 | if (ret) | ||
418 | break; | ||
419 | } | ||
420 | } | 425 | } |
421 | 426 | ||
422 | if (ret) { | 427 | if (ret) { |
@@ -426,3 +431,26 @@ int blk_mq_register_disk(struct gendisk *disk) | |||
426 | 431 | ||
427 | return 0; | 432 | return 0; |
428 | } | 433 | } |
434 | |||
435 | void blk_mq_sysfs_unregister(struct request_queue *q) | ||
436 | { | ||
437 | struct blk_mq_hw_ctx *hctx; | ||
438 | int i; | ||
439 | |||
440 | queue_for_each_hw_ctx(q, hctx, i) | ||
441 | blk_mq_unregister_hctx(hctx); | ||
442 | } | ||
443 | |||
444 | int blk_mq_sysfs_register(struct request_queue *q) | ||
445 | { | ||
446 | struct blk_mq_hw_ctx *hctx; | ||
447 | int i, ret = 0; | ||
448 | |||
449 | queue_for_each_hw_ctx(q, hctx, i) { | ||
450 | ret = blk_mq_register_hctx(hctx); | ||
451 | if (ret) | ||
452 | break; | ||
453 | } | ||
454 | |||
455 | return ret; | ||
456 | } | ||
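The new blk_mq_sysfs_register()/blk_mq_sysfs_unregister() pair above lets the core tear down and re-create the per-hctx and per-ctx kobjects around a queue remap (e.g. after CPU hotplug). The intended call pattern is roughly the following (sketch; this function is a simplification, not the patch's actual reinit code):

static void remap_hw_queues(struct request_queue *q)
{
	blk_mq_sysfs_unregister(q);

	/* ... rebuild q->mq_map and the ctx <-> hctx assignments ... */

	blk_mq_sysfs_register(q);
}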
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 83ae96c51a27..d90c4aeb7dd3 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -1,78 +1,345 @@ | |||
1 | /* | ||
2 | * Fast and scalable bitmap tagging variant. Uses sparser bitmaps spread | ||
3 | * over multiple cachelines to avoid ping-pong between multiple submitters | ||
4 | * or submitter and completer. Uses rolling wakeups to avoid falling off |
5 | * the scaling cliff when we run out of tags and have to start putting | ||
6 | * submitters to sleep. | ||
7 | * | ||
8 | * Uses active queue tracking to support fairer distribution of tags | ||
9 | * between multiple submitters when a shared tag map is used. | ||
10 | * | ||
11 | * Copyright (C) 2013-2014 Jens Axboe | ||
12 | */ | ||
1 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
2 | #include <linux/module.h> | 14 | #include <linux/module.h> |
3 | #include <linux/percpu_ida.h> | 15 | #include <linux/random.h> |
4 | 16 | ||
5 | #include <linux/blk-mq.h> | 17 | #include <linux/blk-mq.h> |
6 | #include "blk.h" | 18 | #include "blk.h" |
7 | #include "blk-mq.h" | 19 | #include "blk-mq.h" |
8 | #include "blk-mq-tag.h" | 20 | #include "blk-mq-tag.h" |
9 | 21 | ||
22 | static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt) | ||
23 | { | ||
24 | int i; | ||
25 | |||
26 | for (i = 0; i < bt->map_nr; i++) { | ||
27 | struct blk_align_bitmap *bm = &bt->map[i]; | ||
28 | int ret; | ||
29 | |||
30 | ret = find_first_zero_bit(&bm->word, bm->depth); | ||
31 | if (ret < bm->depth) | ||
32 | return true; | ||
33 | } | ||
34 | |||
35 | return false; | ||
36 | } | ||
37 | |||
38 | bool blk_mq_has_free_tags(struct blk_mq_tags *tags) | ||
39 | { | ||
40 | if (!tags) | ||
41 | return true; | ||
42 | |||
43 | return bt_has_free_tags(&tags->bitmap_tags); | ||
44 | } | ||
45 | |||
46 | static inline void bt_index_inc(unsigned int *index) | ||
47 | { | ||
48 | *index = (*index + 1) & (BT_WAIT_QUEUES - 1); | ||
49 | } | ||
50 | |||
10 | /* | 51 | /* |
11 | * Per tagged queue (tag address space) map | 52 | * If a previously inactive queue goes active, bump the active user count. |
12 | */ | 53 | */ |
13 | struct blk_mq_tags { | 54 | bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) |
14 | unsigned int nr_tags; | 55 | { |
15 | unsigned int nr_reserved_tags; | 56 | if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) && |
16 | unsigned int nr_batch_move; | 57 | !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) |
17 | unsigned int nr_max_cache; | 58 | atomic_inc(&hctx->tags->active_queues); |
18 | 59 | ||
19 | struct percpu_ida free_tags; | 60 | return true; |
20 | struct percpu_ida reserved_tags; | 61 | } |
21 | }; | ||
22 | 62 | ||
23 | void blk_mq_wait_for_tags(struct blk_mq_tags *tags) | 63 | /* |
64 | * Wakeup all potentially sleeping on normal (non-reserved) tags | ||
65 | */ | ||
66 | static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags) | ||
24 | { | 67 | { |
25 | int tag = blk_mq_get_tag(tags, __GFP_WAIT, false); | 68 | struct blk_mq_bitmap_tags *bt; |
26 | blk_mq_put_tag(tags, tag); | 69 | int i, wake_index; |
70 | |||
71 | bt = &tags->bitmap_tags; | ||
72 | wake_index = bt->wake_index; | ||
73 | for (i = 0; i < BT_WAIT_QUEUES; i++) { | ||
74 | struct bt_wait_state *bs = &bt->bs[wake_index]; | ||
75 | |||
76 | if (waitqueue_active(&bs->wait)) | ||
77 | wake_up(&bs->wait); | ||
78 | |||
79 | bt_index_inc(&wake_index); | ||
80 | } | ||
27 | } | 81 | } |
28 | 82 | ||
29 | bool blk_mq_has_free_tags(struct blk_mq_tags *tags) | 83 | /* |
84 | * If a previously busy queue goes inactive, potential waiters could now | ||
85 | * be allowed to queue. Wake them up and check. | ||
86 | */ | ||
87 | void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) | ||
88 | { | ||
89 | struct blk_mq_tags *tags = hctx->tags; | ||
90 | |||
91 | if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) | ||
92 | return; | ||
93 | |||
94 | atomic_dec(&tags->active_queues); | ||
95 | |||
96 | blk_mq_tag_wakeup_all(tags); | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * For shared tag users, we track the number of currently active users | ||
101 | * and attempt to provide a fair share of the tag depth for each of them. | ||
102 | */ | ||
103 | static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx, | ||
104 | struct blk_mq_bitmap_tags *bt) | ||
105 | { | ||
106 | unsigned int depth, users; | ||
107 | |||
108 | if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED)) | ||
109 | return true; | ||
110 | if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) | ||
111 | return true; | ||
112 | |||
113 | /* | ||
114 | * Don't try dividing an ant | ||
115 | */ | ||
116 | if (bt->depth == 1) | ||
117 | return true; | ||
118 | |||
119 | users = atomic_read(&hctx->tags->active_queues); | ||
120 | if (!users) | ||
121 | return true; | ||
122 | |||
123 | /* | ||
124 | * Allow at least some tags | ||
125 | */ | ||
126 | depth = max((bt->depth + users - 1) / users, 4U); | ||
127 | return atomic_read(&hctx->nr_active) < depth; | ||
128 | } | ||
129 | |||
130 | static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag) | ||
30 | { | 131 | { |
31 | return !tags || | 132 | int tag, org_last_tag, end; |
32 | percpu_ida_free_tags(&tags->free_tags, nr_cpu_ids) != 0; | 133 | |
134 | org_last_tag = last_tag; | ||
135 | end = bm->depth; | ||
136 | do { | ||
137 | restart: | ||
138 | tag = find_next_zero_bit(&bm->word, end, last_tag); | ||
139 | if (unlikely(tag >= end)) { | ||
140 | /* | ||
141 | * We started with an offset, start from 0 to | ||
142 | * exhaust the map. | ||
143 | */ | ||
144 | if (org_last_tag && last_tag) { | ||
145 | end = last_tag; | ||
146 | last_tag = 0; | ||
147 | goto restart; | ||
148 | } | ||
149 | return -1; | ||
150 | } | ||
151 | last_tag = tag + 1; | ||
152 | } while (test_and_set_bit_lock(tag, &bm->word)); | ||
153 | |||
154 | return tag; | ||
33 | } | 155 | } |
34 | 156 | ||
35 | static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp) | 157 | /* |
158 | * Straight forward bitmap tag implementation, where each bit is a tag | ||
159 | * (cleared == free, and set == busy). The small twist is using per-cpu | ||
160 | * last_tag caches, which blk-mq stores in the blk_mq_ctx software queue | ||
161 | * contexts. This enables us to drastically limit the space searched, | ||
162 | * without dirtying an extra shared cacheline like we would if we stored | ||
163 | * the cache value inside the shared blk_mq_bitmap_tags structure. On top | ||
164 | * of that, each word of tags is in a separate cacheline. This means that | ||
165 | * multiple users will tend to stick to different cachelines, at least | ||
166 | * until the map is exhausted. | ||
167 | */ | ||
168 | static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt, | ||
169 | unsigned int *tag_cache) | ||
36 | { | 170 | { |
171 | unsigned int last_tag, org_last_tag; | ||
172 | int index, i, tag; | ||
173 | |||
174 | if (!hctx_may_queue(hctx, bt)) | ||
175 | return -1; | ||
176 | |||
177 | last_tag = org_last_tag = *tag_cache; | ||
178 | index = TAG_TO_INDEX(bt, last_tag); | ||
179 | |||
180 | for (i = 0; i < bt->map_nr; i++) { | ||
181 | tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag)); | ||
182 | if (tag != -1) { | ||
183 | tag += (index << bt->bits_per_word); | ||
184 | goto done; | ||
185 | } | ||
186 | |||
187 | last_tag = 0; | ||
188 | if (++index >= bt->map_nr) | ||
189 | index = 0; | ||
190 | } | ||
191 | |||
192 | *tag_cache = 0; | ||
193 | return -1; | ||
194 | |||
195 | /* | ||
196 | * Only update the cache from the allocation path, if we ended | ||
197 | * up using the specific cached tag. | ||
198 | */ | ||
199 | done: | ||
200 | if (tag == org_last_tag) { | ||
201 | last_tag = tag + 1; | ||
202 | if (last_tag >= bt->depth - 1) | ||
203 | last_tag = 0; | ||
204 | |||
205 | *tag_cache = last_tag; | ||
206 | } | ||
207 | |||
208 | return tag; | ||
209 | } | ||
210 | |||
211 | static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt, | ||
212 | struct blk_mq_hw_ctx *hctx) | ||
213 | { | ||
214 | struct bt_wait_state *bs; | ||
215 | |||
216 | if (!hctx) | ||
217 | return &bt->bs[0]; | ||
218 | |||
219 | bs = &bt->bs[hctx->wait_index]; | ||
220 | bt_index_inc(&hctx->wait_index); | ||
221 | return bs; | ||
222 | } | ||
223 | |||
224 | static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx, | ||
225 | unsigned int *last_tag, gfp_t gfp) | ||
226 | { | ||
227 | struct bt_wait_state *bs; | ||
228 | DEFINE_WAIT(wait); | ||
37 | int tag; | 229 | int tag; |
38 | 230 | ||
39 | tag = percpu_ida_alloc(&tags->free_tags, (gfp & __GFP_WAIT) ? | 231 | tag = __bt_get(hctx, bt, last_tag); |
40 | TASK_UNINTERRUPTIBLE : TASK_RUNNING); | 232 | if (tag != -1) |
41 | if (tag < 0) | 233 | return tag; |
42 | return BLK_MQ_TAG_FAIL; | 234 | |
43 | return tag + tags->nr_reserved_tags; | 235 | if (!(gfp & __GFP_WAIT)) |
236 | return -1; | ||
237 | |||
238 | bs = bt_wait_ptr(bt, hctx); | ||
239 | do { | ||
240 | bool was_empty; | ||
241 | |||
242 | was_empty = list_empty(&wait.task_list); | ||
243 | prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE); | ||
244 | |||
245 | tag = __bt_get(hctx, bt, last_tag); | ||
246 | if (tag != -1) | ||
247 | break; | ||
248 | |||
249 | if (was_empty) | ||
250 | atomic_set(&bs->wait_cnt, bt->wake_cnt); | ||
251 | |||
252 | io_schedule(); | ||
253 | } while (1); | ||
254 | |||
255 | finish_wait(&bs->wait, &wait); | ||
256 | return tag; | ||
257 | } | ||
258 | |||
259 | static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags, | ||
260 | struct blk_mq_hw_ctx *hctx, | ||
261 | unsigned int *last_tag, gfp_t gfp) | ||
262 | { | ||
263 | int tag; | ||
264 | |||
265 | tag = bt_get(&tags->bitmap_tags, hctx, last_tag, gfp); | ||
266 | if (tag >= 0) | ||
267 | return tag + tags->nr_reserved_tags; | ||
268 | |||
269 | return BLK_MQ_TAG_FAIL; | ||
44 | } | 270 | } |
45 | 271 | ||
46 | static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags, | 272 | static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags, |
47 | gfp_t gfp) | 273 | gfp_t gfp) |
48 | { | 274 | { |
49 | int tag; | 275 | int tag, zero = 0; |
50 | 276 | ||
51 | if (unlikely(!tags->nr_reserved_tags)) { | 277 | if (unlikely(!tags->nr_reserved_tags)) { |
52 | WARN_ON_ONCE(1); | 278 | WARN_ON_ONCE(1); |
53 | return BLK_MQ_TAG_FAIL; | 279 | return BLK_MQ_TAG_FAIL; |
54 | } | 280 | } |
55 | 281 | ||
56 | tag = percpu_ida_alloc(&tags->reserved_tags, (gfp & __GFP_WAIT) ? | 282 | tag = bt_get(&tags->breserved_tags, NULL, &zero, gfp); |
57 | TASK_UNINTERRUPTIBLE : TASK_RUNNING); | ||
58 | if (tag < 0) | 283 | if (tag < 0) |
59 | return BLK_MQ_TAG_FAIL; | 284 | return BLK_MQ_TAG_FAIL; |
285 | |||
60 | return tag; | 286 | return tag; |
61 | } | 287 | } |
62 | 288 | ||
63 | unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp, bool reserved) | 289 | unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, |
290 | gfp_t gfp, bool reserved) | ||
64 | { | 291 | { |
65 | if (!reserved) | 292 | if (!reserved) |
66 | return __blk_mq_get_tag(tags, gfp); | 293 | return __blk_mq_get_tag(hctx->tags, hctx, last_tag, gfp); |
67 | 294 | ||
68 | return __blk_mq_get_reserved_tag(tags, gfp); | 295 | return __blk_mq_get_reserved_tag(hctx->tags, gfp); |
296 | } | ||
297 | |||
298 | static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt) | ||
299 | { | ||
300 | int i, wake_index; | ||
301 | |||
302 | wake_index = bt->wake_index; | ||
303 | for (i = 0; i < BT_WAIT_QUEUES; i++) { | ||
304 | struct bt_wait_state *bs = &bt->bs[wake_index]; | ||
305 | |||
306 | if (waitqueue_active(&bs->wait)) { | ||
307 | if (wake_index != bt->wake_index) | ||
308 | bt->wake_index = wake_index; | ||
309 | |||
310 | return bs; | ||
311 | } | ||
312 | |||
313 | bt_index_inc(&wake_index); | ||
314 | } | ||
315 | |||
316 | return NULL; | ||
317 | } | ||
318 | |||
319 | static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag) | ||
320 | { | ||
321 | const int index = TAG_TO_INDEX(bt, tag); | ||
322 | struct bt_wait_state *bs; | ||
323 | |||
324 | /* | ||
325 | * The unlock memory barrier needs to order access to the request in | ||
326 | * the free path against clearing the tag bit | ||
327 | */ | ||
328 | clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word); | ||
329 | |||
330 | bs = bt_wake_ptr(bt); | ||
331 | if (bs && atomic_dec_and_test(&bs->wait_cnt)) { | ||
332 | atomic_set(&bs->wait_cnt, bt->wake_cnt); | ||
333 | bt_index_inc(&bt->wake_index); | ||
334 | wake_up(&bs->wait); | ||
335 | } | ||
69 | } | 336 | } |
70 | 337 | ||
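bt_clear_tag() pairs the lock-releasing clear with a throttled wakeup: each of the BT_WAIT_QUEUES wait queues carries a wait_cnt, only every bt->wake_cnt frees does one queue get woken, and wake_index then rotates so the next batch wakes a different queue. A rough single-threaded model of that accounting follows; the names are hypothetical and the real code uses atomics and kernel wait queues, but the batching arithmetic is the same.

#include <stdio.h>

#define BT_WAIT_QUEUES	8

struct bt_ws {
	int wait_cnt;	/* frees left before this queue is woken */
	int sleepers;	/* stand-in for waitqueue_active() */
};

/* Called on every tag free: wake at most one queue per wake_cnt frees. */
static void tag_freed(struct bt_ws ws[BT_WAIT_QUEUES], int *wake_index,
		      int wake_cnt)
{
	int i;

	for (i = 0; i < BT_WAIT_QUEUES; i++) {
		int idx = (*wake_index + i) % BT_WAIT_QUEUES;
		struct bt_ws *bs = &ws[idx];

		if (!bs->sleepers)
			continue;
		if (--bs->wait_cnt == 0) {
			bs->wait_cnt = wake_cnt;	/* re-arm the batch */
			*wake_index = (idx + 1) % BT_WAIT_QUEUES;
			bs->sleepers--;
			printf("woke a sleeper on queue %d\n", idx);
		}
		return;		/* only the first active queue is considered */
	}
}

int main(void)
{
	struct bt_ws ws[BT_WAIT_QUEUES] = { { .wait_cnt = 4, .sleepers = 2 } };
	int wake_index = 0, i;

	/* eight frees with a batch of four: two wakeups on queue 0 */
	for (i = 0; i < 8; i++)
		tag_freed(ws, &wake_index, 4);
	return 0;
}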
71 | static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag) | 338 | static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag) |
72 | { | 339 | { |
73 | BUG_ON(tag >= tags->nr_tags); | 340 | BUG_ON(tag >= tags->nr_tags); |
74 | 341 | ||
75 | percpu_ida_free(&tags->free_tags, tag - tags->nr_reserved_tags); | 342 | bt_clear_tag(&tags->bitmap_tags, tag); |
76 | } | 343 | } |
77 | 344 | ||
78 | static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags, | 345 | static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags, |
@@ -80,22 +347,43 @@ static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags, | |||
80 | { | 347 | { |
81 | BUG_ON(tag >= tags->nr_reserved_tags); | 348 | BUG_ON(tag >= tags->nr_reserved_tags); |
82 | 349 | ||
83 | percpu_ida_free(&tags->reserved_tags, tag); | 350 | bt_clear_tag(&tags->breserved_tags, tag); |
84 | } | 351 | } |
85 | 352 | ||
86 | void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag) | 353 | void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, |
354 | unsigned int *last_tag) | ||
87 | { | 355 | { |
88 | if (tag >= tags->nr_reserved_tags) | 356 | struct blk_mq_tags *tags = hctx->tags; |
89 | __blk_mq_put_tag(tags, tag); | 357 | |
90 | else | 358 | if (tag >= tags->nr_reserved_tags) { |
359 | const int real_tag = tag - tags->nr_reserved_tags; | ||
360 | |||
361 | __blk_mq_put_tag(tags, real_tag); | ||
362 | *last_tag = real_tag; | ||
363 | } else | ||
91 | __blk_mq_put_reserved_tag(tags, tag); | 364 | __blk_mq_put_reserved_tag(tags, tag); |
92 | } | 365 | } |
93 | 366 | ||
94 | static int __blk_mq_tag_iter(unsigned id, void *data) | 367 | static void bt_for_each_free(struct blk_mq_bitmap_tags *bt, |
368 | unsigned long *free_map, unsigned int off) | ||
95 | { | 369 | { |
96 | unsigned long *tag_map = data; | 370 | int i; |
97 | __set_bit(id, tag_map); | 371 | |
98 | return 0; | 372 | for (i = 0; i < bt->map_nr; i++) { |
373 | struct blk_align_bitmap *bm = &bt->map[i]; | ||
374 | int bit = 0; | ||
375 | |||
376 | do { | ||
377 | bit = find_next_zero_bit(&bm->word, bm->depth, bit); | ||
378 | if (bit >= bm->depth) | ||
379 | break; | ||
380 | |||
381 | __set_bit(bit + off, free_map); | ||
382 | bit++; | ||
383 | } while (1); | ||
384 | |||
385 | off += (1 << bt->bits_per_word); | ||
386 | } | ||
99 | } | 387 | } |
100 | 388 | ||
101 | void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, | 389 | void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, |
@@ -109,21 +397,128 @@ void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, | |||
109 | if (!tag_map) | 397 | if (!tag_map) |
110 | return; | 398 | return; |
111 | 399 | ||
112 | percpu_ida_for_each_free(&tags->free_tags, __blk_mq_tag_iter, tag_map); | 400 | bt_for_each_free(&tags->bitmap_tags, tag_map, tags->nr_reserved_tags); |
113 | if (tags->nr_reserved_tags) | 401 | if (tags->nr_reserved_tags) |
114 | percpu_ida_for_each_free(&tags->reserved_tags, __blk_mq_tag_iter, | 402 | bt_for_each_free(&tags->breserved_tags, tag_map, 0); |
115 | tag_map); | ||
116 | 403 | ||
117 | fn(data, tag_map); | 404 | fn(data, tag_map); |
118 | kfree(tag_map); | 405 | kfree(tag_map); |
119 | } | 406 | } |
407 | EXPORT_SYMBOL(blk_mq_tag_busy_iter); | ||
408 | |||
409 | static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt) | ||
410 | { | ||
411 | unsigned int i, used; | ||
412 | |||
413 | for (i = 0, used = 0; i < bt->map_nr; i++) { | ||
414 | struct blk_align_bitmap *bm = &bt->map[i]; | ||
415 | |||
416 | used += bitmap_weight(&bm->word, bm->depth); | ||
417 | } | ||
418 | |||
419 | return bt->depth - used; | ||
420 | } | ||
421 | |||
422 | static void bt_update_count(struct blk_mq_bitmap_tags *bt, | ||
423 | unsigned int depth) | ||
424 | { | ||
425 | unsigned int tags_per_word = 1U << bt->bits_per_word; | ||
426 | unsigned int map_depth = depth; | ||
427 | |||
428 | if (depth) { | ||
429 | int i; | ||
430 | |||
431 | for (i = 0; i < bt->map_nr; i++) { | ||
432 | bt->map[i].depth = min(map_depth, tags_per_word); | ||
433 | map_depth -= bt->map[i].depth; | ||
434 | } | ||
435 | } | ||
436 | |||
437 | bt->wake_cnt = BT_WAIT_BATCH; | ||
438 | if (bt->wake_cnt > depth / 4) | ||
439 | bt->wake_cnt = max(1U, depth / 4); | ||
440 | |||
441 | bt->depth = depth; | ||
442 | } | ||
443 | |||
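bt_update_count() spreads the requested depth across the per-word maps (each word holds at most 2^bits_per_word tags, the last one the remainder) and caps the wakeup batch at a quarter of the depth so shallow queues still wake sleepers. The same arithmetic as a small standalone sketch, with a hypothetical helper in place of the kernel function:

#include <stdio.h>

/*
 * Mirrors the depth distribution and wake-batch calculation: spread
 * 'depth' tags over map_nr words, then pick the wakeup batch size.
 */
static void update_count(unsigned int *word_depth, unsigned int map_nr,
			 unsigned int bits_per_word, unsigned int depth)
{
	unsigned int tags_per_word = 1U << bits_per_word;
	unsigned int map_depth = depth, wake_cnt, i;

	for (i = 0; i < map_nr; i++) {
		word_depth[i] = map_depth < tags_per_word ? map_depth : tags_per_word;
		map_depth -= word_depth[i];
	}

	wake_cnt = 8;					/* BT_WAIT_BATCH */
	if (wake_cnt > depth / 4)
		wake_cnt = depth / 4 ? depth / 4 : 1;	/* max(1U, depth / 4) */

	printf("depth=%u wake_cnt=%u\n", depth, wake_cnt);
}

int main(void)
{
	unsigned int word_depth[4], i;

	update_count(word_depth, 4, 4, 42);	/* 4 words of 16 tags each */
	for (i = 0; i < 4; i++)
		printf("word %u holds %u tags\n", i, word_depth[i]);
	return 0;
}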
444 | static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth, | ||
445 | int node, bool reserved) | ||
446 | { | ||
447 | int i; | ||
448 | |||
449 | bt->bits_per_word = ilog2(BITS_PER_LONG); | ||
450 | |||
451 | /* | ||
452 | * Depth can be zero for reserved tags; that's not a failure | ||
453 | * condition. | ||
454 | */ | ||
455 | if (depth) { | ||
456 | unsigned int nr, tags_per_word; | ||
457 | |||
458 | tags_per_word = (1 << bt->bits_per_word); | ||
459 | |||
460 | /* | ||
461 | * If the tag space is small, shrink the number of tags | ||
462 | * per word so we spread over a few cachelines, at least. | ||
463 | * If less than 4 tags, just forget about it, it's not | ||
464 | * going to work optimally anyway. | ||
465 | */ | ||
466 | if (depth >= 4) { | ||
467 | while (tags_per_word * 4 > depth) { | ||
468 | bt->bits_per_word--; | ||
469 | tags_per_word = (1 << bt->bits_per_word); | ||
470 | } | ||
471 | } | ||
472 | |||
473 | nr = ALIGN(depth, tags_per_word) / tags_per_word; | ||
474 | bt->map = kzalloc_node(nr * sizeof(struct blk_align_bitmap), | ||
475 | GFP_KERNEL, node); | ||
476 | if (!bt->map) | ||
477 | return -ENOMEM; | ||
478 | |||
479 | bt->map_nr = nr; | ||
480 | } | ||
481 | |||
482 | bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL); | ||
483 | if (!bt->bs) { | ||
484 | kfree(bt->map); | ||
485 | return -ENOMEM; | ||
486 | } | ||
487 | |||
488 | for (i = 0; i < BT_WAIT_QUEUES; i++) | ||
489 | init_waitqueue_head(&bt->bs[i].wait); | ||
490 | |||
491 | bt_update_count(bt, depth); | ||
492 | return 0; | ||
493 | } | ||
494 | |||
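bt_alloc() starts from one bitmap word per unsigned long but, for small tag spaces of at least four tags, keeps halving bits_per_word until the tags spread over at least four words, i.e. over several cachelines instead of one hot word. A sketch of just that sizing decision (assumed standalone helper, with the ilog2() step open-coded):

#include <limits.h>
#include <stdio.h>

/* Pick bits_per_word so that small tag spaces span several words. */
static unsigned int pick_bits_per_word(unsigned int depth)
{
	unsigned int bits = 0, tags_per_word;

	/* ilog2(BITS_PER_LONG) */
	while ((1UL << (bits + 1)) <= CHAR_BIT * sizeof(unsigned long))
		bits++;

	tags_per_word = 1U << bits;
	if (depth >= 4) {
		while (tags_per_word * 4 > depth) {
			bits--;
			tags_per_word = 1U << bits;
		}
	}
	return bits;
}

int main(void)
{
	unsigned int depths[] = { 2, 31, 64, 256 };
	unsigned int i;

	for (i = 0; i < sizeof(depths) / sizeof(depths[0]); i++)
		printf("depth %3u -> bits_per_word %u\n",
		       depths[i], pick_bits_per_word(depths[i]));
	return 0;
}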
495 | static void bt_free(struct blk_mq_bitmap_tags *bt) | ||
496 | { | ||
497 | kfree(bt->map); | ||
498 | kfree(bt->bs); | ||
499 | } | ||
500 | |||
501 | static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags, | ||
502 | int node) | ||
503 | { | ||
504 | unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; | ||
505 | |||
506 | if (bt_alloc(&tags->bitmap_tags, depth, node, false)) | ||
507 | goto enomem; | ||
508 | if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true)) | ||
509 | goto enomem; | ||
510 | |||
511 | return tags; | ||
512 | enomem: | ||
513 | bt_free(&tags->bitmap_tags); | ||
514 | kfree(tags); | ||
515 | return NULL; | ||
516 | } | ||
120 | 517 | ||
121 | struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, | 518 | struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, |
122 | unsigned int reserved_tags, int node) | 519 | unsigned int reserved_tags, int node) |
123 | { | 520 | { |
124 | unsigned int nr_tags, nr_cache; | ||
125 | struct blk_mq_tags *tags; | 521 | struct blk_mq_tags *tags; |
126 | int ret; | ||
127 | 522 | ||
128 | if (total_tags > BLK_MQ_TAG_MAX) { | 523 | if (total_tags > BLK_MQ_TAG_MAX) { |
129 | pr_err("blk-mq: tag depth too large\n"); | 524 | pr_err("blk-mq: tag depth too large\n"); |
@@ -134,73 +529,59 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, | |||
134 | if (!tags) | 529 | if (!tags) |
135 | return NULL; | 530 | return NULL; |
136 | 531 | ||
137 | nr_tags = total_tags - reserved_tags; | ||
138 | nr_cache = nr_tags / num_possible_cpus(); | ||
139 | |||
140 | if (nr_cache < BLK_MQ_TAG_CACHE_MIN) | ||
141 | nr_cache = BLK_MQ_TAG_CACHE_MIN; | ||
142 | else if (nr_cache > BLK_MQ_TAG_CACHE_MAX) | ||
143 | nr_cache = BLK_MQ_TAG_CACHE_MAX; | ||
144 | |||
145 | tags->nr_tags = total_tags; | 532 | tags->nr_tags = total_tags; |
146 | tags->nr_reserved_tags = reserved_tags; | 533 | tags->nr_reserved_tags = reserved_tags; |
147 | tags->nr_max_cache = nr_cache; | ||
148 | tags->nr_batch_move = max(1u, nr_cache / 2); | ||
149 | 534 | ||
150 | ret = __percpu_ida_init(&tags->free_tags, tags->nr_tags - | 535 | return blk_mq_init_bitmap_tags(tags, node); |
151 | tags->nr_reserved_tags, | 536 | } |
152 | tags->nr_max_cache, | ||
153 | tags->nr_batch_move); | ||
154 | if (ret) | ||
155 | goto err_free_tags; | ||
156 | 537 | ||
157 | if (reserved_tags) { | 538 | void blk_mq_free_tags(struct blk_mq_tags *tags) |
158 | /* | 539 | { |
159 | * With max_cahe and batch set to 1, the allocator fallbacks to | 540 | bt_free(&tags->bitmap_tags); |
160 | * no cached. It's fine reserved tags allocation is slow. | 541 | bt_free(&tags->breserved_tags); |
161 | */ | 542 | kfree(tags); |
162 | ret = __percpu_ida_init(&tags->reserved_tags, reserved_tags, | 543 | } |
163 | 1, 1); | ||
164 | if (ret) | ||
165 | goto err_reserved_tags; | ||
166 | } | ||
167 | 544 | ||
168 | return tags; | 545 | void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag) |
546 | { | ||
547 | unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; | ||
169 | 548 | ||
170 | err_reserved_tags: | 549 | *tag = prandom_u32() % depth; |
171 | percpu_ida_destroy(&tags->free_tags); | ||
172 | err_free_tags: | ||
173 | kfree(tags); | ||
174 | return NULL; | ||
175 | } | 550 | } |
176 | 551 | ||
177 | void blk_mq_free_tags(struct blk_mq_tags *tags) | 552 | int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth) |
178 | { | 553 | { |
179 | percpu_ida_destroy(&tags->free_tags); | 554 | tdepth -= tags->nr_reserved_tags; |
180 | percpu_ida_destroy(&tags->reserved_tags); | 555 | if (tdepth > tags->nr_tags) |
181 | kfree(tags); | 556 | return -EINVAL; |
557 | |||
558 | /* | ||
559 | * We don't need to (and can't) update reserved tags here; they remain | ||
560 | * static and should never need resizing. | ||
561 | */ | ||
562 | bt_update_count(&tags->bitmap_tags, tdepth); | ||
563 | blk_mq_tag_wakeup_all(tags); | ||
564 | return 0; | ||
182 | } | 565 | } |
183 | 566 | ||
184 | ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) | 567 | ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) |
185 | { | 568 | { |
186 | char *orig_page = page; | 569 | char *orig_page = page; |
187 | unsigned int cpu; | 570 | unsigned int free, res; |
188 | 571 | ||
189 | if (!tags) | 572 | if (!tags) |
190 | return 0; | 573 | return 0; |
191 | 574 | ||
192 | page += sprintf(page, "nr_tags=%u, reserved_tags=%u, batch_move=%u," | 575 | page += sprintf(page, "nr_tags=%u, reserved_tags=%u, " |
193 | " max_cache=%u\n", tags->nr_tags, tags->nr_reserved_tags, | 576 | "bits_per_word=%u\n", |
194 | tags->nr_batch_move, tags->nr_max_cache); | 577 | tags->nr_tags, tags->nr_reserved_tags, |
578 | tags->bitmap_tags.bits_per_word); | ||
195 | 579 | ||
196 | page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", | 580 | free = bt_unused_tags(&tags->bitmap_tags); |
197 | percpu_ida_free_tags(&tags->free_tags, nr_cpu_ids), | 581 | res = bt_unused_tags(&tags->breserved_tags); |
198 | percpu_ida_free_tags(&tags->reserved_tags, nr_cpu_ids)); | ||
199 | 582 | ||
200 | for_each_possible_cpu(cpu) { | 583 | page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res); |
201 | page += sprintf(page, " cpu%02u: nr_free=%u\n", cpu, | 584 | page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues)); |
202 | percpu_ida_free_tags(&tags->free_tags, cpu)); | ||
203 | } | ||
204 | 585 | ||
205 | return page - orig_page; | 586 | return page - orig_page; |
206 | } | 587 | } |
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 947ba2c6148e..c959de58d2a5 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h | |||
@@ -1,17 +1,59 @@ | |||
1 | #ifndef INT_BLK_MQ_TAG_H | 1 | #ifndef INT_BLK_MQ_TAG_H |
2 | #define INT_BLK_MQ_TAG_H | 2 | #define INT_BLK_MQ_TAG_H |
3 | 3 | ||
4 | struct blk_mq_tags; | 4 | #include "blk-mq.h" |
5 | |||
6 | enum { | ||
7 | BT_WAIT_QUEUES = 8, | ||
8 | BT_WAIT_BATCH = 8, | ||
9 | }; | ||
10 | |||
11 | struct bt_wait_state { | ||
12 | atomic_t wait_cnt; | ||
13 | wait_queue_head_t wait; | ||
14 | } ____cacheline_aligned_in_smp; | ||
15 | |||
16 | #define TAG_TO_INDEX(bt, tag) ((tag) >> (bt)->bits_per_word) | ||
17 | #define TAG_TO_BIT(bt, tag) ((tag) & ((1 << (bt)->bits_per_word) - 1)) | ||
18 | |||
19 | struct blk_mq_bitmap_tags { | ||
20 | unsigned int depth; | ||
21 | unsigned int wake_cnt; | ||
22 | unsigned int bits_per_word; | ||
23 | |||
24 | unsigned int map_nr; | ||
25 | struct blk_align_bitmap *map; | ||
26 | |||
27 | unsigned int wake_index; | ||
28 | struct bt_wait_state *bs; | ||
29 | }; | ||
30 | |||
31 | /* | ||
32 | * Tag address space map. | ||
33 | */ | ||
34 | struct blk_mq_tags { | ||
35 | unsigned int nr_tags; | ||
36 | unsigned int nr_reserved_tags; | ||
37 | |||
38 | atomic_t active_queues; | ||
39 | |||
40 | struct blk_mq_bitmap_tags bitmap_tags; | ||
41 | struct blk_mq_bitmap_tags breserved_tags; | ||
42 | |||
43 | struct request **rqs; | ||
44 | struct list_head page_list; | ||
45 | }; | ||
46 | |||
5 | 47 | ||
6 | extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node); | 48 | extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node); |
7 | extern void blk_mq_free_tags(struct blk_mq_tags *tags); | 49 | extern void blk_mq_free_tags(struct blk_mq_tags *tags); |
8 | 50 | ||
9 | extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp, bool reserved); | 51 | extern unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved); |
10 | extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags); | 52 | extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, unsigned int *last_tag); |
11 | extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag); | ||
12 | extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data); | ||
13 | extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); | 53 | extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); |
14 | extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); | 54 | extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); |
55 | extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag); | ||
56 | extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); | ||
15 | 57 | ||
16 | enum { | 58 | enum { |
17 | BLK_MQ_TAG_CACHE_MIN = 1, | 59 | BLK_MQ_TAG_CACHE_MIN = 1, |
@@ -24,4 +66,23 @@ enum { | |||
24 | BLK_MQ_TAG_MAX = BLK_MQ_TAG_FAIL - 1, | 66 | BLK_MQ_TAG_MAX = BLK_MQ_TAG_FAIL - 1, |
25 | }; | 67 | }; |
26 | 68 | ||
69 | extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *); | ||
70 | extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); | ||
71 | |||
72 | static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) | ||
73 | { | ||
74 | if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) | ||
75 | return false; | ||
76 | |||
77 | return __blk_mq_tag_busy(hctx); | ||
78 | } | ||
79 | |||
80 | static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) | ||
81 | { | ||
82 | if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) | ||
83 | return; | ||
84 | |||
85 | __blk_mq_tag_idle(hctx); | ||
86 | } | ||
87 | |||
27 | #endif | 88 | #endif |
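The new header keeps the tag-to-word mapping in two macros: TAG_TO_INDEX() selects which blk_align_bitmap word a tag lives in and TAG_TO_BIT() the bit within that word, both driven by bits_per_word. The tiny program below illustrates the same decomposition, parameterized directly on bits_per_word rather than on a struct blk_mq_bitmap_tags pointer:

#include <stdio.h>

#define TAG_TO_INDEX(bits_per_word, tag) ((tag) >> (bits_per_word))
#define TAG_TO_BIT(bits_per_word, tag)	 ((tag) & ((1U << (bits_per_word)) - 1))

int main(void)
{
	unsigned int bits_per_word = 4;		/* 16 tags per bitmap word */
	unsigned int tag;

	for (tag = 0; tag < 40; tag += 13)
		printf("tag %2u -> word %u, bit %u\n", tag,
		       TAG_TO_INDEX(bits_per_word, tag),
		       TAG_TO_BIT(bits_per_word, tag));
	return 0;
}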
diff --git a/block/blk-mq.c b/block/blk-mq.c index 1d2a9bdbee57..0f5879c42dcd 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -1,3 +1,9 @@ | |||
1 | /* | ||
2 | * Block multiqueue core code | ||
3 | * | ||
4 | * Copyright (C) 2013-2014 Jens Axboe | ||
5 | * Copyright (C) 2013-2014 Christoph Hellwig | ||
6 | */ | ||
1 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
2 | #include <linux/module.h> | 8 | #include <linux/module.h> |
3 | #include <linux/backing-dev.h> | 9 | #include <linux/backing-dev.h> |
@@ -56,38 +62,40 @@ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) | |||
56 | { | 62 | { |
57 | unsigned int i; | 63 | unsigned int i; |
58 | 64 | ||
59 | for (i = 0; i < hctx->nr_ctx_map; i++) | 65 | for (i = 0; i < hctx->ctx_map.map_size; i++) |
60 | if (hctx->ctx_map[i]) | 66 | if (hctx->ctx_map.map[i].word) |
61 | return true; | 67 | return true; |
62 | 68 | ||
63 | return false; | 69 | return false; |
64 | } | 70 | } |
65 | 71 | ||
72 | static inline struct blk_align_bitmap *get_bm(struct blk_mq_hw_ctx *hctx, | ||
73 | struct blk_mq_ctx *ctx) | ||
74 | { | ||
75 | return &hctx->ctx_map.map[ctx->index_hw / hctx->ctx_map.bits_per_word]; | ||
76 | } | ||
77 | |||
78 | #define CTX_TO_BIT(hctx, ctx) \ | ||
79 | ((ctx)->index_hw & ((hctx)->ctx_map.bits_per_word - 1)) | ||
80 | |||
66 | /* | 81 | /* |
67 | * Mark this ctx as having pending work in this hardware queue | 82 | * Mark this ctx as having pending work in this hardware queue |
68 | */ | 83 | */ |
69 | static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx, | 84 | static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx, |
70 | struct blk_mq_ctx *ctx) | 85 | struct blk_mq_ctx *ctx) |
71 | { | 86 | { |
72 | if (!test_bit(ctx->index_hw, hctx->ctx_map)) | 87 | struct blk_align_bitmap *bm = get_bm(hctx, ctx); |
73 | set_bit(ctx->index_hw, hctx->ctx_map); | 88 | |
89 | if (!test_bit(CTX_TO_BIT(hctx, ctx), &bm->word)) | ||
90 | set_bit(CTX_TO_BIT(hctx, ctx), &bm->word); | ||
74 | } | 91 | } |
75 | 92 | ||
76 | static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, | 93 | static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, |
77 | gfp_t gfp, bool reserved) | 94 | struct blk_mq_ctx *ctx) |
78 | { | 95 | { |
79 | struct request *rq; | 96 | struct blk_align_bitmap *bm = get_bm(hctx, ctx); |
80 | unsigned int tag; | ||
81 | 97 | ||
82 | tag = blk_mq_get_tag(hctx->tags, gfp, reserved); | 98 | clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word); |
83 | if (tag != BLK_MQ_TAG_FAIL) { | ||
84 | rq = hctx->rqs[tag]; | ||
85 | rq->tag = tag; | ||
86 | |||
87 | return rq; | ||
88 | } | ||
89 | |||
90 | return NULL; | ||
91 | } | 99 | } |
92 | 100 | ||
93 | static int blk_mq_queue_enter(struct request_queue *q) | 101 | static int blk_mq_queue_enter(struct request_queue *q) |
@@ -186,78 +194,95 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, | |||
186 | if (blk_queue_io_stat(q)) | 194 | if (blk_queue_io_stat(q)) |
187 | rw_flags |= REQ_IO_STAT; | 195 | rw_flags |= REQ_IO_STAT; |
188 | 196 | ||
197 | INIT_LIST_HEAD(&rq->queuelist); | ||
198 | /* csd/requeue_work/fifo_time is initialized before use */ | ||
199 | rq->q = q; | ||
189 | rq->mq_ctx = ctx; | 200 | rq->mq_ctx = ctx; |
190 | rq->cmd_flags = rw_flags; | 201 | rq->cmd_flags |= rw_flags; |
191 | rq->start_time = jiffies; | 202 | /* do not touch atomic flags, it needs atomic ops against the timer */ |
203 | rq->cpu = -1; | ||
204 | INIT_HLIST_NODE(&rq->hash); | ||
205 | RB_CLEAR_NODE(&rq->rb_node); | ||
206 | rq->rq_disk = NULL; | ||
207 | rq->part = NULL; | ||
208 | #ifdef CONFIG_BLK_CGROUP | ||
209 | rq->rl = NULL; | ||
192 | set_start_time_ns(rq); | 210 | set_start_time_ns(rq); |
211 | rq->io_start_time_ns = 0; | ||
212 | #endif | ||
213 | rq->nr_phys_segments = 0; | ||
214 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | ||
215 | rq->nr_integrity_segments = 0; | ||
216 | #endif | ||
217 | rq->special = NULL; | ||
218 | /* tag was already set */ | ||
219 | rq->errors = 0; | ||
220 | |||
221 | rq->extra_len = 0; | ||
222 | rq->sense_len = 0; | ||
223 | rq->resid_len = 0; | ||
224 | rq->sense = NULL; | ||
225 | |||
226 | INIT_LIST_HEAD(&rq->timeout_list); | ||
227 | rq->end_io = NULL; | ||
228 | rq->end_io_data = NULL; | ||
229 | rq->next_rq = NULL; | ||
230 | |||
193 | ctx->rq_dispatched[rw_is_sync(rw_flags)]++; | 231 | ctx->rq_dispatched[rw_is_sync(rw_flags)]++; |
194 | } | 232 | } |
195 | 233 | ||
196 | static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, | 234 | static struct request * |
197 | int rw, gfp_t gfp, | 235 | __blk_mq_alloc_request(struct request_queue *q, struct blk_mq_hw_ctx *hctx, |
198 | bool reserved) | 236 | struct blk_mq_ctx *ctx, int rw, gfp_t gfp, bool reserved) |
199 | { | 237 | { |
200 | struct request *rq; | 238 | struct request *rq; |
239 | unsigned int tag; | ||
201 | 240 | ||
202 | do { | 241 | tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved); |
203 | struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); | 242 | if (tag != BLK_MQ_TAG_FAIL) { |
204 | struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); | 243 | rq = hctx->tags->rqs[tag]; |
205 | 244 | ||
206 | rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved); | 245 | rq->cmd_flags = 0; |
207 | if (rq) { | 246 | if (blk_mq_tag_busy(hctx)) { |
208 | blk_mq_rq_ctx_init(q, ctx, rq, rw); | 247 | rq->cmd_flags = REQ_MQ_INFLIGHT; |
209 | break; | 248 | atomic_inc(&hctx->nr_active); |
210 | } | 249 | } |
211 | 250 | ||
212 | blk_mq_put_ctx(ctx); | 251 | rq->tag = tag; |
213 | if (!(gfp & __GFP_WAIT)) | 252 | blk_mq_rq_ctx_init(q, ctx, rq, rw); |
214 | break; | 253 | return rq; |
215 | 254 | } | |
216 | __blk_mq_run_hw_queue(hctx); | ||
217 | blk_mq_wait_for_tags(hctx->tags); | ||
218 | } while (1); | ||
219 | 255 | ||
220 | return rq; | 256 | return NULL; |
221 | } | 257 | } |
222 | 258 | ||
223 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp) | 259 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, |
260 | bool reserved) | ||
224 | { | 261 | { |
262 | struct blk_mq_ctx *ctx; | ||
263 | struct blk_mq_hw_ctx *hctx; | ||
225 | struct request *rq; | 264 | struct request *rq; |
226 | 265 | ||
227 | if (blk_mq_queue_enter(q)) | 266 | if (blk_mq_queue_enter(q)) |
228 | return NULL; | 267 | return NULL; |
229 | 268 | ||
230 | rq = blk_mq_alloc_request_pinned(q, rw, gfp, false); | 269 | ctx = blk_mq_get_ctx(q); |
231 | if (rq) | 270 | hctx = q->mq_ops->map_queue(q, ctx->cpu); |
232 | blk_mq_put_ctx(rq->mq_ctx); | ||
233 | return rq; | ||
234 | } | ||
235 | |||
236 | struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, | ||
237 | gfp_t gfp) | ||
238 | { | ||
239 | struct request *rq; | ||
240 | 271 | ||
241 | if (blk_mq_queue_enter(q)) | 272 | rq = __blk_mq_alloc_request(q, hctx, ctx, rw, gfp & ~__GFP_WAIT, |
242 | return NULL; | 273 | reserved); |
274 | if (!rq && (gfp & __GFP_WAIT)) { | ||
275 | __blk_mq_run_hw_queue(hctx); | ||
276 | blk_mq_put_ctx(ctx); | ||
243 | 277 | ||
244 | rq = blk_mq_alloc_request_pinned(q, rw, gfp, true); | 278 | ctx = blk_mq_get_ctx(q); |
245 | if (rq) | 279 | hctx = q->mq_ops->map_queue(q, ctx->cpu); |
246 | blk_mq_put_ctx(rq->mq_ctx); | 280 | rq = __blk_mq_alloc_request(q, hctx, ctx, rw, gfp, reserved); |
281 | } | ||
282 | blk_mq_put_ctx(ctx); | ||
247 | return rq; | 283 | return rq; |
248 | } | 284 | } |
249 | EXPORT_SYMBOL(blk_mq_alloc_reserved_request); | 285 | EXPORT_SYMBOL(blk_mq_alloc_request); |
250 | |||
251 | /* | ||
252 | * Re-init and set pdu, if we have it | ||
253 | */ | ||
254 | void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) | ||
255 | { | ||
256 | blk_rq_init(hctx->queue, rq); | ||
257 | |||
258 | if (hctx->cmd_size) | ||
259 | rq->special = blk_mq_rq_to_pdu(rq); | ||
260 | } | ||
261 | 286 | ||
262 | static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, | 287 | static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, |
263 | struct blk_mq_ctx *ctx, struct request *rq) | 288 | struct blk_mq_ctx *ctx, struct request *rq) |
@@ -265,9 +290,11 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, | |||
265 | const int tag = rq->tag; | 290 | const int tag = rq->tag; |
266 | struct request_queue *q = rq->q; | 291 | struct request_queue *q = rq->q; |
267 | 292 | ||
268 | blk_mq_rq_init(hctx, rq); | 293 | if (rq->cmd_flags & REQ_MQ_INFLIGHT) |
269 | blk_mq_put_tag(hctx->tags, tag); | 294 | atomic_dec(&hctx->nr_active); |
270 | 295 | ||
296 | clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); | ||
297 | blk_mq_put_tag(hctx, tag, &ctx->last_tag); | ||
271 | blk_mq_queue_exit(q); | 298 | blk_mq_queue_exit(q); |
272 | } | 299 | } |
273 | 300 | ||
@@ -283,20 +310,47 @@ void blk_mq_free_request(struct request *rq) | |||
283 | __blk_mq_free_request(hctx, ctx, rq); | 310 | __blk_mq_free_request(hctx, ctx, rq); |
284 | } | 311 | } |
285 | 312 | ||
286 | bool blk_mq_end_io_partial(struct request *rq, int error, unsigned int nr_bytes) | 313 | /* |
314 | * Clone all relevant state from a request that has been put on hold in | ||
315 | * the flush state machine into the preallocated flush request that hangs | ||
316 | * off the request queue. | ||
317 | * | ||
318 | * For a driver the flush request should be invisible; that's why we are | ||
319 | * impersonating the original request here. | ||
320 | */ | ||
321 | void blk_mq_clone_flush_request(struct request *flush_rq, | ||
322 | struct request *orig_rq) | ||
287 | { | 323 | { |
288 | if (blk_update_request(rq, error, blk_rq_bytes(rq))) | 324 | struct blk_mq_hw_ctx *hctx = |
289 | return true; | 325 | orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu); |
326 | |||
327 | flush_rq->mq_ctx = orig_rq->mq_ctx; | ||
328 | flush_rq->tag = orig_rq->tag; | ||
329 | memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq), | ||
330 | hctx->cmd_size); | ||
331 | } | ||
290 | 332 | ||
333 | inline void __blk_mq_end_io(struct request *rq, int error) | ||
334 | { | ||
291 | blk_account_io_done(rq); | 335 | blk_account_io_done(rq); |
292 | 336 | ||
293 | if (rq->end_io) | 337 | if (rq->end_io) { |
294 | rq->end_io(rq, error); | 338 | rq->end_io(rq, error); |
295 | else | 339 | } else { |
340 | if (unlikely(blk_bidi_rq(rq))) | ||
341 | blk_mq_free_request(rq->next_rq); | ||
296 | blk_mq_free_request(rq); | 342 | blk_mq_free_request(rq); |
297 | return false; | 343 | } |
344 | } | ||
345 | EXPORT_SYMBOL(__blk_mq_end_io); | ||
346 | |||
347 | void blk_mq_end_io(struct request *rq, int error) | ||
348 | { | ||
349 | if (blk_update_request(rq, error, blk_rq_bytes(rq))) | ||
350 | BUG(); | ||
351 | __blk_mq_end_io(rq, error); | ||
298 | } | 352 | } |
299 | EXPORT_SYMBOL(blk_mq_end_io_partial); | 353 | EXPORT_SYMBOL(blk_mq_end_io); |
300 | 354 | ||
301 | static void __blk_mq_complete_request_remote(void *data) | 355 | static void __blk_mq_complete_request_remote(void *data) |
302 | { | 356 | { |
@@ -305,18 +359,22 @@ static void __blk_mq_complete_request_remote(void *data) | |||
305 | rq->q->softirq_done_fn(rq); | 359 | rq->q->softirq_done_fn(rq); |
306 | } | 360 | } |
307 | 361 | ||
308 | void __blk_mq_complete_request(struct request *rq) | 362 | static void blk_mq_ipi_complete_request(struct request *rq) |
309 | { | 363 | { |
310 | struct blk_mq_ctx *ctx = rq->mq_ctx; | 364 | struct blk_mq_ctx *ctx = rq->mq_ctx; |
365 | bool shared = false; | ||
311 | int cpu; | 366 | int cpu; |
312 | 367 | ||
313 | if (!ctx->ipi_redirect) { | 368 | if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) { |
314 | rq->q->softirq_done_fn(rq); | 369 | rq->q->softirq_done_fn(rq); |
315 | return; | 370 | return; |
316 | } | 371 | } |
317 | 372 | ||
318 | cpu = get_cpu(); | 373 | cpu = get_cpu(); |
319 | if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { | 374 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) |
375 | shared = cpus_share_cache(cpu, ctx->cpu); | ||
376 | |||
377 | if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { | ||
320 | rq->csd.func = __blk_mq_complete_request_remote; | 378 | rq->csd.func = __blk_mq_complete_request_remote; |
321 | rq->csd.info = rq; | 379 | rq->csd.info = rq; |
322 | rq->csd.flags = 0; | 380 | rq->csd.flags = 0; |
@@ -327,6 +385,16 @@ void __blk_mq_complete_request(struct request *rq) | |||
327 | put_cpu(); | 385 | put_cpu(); |
328 | } | 386 | } |
329 | 387 | ||
388 | void __blk_mq_complete_request(struct request *rq) | ||
389 | { | ||
390 | struct request_queue *q = rq->q; | ||
391 | |||
392 | if (!q->softirq_done_fn) | ||
393 | blk_mq_end_io(rq, rq->errors); | ||
394 | else | ||
395 | blk_mq_ipi_complete_request(rq); | ||
396 | } | ||
397 | |||
330 | /** | 398 | /** |
331 | * blk_mq_complete_request - end I/O on a request | 399 | * blk_mq_complete_request - end I/O on a request |
332 | * @rq: the request being processed | 400 | * @rq: the request being processed |
@@ -337,7 +405,9 @@ void __blk_mq_complete_request(struct request *rq) | |||
337 | **/ | 405 | **/ |
338 | void blk_mq_complete_request(struct request *rq) | 406 | void blk_mq_complete_request(struct request *rq) |
339 | { | 407 | { |
340 | if (unlikely(blk_should_fake_timeout(rq->q))) | 408 | struct request_queue *q = rq->q; |
409 | |||
410 | if (unlikely(blk_should_fake_timeout(q))) | ||
341 | return; | 411 | return; |
342 | if (!blk_mark_rq_complete(rq)) | 412 | if (!blk_mark_rq_complete(rq)) |
343 | __blk_mq_complete_request(rq); | 413 | __blk_mq_complete_request(rq); |
@@ -350,13 +420,31 @@ static void blk_mq_start_request(struct request *rq, bool last) | |||
350 | 420 | ||
351 | trace_block_rq_issue(q, rq); | 421 | trace_block_rq_issue(q, rq); |
352 | 422 | ||
423 | rq->resid_len = blk_rq_bytes(rq); | ||
424 | if (unlikely(blk_bidi_rq(rq))) | ||
425 | rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq); | ||
426 | |||
353 | /* | 427 | /* |
354 | * Just mark start time and set the started bit. Due to memory | 428 | * Just mark start time and set the started bit. Due to memory |
355 | * ordering, we know we'll see the correct deadline as long as | 429 | * ordering, we know we'll see the correct deadline as long as |
356 | * REQ_ATOMIC_STARTED is seen. | 430 | * REQ_ATOMIC_STARTED is seen. Use the default queue timeout, |
431 | * unless one has been set in the request. | ||
432 | */ | ||
433 | if (!rq->timeout) | ||
434 | rq->deadline = jiffies + q->rq_timeout; | ||
435 | else | ||
436 | rq->deadline = jiffies + rq->timeout; | ||
437 | |||
438 | /* | ||
439 | * Mark us as started and clear complete. Complete might have been | ||
440 | * set if requeue raced with timeout, which then marked it as | ||
441 | * complete. So be sure to clear complete again when we start | ||
442 | * the request, otherwise we'll ignore the completion event. | ||
357 | */ | 443 | */ |
358 | rq->deadline = jiffies + q->rq_timeout; | 444 | if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) |
359 | set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); | 445 | set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); |
446 | if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags)) | ||
447 | clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); | ||
360 | 448 | ||
361 | if (q->dma_drain_size && blk_rq_bytes(rq)) { | 449 | if (q->dma_drain_size && blk_rq_bytes(rq)) { |
362 | /* | 450 | /* |
@@ -378,7 +466,7 @@ static void blk_mq_start_request(struct request *rq, bool last) | |||
378 | rq->cmd_flags |= REQ_END; | 466 | rq->cmd_flags |= REQ_END; |
379 | } | 467 | } |
380 | 468 | ||
381 | static void blk_mq_requeue_request(struct request *rq) | 469 | static void __blk_mq_requeue_request(struct request *rq) |
382 | { | 470 | { |
383 | struct request_queue *q = rq->q; | 471 | struct request_queue *q = rq->q; |
384 | 472 | ||
@@ -391,6 +479,86 @@ static void blk_mq_requeue_request(struct request *rq) | |||
391 | rq->nr_phys_segments--; | 479 | rq->nr_phys_segments--; |
392 | } | 480 | } |
393 | 481 | ||
482 | void blk_mq_requeue_request(struct request *rq) | ||
483 | { | ||
484 | __blk_mq_requeue_request(rq); | ||
485 | blk_clear_rq_complete(rq); | ||
486 | |||
487 | BUG_ON(blk_queued_rq(rq)); | ||
488 | blk_mq_add_to_requeue_list(rq, true); | ||
489 | } | ||
490 | EXPORT_SYMBOL(blk_mq_requeue_request); | ||
491 | |||
492 | static void blk_mq_requeue_work(struct work_struct *work) | ||
493 | { | ||
494 | struct request_queue *q = | ||
495 | container_of(work, struct request_queue, requeue_work); | ||
496 | LIST_HEAD(rq_list); | ||
497 | struct request *rq, *next; | ||
498 | unsigned long flags; | ||
499 | |||
500 | spin_lock_irqsave(&q->requeue_lock, flags); | ||
501 | list_splice_init(&q->requeue_list, &rq_list); | ||
502 | spin_unlock_irqrestore(&q->requeue_lock, flags); | ||
503 | |||
504 | list_for_each_entry_safe(rq, next, &rq_list, queuelist) { | ||
505 | if (!(rq->cmd_flags & REQ_SOFTBARRIER)) | ||
506 | continue; | ||
507 | |||
508 | rq->cmd_flags &= ~REQ_SOFTBARRIER; | ||
509 | list_del_init(&rq->queuelist); | ||
510 | blk_mq_insert_request(rq, true, false, false); | ||
511 | } | ||
512 | |||
513 | while (!list_empty(&rq_list)) { | ||
514 | rq = list_entry(rq_list.next, struct request, queuelist); | ||
515 | list_del_init(&rq->queuelist); | ||
516 | blk_mq_insert_request(rq, false, false, false); | ||
517 | } | ||
518 | |||
519 | blk_mq_run_queues(q, false); | ||
520 | } | ||
521 | |||
522 | void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) | ||
523 | { | ||
524 | struct request_queue *q = rq->q; | ||
525 | unsigned long flags; | ||
526 | |||
527 | /* | ||
528 | * We abuse this flag that is otherwise used by the I/O scheduler to | ||
529 | * request head insertion from the workqueue. | ||
530 | */ | ||
531 | BUG_ON(rq->cmd_flags & REQ_SOFTBARRIER); | ||
532 | |||
533 | spin_lock_irqsave(&q->requeue_lock, flags); | ||
534 | if (at_head) { | ||
535 | rq->cmd_flags |= REQ_SOFTBARRIER; | ||
536 | list_add(&rq->queuelist, &q->requeue_list); | ||
537 | } else { | ||
538 | list_add_tail(&rq->queuelist, &q->requeue_list); | ||
539 | } | ||
540 | spin_unlock_irqrestore(&q->requeue_lock, flags); | ||
541 | } | ||
542 | EXPORT_SYMBOL(blk_mq_add_to_requeue_list); | ||
543 | |||
544 | void blk_mq_kick_requeue_list(struct request_queue *q) | ||
545 | { | ||
546 | kblockd_schedule_work(&q->requeue_work); | ||
547 | } | ||
548 | EXPORT_SYMBOL(blk_mq_kick_requeue_list); | ||
549 | |||
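The requeue machinery above is a deferred-work pattern: producers append requests to q->requeue_list under requeue_lock (with REQ_SOFTBARRIER flagging head insertion), and a kblockd work item later splices the whole list off and reinserts head-flagged requests before the rest. The condensed user-space model below keeps only the list handling; it drops the locking and list_splice_init() of the kernel version and uses a hypothetical struct rq:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct rq {
	int tag;
	bool at_head;		/* models the REQ_SOFTBARRIER marking */
	struct rq *next;
};

static struct rq *requeue_list;	/* q->requeue_list, minus the spinlock */

static void add_to_requeue_list(struct rq *rq, bool at_head)
{
	rq->at_head = at_head;
	rq->next = requeue_list;
	requeue_list = rq;
}

/* The work item: splice the list off, then reinsert in two passes. */
static void requeue_work(void)
{
	struct rq *list = requeue_list, *rq, *next;

	requeue_list = NULL;

	for (rq = list; rq; rq = rq->next)
		if (rq->at_head)
			printf("insert tag %d at head\n", rq->tag);
	for (rq = list; rq; rq = next) {
		next = rq->next;
		if (!rq->at_head)
			printf("insert tag %d at tail\n", rq->tag);
		free(rq);
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 4; i++) {
		struct rq *rq = calloc(1, sizeof(*rq));

		rq->tag = i;
		add_to_requeue_list(rq, i == 2);	/* tag 2 goes to the head */
	}
	requeue_work();
	return 0;
}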
550 | struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag) | ||
551 | { | ||
552 | struct request_queue *q = hctx->queue; | ||
553 | |||
554 | if ((q->flush_rq->cmd_flags & REQ_FLUSH_SEQ) && | ||
555 | q->flush_rq->tag == tag) | ||
556 | return q->flush_rq; | ||
557 | |||
558 | return hctx->tags->rqs[tag]; | ||
559 | } | ||
560 | EXPORT_SYMBOL(blk_mq_tag_to_rq); | ||
561 | |||
394 | struct blk_mq_timeout_data { | 562 | struct blk_mq_timeout_data { |
395 | struct blk_mq_hw_ctx *hctx; | 563 | struct blk_mq_hw_ctx *hctx; |
396 | unsigned long *next; | 564 | unsigned long *next; |
@@ -412,12 +580,13 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags) | |||
412 | do { | 580 | do { |
413 | struct request *rq; | 581 | struct request *rq; |
414 | 582 | ||
415 | tag = find_next_zero_bit(free_tags, hctx->queue_depth, tag); | 583 | tag = find_next_zero_bit(free_tags, hctx->tags->nr_tags, tag); |
416 | if (tag >= hctx->queue_depth) | 584 | if (tag >= hctx->tags->nr_tags) |
417 | break; | 585 | break; |
418 | 586 | ||
419 | rq = hctx->rqs[tag++]; | 587 | rq = blk_mq_tag_to_rq(hctx, tag++); |
420 | 588 | if (rq->q != hctx->queue) | |
589 | continue; | ||
421 | if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) | 590 | if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) |
422 | continue; | 591 | continue; |
423 | 592 | ||
@@ -442,6 +611,28 @@ static void blk_mq_hw_ctx_check_timeout(struct blk_mq_hw_ctx *hctx, | |||
442 | blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data); | 611 | blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data); |
443 | } | 612 | } |
444 | 613 | ||
614 | static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq) | ||
615 | { | ||
616 | struct request_queue *q = rq->q; | ||
617 | |||
618 | /* | ||
619 | * We know that complete is set at this point. If STARTED isn't set | ||
620 | * anymore, then the request isn't active and the "timeout" should | ||
621 | * just be ignored. This can happen due to the bitflag ordering. | ||
622 | * Timeout first checks if STARTED is set, and if it is, assumes | ||
623 | * the request is active. But if we race with completion, then | ||
624 | * both flags will get cleared. So check here again, and ignore | ||
625 | * a timeout event with a request that isn't active. | ||
626 | */ | ||
627 | if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) | ||
628 | return BLK_EH_NOT_HANDLED; | ||
629 | |||
630 | if (!q->mq_ops->timeout) | ||
631 | return BLK_EH_RESET_TIMER; | ||
632 | |||
633 | return q->mq_ops->timeout(rq); | ||
634 | } | ||
635 | |||
445 | static void blk_mq_rq_timer(unsigned long data) | 636 | static void blk_mq_rq_timer(unsigned long data) |
446 | { | 637 | { |
447 | struct request_queue *q = (struct request_queue *) data; | 638 | struct request_queue *q = (struct request_queue *) data; |
@@ -449,11 +640,24 @@ static void blk_mq_rq_timer(unsigned long data) | |||
449 | unsigned long next = 0; | 640 | unsigned long next = 0; |
450 | int i, next_set = 0; | 641 | int i, next_set = 0; |
451 | 642 | ||
452 | queue_for_each_hw_ctx(q, hctx, i) | 643 | queue_for_each_hw_ctx(q, hctx, i) { |
644 | /* | ||
645 | * If no software queues are currently mapped to this | ||
646 | * hardware queue, there's nothing to check | ||
647 | */ | ||
648 | if (!hctx->nr_ctx || !hctx->tags) | ||
649 | continue; | ||
650 | |||
453 | blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set); | 651 | blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set); |
652 | } | ||
454 | 653 | ||
455 | if (next_set) | 654 | if (next_set) { |
456 | mod_timer(&q->timeout, round_jiffies_up(next)); | 655 | next = blk_rq_timeout(round_jiffies_up(next)); |
656 | mod_timer(&q->timeout, next); | ||
657 | } else { | ||
658 | queue_for_each_hw_ctx(q, hctx, i) | ||
659 | blk_mq_tag_idle(hctx); | ||
660 | } | ||
457 | } | 661 | } |
458 | 662 | ||
459 | /* | 663 | /* |
@@ -495,9 +699,38 @@ static bool blk_mq_attempt_merge(struct request_queue *q, | |||
495 | return false; | 699 | return false; |
496 | } | 700 | } |
497 | 701 | ||
498 | void blk_mq_add_timer(struct request *rq) | 702 | /* |
703 | * Process software queues that have been marked busy, splicing them | ||
704 | * to the for-dispatch list | ||
705 | */ | ||
706 | static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) | ||
499 | { | 707 | { |
500 | __blk_add_timer(rq, NULL); | 708 | struct blk_mq_ctx *ctx; |
709 | int i; | ||
710 | |||
711 | for (i = 0; i < hctx->ctx_map.map_size; i++) { | ||
712 | struct blk_align_bitmap *bm = &hctx->ctx_map.map[i]; | ||
713 | unsigned int off, bit; | ||
714 | |||
715 | if (!bm->word) | ||
716 | continue; | ||
717 | |||
718 | bit = 0; | ||
719 | off = i * hctx->ctx_map.bits_per_word; | ||
720 | do { | ||
721 | bit = find_next_bit(&bm->word, bm->depth, bit); | ||
722 | if (bit >= bm->depth) | ||
723 | break; | ||
724 | |||
725 | ctx = hctx->ctxs[bit + off]; | ||
726 | clear_bit(bit, &bm->word); | ||
727 | spin_lock(&ctx->lock); | ||
728 | list_splice_tail_init(&ctx->rq_list, list); | ||
729 | spin_unlock(&ctx->lock); | ||
730 | |||
731 | bit++; | ||
732 | } while (1); | ||
733 | } | ||
501 | } | 734 | } |
502 | 735 | ||
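flush_busy_ctxs() walks every ctx_map word, pulls out each set bit, clears it, and splices that software queue's pending requests onto the dispatch list. The per-word find/clear loop in isolation looks roughly like the user-space sketch below, with a printf standing in for the list splice and a simple bit scan instead of find_next_bit():

#include <stdio.h>

/* Walk every set bit in a multi-word pending map, clearing as we go. */
static void flush_busy(unsigned long *map, unsigned int map_nr,
		       unsigned int bits_per_word)
{
	unsigned int i, bit;

	for (i = 0; i < map_nr; i++) {
		if (!map[i])
			continue;

		for (bit = 0; bit < bits_per_word; bit++) {
			if (!(map[i] & (1UL << bit)))
				continue;
			map[i] &= ~(1UL << bit);
			/* ctx index = word offset + bit within the word */
			printf("flush ctx %u\n", i * bits_per_word + bit);
		}
	}
}

int main(void)
{
	/* 8 bits per word: word 0 covers ctx 0-7, word 1 covers ctx 8-15 */
	unsigned long map[2] = { 0x12, 0x5 };	/* ctxs 1, 4, 8, 10 pending */

	flush_busy(map, 2, 8);
	return 0;
}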
503 | /* | 736 | /* |
@@ -509,10 +742,11 @@ void blk_mq_add_timer(struct request *rq) | |||
509 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | 742 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) |
510 | { | 743 | { |
511 | struct request_queue *q = hctx->queue; | 744 | struct request_queue *q = hctx->queue; |
512 | struct blk_mq_ctx *ctx; | ||
513 | struct request *rq; | 745 | struct request *rq; |
514 | LIST_HEAD(rq_list); | 746 | LIST_HEAD(rq_list); |
515 | int bit, queued; | 747 | int queued; |
748 | |||
749 | WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)); | ||
516 | 750 | ||
517 | if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) | 751 | if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) |
518 | return; | 752 | return; |
@@ -522,15 +756,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
522 | /* | 756 | /* |
523 | * Touch any software queue that has pending entries. | 757 | * Touch any software queue that has pending entries. |
524 | */ | 758 | */ |
525 | for_each_set_bit(bit, hctx->ctx_map, hctx->nr_ctx) { | 759 | flush_busy_ctxs(hctx, &rq_list); |
526 | clear_bit(bit, hctx->ctx_map); | ||
527 | ctx = hctx->ctxs[bit]; | ||
528 | BUG_ON(bit != ctx->index_hw); | ||
529 | |||
530 | spin_lock(&ctx->lock); | ||
531 | list_splice_tail_init(&ctx->rq_list, &rq_list); | ||
532 | spin_unlock(&ctx->lock); | ||
533 | } | ||
534 | 760 | ||
535 | /* | 761 | /* |
536 | * If we have previous entries on our dispatch list, grab them | 762 | * If we have previous entries on our dispatch list, grab them |
@@ -544,13 +770,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
544 | } | 770 | } |
545 | 771 | ||
546 | /* | 772 | /* |
547 | * Delete and return all entries from our dispatch list | ||
548 | */ | ||
549 | queued = 0; | ||
550 | |||
551 | /* | ||
552 | * Now process all the entries, sending them to the driver. | 773 | * Now process all the entries, sending them to the driver. |
553 | */ | 774 | */ |
775 | queued = 0; | ||
554 | while (!list_empty(&rq_list)) { | 776 | while (!list_empty(&rq_list)) { |
555 | int ret; | 777 | int ret; |
556 | 778 | ||
@@ -565,13 +787,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
565 | queued++; | 787 | queued++; |
566 | continue; | 788 | continue; |
567 | case BLK_MQ_RQ_QUEUE_BUSY: | 789 | case BLK_MQ_RQ_QUEUE_BUSY: |
568 | /* | ||
569 | * FIXME: we should have a mechanism to stop the queue | ||
570 | * like blk_stop_queue, otherwise we will waste cpu | ||
571 | * time | ||
572 | */ | ||
573 | list_add(&rq->queuelist, &rq_list); | 790 | list_add(&rq->queuelist, &rq_list); |
574 | blk_mq_requeue_request(rq); | 791 | __blk_mq_requeue_request(rq); |
575 | break; | 792 | break; |
576 | default: | 793 | default: |
577 | pr_err("blk-mq: bad return on queue: %d\n", ret); | 794 | pr_err("blk-mq: bad return on queue: %d\n", ret); |
@@ -601,17 +818,44 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
601 | } | 818 | } |
602 | } | 819 | } |
603 | 820 | ||
821 | /* | ||
822 | * It'd be great if the workqueue API had a way to pass | ||
823 | * in a mask and had some smarts for more clever placement. | ||
824 | * For now we just round-robin here, switching for every | ||
825 | * BLK_MQ_CPU_WORK_BATCH queued items. | ||
826 | */ | ||
827 | static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) | ||
828 | { | ||
829 | int cpu = hctx->next_cpu; | ||
830 | |||
831 | if (--hctx->next_cpu_batch <= 0) { | ||
832 | int next_cpu; | ||
833 | |||
834 | next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask); | ||
835 | if (next_cpu >= nr_cpu_ids) | ||
836 | next_cpu = cpumask_first(hctx->cpumask); | ||
837 | |||
838 | hctx->next_cpu = next_cpu; | ||
839 | hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; | ||
840 | } | ||
841 | |||
842 | return cpu; | ||
843 | } | ||
844 | |||
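blk_mq_hctx_next_cpu() hands out the currently cached CPU and only advances to the next CPU in the hctx mask after BLK_MQ_CPU_WORK_BATCH placements, so bursts of queue runs stay on one CPU while the mask is still cycled through over time. The batching logic on its own, sketched with a plain CPU array instead of a cpumask and hypothetical names:

#include <stdio.h>

#define WORK_BATCH	8	/* stands in for BLK_MQ_CPU_WORK_BATCH */

struct hctx_place {
	const int *cpus;	/* CPUs mapped to this hardware queue */
	int nr_cpus;
	int next;		/* index of the CPU we currently hand out */
	int batch;		/* placements left before advancing */
};

static int next_cpu(struct hctx_place *h)
{
	int cpu = h->cpus[h->next];

	if (--h->batch <= 0) {
		/* wrap like cpumask_next()/cpumask_first() */
		h->next = (h->next + 1) % h->nr_cpus;
		h->batch = WORK_BATCH;
	}
	return cpu;
}

int main(void)
{
	static const int cpus[] = { 0, 2, 4 };
	struct hctx_place h = { cpus, 3, 0, WORK_BATCH };
	int i;

	/* the first 8 placements land on CPU 0, the next 8 on CPU 2, ... */
	for (i = 0; i < 20; i++)
		printf("run %2d on cpu %d\n", i, next_cpu(&h));
	return 0;
}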
604 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) | 845 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) |
605 | { | 846 | { |
606 | if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) | 847 | if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) |
607 | return; | 848 | return; |
608 | 849 | ||
609 | if (!async) | 850 | if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask)) |
610 | __blk_mq_run_hw_queue(hctx); | 851 | __blk_mq_run_hw_queue(hctx); |
852 | else if (hctx->queue->nr_hw_queues == 1) | ||
853 | kblockd_schedule_delayed_work(&hctx->run_work, 0); | ||
611 | else { | 854 | else { |
612 | struct request_queue *q = hctx->queue; | 855 | unsigned int cpu; |
613 | 856 | ||
614 | kblockd_schedule_delayed_work(q, &hctx->delayed_work, 0); | 857 | cpu = blk_mq_hctx_next_cpu(hctx); |
858 | kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0); | ||
615 | } | 859 | } |
616 | } | 860 | } |
617 | 861 | ||
@@ -626,14 +870,17 @@ void blk_mq_run_queues(struct request_queue *q, bool async) | |||
626 | test_bit(BLK_MQ_S_STOPPED, &hctx->state)) | 870 | test_bit(BLK_MQ_S_STOPPED, &hctx->state)) |
627 | continue; | 871 | continue; |
628 | 872 | ||
873 | preempt_disable(); | ||
629 | blk_mq_run_hw_queue(hctx, async); | 874 | blk_mq_run_hw_queue(hctx, async); |
875 | preempt_enable(); | ||
630 | } | 876 | } |
631 | } | 877 | } |
632 | EXPORT_SYMBOL(blk_mq_run_queues); | 878 | EXPORT_SYMBOL(blk_mq_run_queues); |
633 | 879 | ||
634 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) | 880 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) |
635 | { | 881 | { |
636 | cancel_delayed_work(&hctx->delayed_work); | 882 | cancel_delayed_work(&hctx->run_work); |
883 | cancel_delayed_work(&hctx->delay_work); | ||
637 | set_bit(BLK_MQ_S_STOPPED, &hctx->state); | 884 | set_bit(BLK_MQ_S_STOPPED, &hctx->state); |
638 | } | 885 | } |
639 | EXPORT_SYMBOL(blk_mq_stop_hw_queue); | 886 | EXPORT_SYMBOL(blk_mq_stop_hw_queue); |
@@ -651,11 +898,25 @@ EXPORT_SYMBOL(blk_mq_stop_hw_queues); | |||
651 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx) | 898 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx) |
652 | { | 899 | { |
653 | clear_bit(BLK_MQ_S_STOPPED, &hctx->state); | 900 | clear_bit(BLK_MQ_S_STOPPED, &hctx->state); |
901 | |||
902 | preempt_disable(); | ||
654 | __blk_mq_run_hw_queue(hctx); | 903 | __blk_mq_run_hw_queue(hctx); |
904 | preempt_enable(); | ||
655 | } | 905 | } |
656 | EXPORT_SYMBOL(blk_mq_start_hw_queue); | 906 | EXPORT_SYMBOL(blk_mq_start_hw_queue); |
657 | 907 | ||
658 | void blk_mq_start_stopped_hw_queues(struct request_queue *q) | 908 | void blk_mq_start_hw_queues(struct request_queue *q) |
909 | { | ||
910 | struct blk_mq_hw_ctx *hctx; | ||
911 | int i; | ||
912 | |||
913 | queue_for_each_hw_ctx(q, hctx, i) | ||
914 | blk_mq_start_hw_queue(hctx); | ||
915 | } | ||
916 | EXPORT_SYMBOL(blk_mq_start_hw_queues); | ||
917 | |||
918 | |||
919 | void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async) | ||
659 | { | 920 | { |
660 | struct blk_mq_hw_ctx *hctx; | 921 | struct blk_mq_hw_ctx *hctx; |
661 | int i; | 922 | int i; |
@@ -665,19 +926,47 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q) | |||
665 | continue; | 926 | continue; |
666 | 927 | ||
667 | clear_bit(BLK_MQ_S_STOPPED, &hctx->state); | 928 | clear_bit(BLK_MQ_S_STOPPED, &hctx->state); |
668 | blk_mq_run_hw_queue(hctx, true); | 929 | preempt_disable(); |
930 | blk_mq_run_hw_queue(hctx, async); | ||
931 | preempt_enable(); | ||
669 | } | 932 | } |
670 | } | 933 | } |
671 | EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues); | 934 | EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues); |
672 | 935 | ||
673 | static void blk_mq_work_fn(struct work_struct *work) | 936 | static void blk_mq_run_work_fn(struct work_struct *work) |
674 | { | 937 | { |
675 | struct blk_mq_hw_ctx *hctx; | 938 | struct blk_mq_hw_ctx *hctx; |
676 | 939 | ||
677 | hctx = container_of(work, struct blk_mq_hw_ctx, delayed_work.work); | 940 | hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work); |
941 | |||
678 | __blk_mq_run_hw_queue(hctx); | 942 | __blk_mq_run_hw_queue(hctx); |
679 | } | 943 | } |
680 | 944 | ||
945 | static void blk_mq_delay_work_fn(struct work_struct *work) | ||
946 | { | ||
947 | struct blk_mq_hw_ctx *hctx; | ||
948 | |||
949 | hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work); | ||
950 | |||
951 | if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state)) | ||
952 | __blk_mq_run_hw_queue(hctx); | ||
953 | } | ||
954 | |||
955 | void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) | ||
956 | { | ||
957 | unsigned long tmo = msecs_to_jiffies(msecs); | ||
958 | |||
959 | if (hctx->queue->nr_hw_queues == 1) | ||
960 | kblockd_schedule_delayed_work(&hctx->delay_work, tmo); | ||
961 | else { | ||
962 | unsigned int cpu; | ||
963 | |||
964 | cpu = blk_mq_hctx_next_cpu(hctx); | ||
965 | kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo); | ||
966 | } | ||
967 | } | ||
968 | EXPORT_SYMBOL(blk_mq_delay_queue); | ||
969 | |||
681 | static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, | 970 | static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, |
682 | struct request *rq, bool at_head) | 971 | struct request *rq, bool at_head) |
683 | { | 972 | { |
@@ -689,12 +978,13 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, | |||
689 | list_add(&rq->queuelist, &ctx->rq_list); | 978 | list_add(&rq->queuelist, &ctx->rq_list); |
690 | else | 979 | else |
691 | list_add_tail(&rq->queuelist, &ctx->rq_list); | 980 | list_add_tail(&rq->queuelist, &ctx->rq_list); |
981 | |||
692 | blk_mq_hctx_mark_pending(hctx, ctx); | 982 | blk_mq_hctx_mark_pending(hctx, ctx); |
693 | 983 | ||
694 | /* | 984 | /* |
695 | * We do this early, to ensure we are on the right CPU. | 985 | * We do this early, to ensure we are on the right CPU. |
696 | */ | 986 | */ |
697 | blk_mq_add_timer(rq); | 987 | blk_add_timer(rq); |
698 | } | 988 | } |
699 | 989 | ||
700 | void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, | 990 | void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, |
@@ -719,10 +1009,10 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, | |||
719 | spin_unlock(&ctx->lock); | 1009 | spin_unlock(&ctx->lock); |
720 | } | 1010 | } |
721 | 1011 | ||
722 | blk_mq_put_ctx(current_ctx); | ||
723 | |||
724 | if (run_queue) | 1012 | if (run_queue) |
725 | blk_mq_run_hw_queue(hctx, async); | 1013 | blk_mq_run_hw_queue(hctx, async); |
1014 | |||
1015 | blk_mq_put_ctx(current_ctx); | ||
726 | } | 1016 | } |
727 | 1017 | ||
728 | static void blk_mq_insert_requests(struct request_queue *q, | 1018 | static void blk_mq_insert_requests(struct request_queue *q, |
@@ -758,9 +1048,8 @@ static void blk_mq_insert_requests(struct request_queue *q, | |||
758 | } | 1048 | } |
759 | spin_unlock(&ctx->lock); | 1049 | spin_unlock(&ctx->lock); |
760 | 1050 | ||
761 | blk_mq_put_ctx(current_ctx); | ||
762 | |||
763 | blk_mq_run_hw_queue(hctx, from_schedule); | 1051 | blk_mq_run_hw_queue(hctx, from_schedule); |
1052 | blk_mq_put_ctx(current_ctx); | ||
764 | } | 1053 | } |
765 | 1054 | ||
766 | static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) | 1055 | static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) |
@@ -823,24 +1112,169 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |||
823 | static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) | 1112 | static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) |
824 | { | 1113 | { |
825 | init_request_from_bio(rq, bio); | 1114 | init_request_from_bio(rq, bio); |
826 | blk_account_io_start(rq, 1); | 1115 | |
1116 | if (blk_do_io_stat(rq)) { | ||
1117 | rq->start_time = jiffies; | ||
1118 | blk_account_io_start(rq, 1); | ||
1119 | } | ||
827 | } | 1120 | } |
828 | 1121 | ||
829 | static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | 1122 | static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx, |
1123 | struct blk_mq_ctx *ctx, | ||
1124 | struct request *rq, struct bio *bio) | ||
1125 | { | ||
1126 | struct request_queue *q = hctx->queue; | ||
1127 | |||
1128 | if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) { | ||
1129 | blk_mq_bio_to_request(rq, bio); | ||
1130 | spin_lock(&ctx->lock); | ||
1131 | insert_rq: | ||
1132 | __blk_mq_insert_request(hctx, rq, false); | ||
1133 | spin_unlock(&ctx->lock); | ||
1134 | return false; | ||
1135 | } else { | ||
1136 | spin_lock(&ctx->lock); | ||
1137 | if (!blk_mq_attempt_merge(q, ctx, bio)) { | ||
1138 | blk_mq_bio_to_request(rq, bio); | ||
1139 | goto insert_rq; | ||
1140 | } | ||
1141 | |||
1142 | spin_unlock(&ctx->lock); | ||
1143 | __blk_mq_free_request(hctx, ctx, rq); | ||
1144 | return true; | ||
1145 | } | ||
1146 | } | ||
1147 | |||
1148 | struct blk_map_ctx { | ||
1149 | struct blk_mq_hw_ctx *hctx; | ||
1150 | struct blk_mq_ctx *ctx; | ||
1151 | }; | ||
1152 | |||
1153 | static struct request *blk_mq_map_request(struct request_queue *q, | ||
1154 | struct bio *bio, | ||
1155 | struct blk_map_ctx *data) | ||
830 | { | 1156 | { |
831 | struct blk_mq_hw_ctx *hctx; | 1157 | struct blk_mq_hw_ctx *hctx; |
832 | struct blk_mq_ctx *ctx; | 1158 | struct blk_mq_ctx *ctx; |
1159 | struct request *rq; | ||
1160 | int rw = bio_data_dir(bio); | ||
1161 | |||
1162 | if (unlikely(blk_mq_queue_enter(q))) { | ||
1163 | bio_endio(bio, -EIO); | ||
1164 | return NULL; | ||
1165 | } | ||
1166 | |||
1167 | ctx = blk_mq_get_ctx(q); | ||
1168 | hctx = q->mq_ops->map_queue(q, ctx->cpu); | ||
1169 | |||
1170 | if (rw_is_sync(bio->bi_rw)) | ||
1171 | rw |= REQ_SYNC; | ||
1172 | |||
1173 | trace_block_getrq(q, bio, rw); | ||
1174 | rq = __blk_mq_alloc_request(q, hctx, ctx, rw, GFP_ATOMIC, false); | ||
1175 | if (unlikely(!rq)) { | ||
1176 | __blk_mq_run_hw_queue(hctx); | ||
1177 | blk_mq_put_ctx(ctx); | ||
1178 | trace_block_sleeprq(q, bio, rw); | ||
1179 | |||
1180 | ctx = blk_mq_get_ctx(q); | ||
1181 | hctx = q->mq_ops->map_queue(q, ctx->cpu); | ||
1182 | rq = __blk_mq_alloc_request(q, hctx, ctx, rw, | ||
1183 | __GFP_WAIT|GFP_ATOMIC, false); | ||
1184 | } | ||
1185 | |||
1186 | hctx->queued++; | ||
1187 | data->hctx = hctx; | ||
1188 | data->ctx = ctx; | ||
1189 | return rq; | ||
1190 | } | ||
1191 | |||
1192 | /* | ||
1193 | * Multiple hardware queue variant. This will not use per-process plugs, | ||
1194 | * but will attempt to bypass the hctx queueing if we can go straight to | ||
1195 | * hardware for SYNC IO. | ||
1196 | */ | ||
1197 | static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | ||
1198 | { | ||
833 | const int is_sync = rw_is_sync(bio->bi_rw); | 1199 | const int is_sync = rw_is_sync(bio->bi_rw); |
834 | const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); | 1200 | const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); |
835 | int rw = bio_data_dir(bio); | 1201 | struct blk_map_ctx data; |
836 | struct request *rq; | 1202 | struct request *rq; |
1203 | |||
1204 | blk_queue_bounce(q, &bio); | ||
1205 | |||
1206 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { | ||
1207 | bio_endio(bio, -EIO); | ||
1208 | return; | ||
1209 | } | ||
1210 | |||
1211 | rq = blk_mq_map_request(q, bio, &data); | ||
1212 | if (unlikely(!rq)) | ||
1213 | return; | ||
1214 | |||
1215 | if (unlikely(is_flush_fua)) { | ||
1216 | blk_mq_bio_to_request(rq, bio); | ||
1217 | blk_insert_flush(rq); | ||
1218 | goto run_queue; | ||
1219 | } | ||
1220 | |||
1221 | if (is_sync) { | ||
1222 | int ret; | ||
1223 | |||
1224 | blk_mq_bio_to_request(rq, bio); | ||
1225 | blk_mq_start_request(rq, true); | ||
1226 | blk_add_timer(rq); | ||
1227 | |||
1228 | /* | ||
1229 | * For OK queue, we are done. For error, kill it. Any other | ||
1230 | * error (busy), just add it to our list as we previously | ||
1231 | * would have done | ||
1232 | */ | ||
1233 | ret = q->mq_ops->queue_rq(data.hctx, rq); | ||
1234 | if (ret == BLK_MQ_RQ_QUEUE_OK) | ||
1235 | goto done; | ||
1236 | else { | ||
1237 | __blk_mq_requeue_request(rq); | ||
1238 | |||
1239 | if (ret == BLK_MQ_RQ_QUEUE_ERROR) { | ||
1240 | rq->errors = -EIO; | ||
1241 | blk_mq_end_io(rq, rq->errors); | ||
1242 | goto done; | ||
1243 | } | ||
1244 | } | ||
1245 | } | ||
1246 | |||
1247 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { | ||
1248 | /* | ||
1249 | * For a SYNC request, send it to the hardware immediately. For | ||
1250 | * an ASYNC request, just ensure that we run it later on. The | ||
1251 | * latter allows for merging opportunities and more efficient | ||
1252 | * dispatching. | ||
1253 | */ | ||
1254 | run_queue: | ||
1255 | blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); | ||
1256 | } | ||
1257 | done: | ||
1258 | blk_mq_put_ctx(data.ctx); | ||
1259 | } | ||
1260 | |||
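The direct-issue branch above relies on the three-way contract of ->queue_rq(): OK means the driver owns the request and we are done, ERROR means un-start it and complete it with -EIO, and anything else (busy) means falling back to the software queue as before. A small userspace sketch of that result handling; the enum values only mirror the BLK_MQ_RQ_QUEUE_* names and are not the kernel's definitions:

```c
/* Sketch of the three-way queue_rq() result handling in the direct
 * issue path; stand-ins only, not the kernel's definitions. */
#include <stdio.h>

enum mq_queue_result { QUEUE_OK, QUEUE_BUSY, QUEUE_ERROR };

static void handle_queue_rq_result(enum mq_queue_result ret)
{
	switch (ret) {
	case QUEUE_OK:
		printf("OK: driver owns the request, nothing more to do\n");
		break;
	case QUEUE_ERROR:
		printf("error: unwind the start and end the request with -EIO\n");
		break;
	default:
		printf("busy: unwind and let it go through the software queue\n");
		break;
	}
}

int main(void)
{
	handle_queue_rq_result(QUEUE_OK);
	handle_queue_rq_result(QUEUE_BUSY);
	handle_queue_rq_result(QUEUE_ERROR);
	return 0;
}
```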
1261 | /* | ||
1262 | * Single hardware queue variant. This will attempt to use any per-process | ||
1263 | * plug for merging and IO deferral. | ||
1264 | */ | ||
1265 | static void blk_sq_make_request(struct request_queue *q, struct bio *bio) | ||
1266 | { | ||
1267 | const int is_sync = rw_is_sync(bio->bi_rw); | ||
1268 | const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); | ||
837 | unsigned int use_plug, request_count = 0; | 1269 | unsigned int use_plug, request_count = 0; |
1270 | struct blk_map_ctx data; | ||
1271 | struct request *rq; | ||
838 | 1272 | ||
839 | /* | 1273 | /* |
840 | * If we have multiple hardware queues, just go directly to | 1274 | * If we have multiple hardware queues, just go directly to |
841 | * one of those for sync IO. | 1275 | * one of those for sync IO. |
842 | */ | 1276 | */ |
843 | use_plug = !is_flush_fua && ((q->nr_hw_queues == 1) || !is_sync); | 1277 | use_plug = !is_flush_fua && !is_sync; |
844 | 1278 | ||
845 | blk_queue_bounce(q, &bio); | 1279 | blk_queue_bounce(q, &bio); |
846 | 1280 | ||
@@ -849,37 +1283,14 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
849 | return; | 1283 | return; |
850 | } | 1284 | } |
851 | 1285 | ||
852 | if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) | 1286 | if (use_plug && !blk_queue_nomerges(q) && |
1287 | blk_attempt_plug_merge(q, bio, &request_count)) | ||
853 | return; | 1288 | return; |
854 | 1289 | ||
855 | if (blk_mq_queue_enter(q)) { | 1290 | rq = blk_mq_map_request(q, bio, &data); |
856 | bio_endio(bio, -EIO); | ||
857 | return; | ||
858 | } | ||
859 | |||
860 | ctx = blk_mq_get_ctx(q); | ||
861 | hctx = q->mq_ops->map_queue(q, ctx->cpu); | ||
862 | |||
863 | if (is_sync) | ||
864 | rw |= REQ_SYNC; | ||
865 | trace_block_getrq(q, bio, rw); | ||
866 | rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false); | ||
867 | if (likely(rq)) | ||
868 | blk_mq_rq_ctx_init(q, ctx, rq, rw); | ||
869 | else { | ||
870 | blk_mq_put_ctx(ctx); | ||
871 | trace_block_sleeprq(q, bio, rw); | ||
872 | rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC, | ||
873 | false); | ||
874 | ctx = rq->mq_ctx; | ||
875 | hctx = q->mq_ops->map_queue(q, ctx->cpu); | ||
876 | } | ||
877 | |||
878 | hctx->queued++; | ||
879 | 1291 | ||
880 | if (unlikely(is_flush_fua)) { | 1292 | if (unlikely(is_flush_fua)) { |
881 | blk_mq_bio_to_request(rq, bio); | 1293 | blk_mq_bio_to_request(rq, bio); |
882 | blk_mq_put_ctx(ctx); | ||
883 | blk_insert_flush(rq); | 1294 | blk_insert_flush(rq); |
884 | goto run_queue; | 1295 | goto run_queue; |
885 | } | 1296 | } |
@@ -901,31 +1312,23 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
901 | trace_block_plug(q); | 1312 | trace_block_plug(q); |
902 | } | 1313 | } |
903 | list_add_tail(&rq->queuelist, &plug->mq_list); | 1314 | list_add_tail(&rq->queuelist, &plug->mq_list); |
904 | blk_mq_put_ctx(ctx); | 1315 | blk_mq_put_ctx(data.ctx); |
905 | return; | 1316 | return; |
906 | } | 1317 | } |
907 | } | 1318 | } |
908 | 1319 | ||
909 | spin_lock(&ctx->lock); | 1320 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { |
910 | 1321 | /* | |
911 | if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) && | 1322 | * For a SYNC request, send it to the hardware immediately. For |
912 | blk_mq_attempt_merge(q, ctx, bio)) | 1323 | * an ASYNC request, just ensure that we run it later on. The |
913 | __blk_mq_free_request(hctx, ctx, rq); | 1324 | * latter allows for merging opportunities and more efficient |
914 | else { | 1325 | * dispatching. |
915 | blk_mq_bio_to_request(rq, bio); | 1326 | */ |
916 | __blk_mq_insert_request(hctx, rq, false); | 1327 | run_queue: |
1328 | blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); | ||
917 | } | 1329 | } |
918 | 1330 | ||
919 | spin_unlock(&ctx->lock); | 1331 | blk_mq_put_ctx(data.ctx); |
920 | blk_mq_put_ctx(ctx); | ||
921 | |||
922 | /* | ||
923 | * For a SYNC request, send it to the hardware immediately. For an | ||
924 | * ASYNC request, just ensure that we run it later on. The latter | ||
925 | * allows for merging opportunities and more efficient dispatching. | ||
926 | */ | ||
927 | run_queue: | ||
928 | blk_mq_run_hw_queue(hctx, !is_sync || is_flush_fua); | ||
929 | } | 1332 | } |
930 | 1333 | ||
931 | /* | 1334 | /* |
@@ -937,32 +1340,153 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu) | |||
937 | } | 1340 | } |
938 | EXPORT_SYMBOL(blk_mq_map_queue); | 1341 | EXPORT_SYMBOL(blk_mq_map_queue); |
939 | 1342 | ||
940 | struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *reg, | 1343 | static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, |
941 | unsigned int hctx_index) | 1344 | struct blk_mq_tags *tags, unsigned int hctx_idx) |
942 | { | 1345 | { |
943 | return kmalloc_node(sizeof(struct blk_mq_hw_ctx), | 1346 | struct page *page; |
944 | GFP_KERNEL | __GFP_ZERO, reg->numa_node); | 1347 | |
1348 | if (tags->rqs && set->ops->exit_request) { | ||
1349 | int i; | ||
1350 | |||
1351 | for (i = 0; i < tags->nr_tags; i++) { | ||
1352 | if (!tags->rqs[i]) | ||
1353 | continue; | ||
1354 | set->ops->exit_request(set->driver_data, tags->rqs[i], | ||
1355 | hctx_idx, i); | ||
1356 | } | ||
1357 | } | ||
1358 | |||
1359 | while (!list_empty(&tags->page_list)) { | ||
1360 | page = list_first_entry(&tags->page_list, struct page, lru); | ||
1361 | list_del_init(&page->lru); | ||
1362 | __free_pages(page, page->private); | ||
1363 | } | ||
1364 | |||
1365 | kfree(tags->rqs); | ||
1366 | |||
1367 | blk_mq_free_tags(tags); | ||
945 | } | 1368 | } |
946 | EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue); | ||
947 | 1369 | ||
948 | void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *hctx, | 1370 | static size_t order_to_size(unsigned int order) |
949 | unsigned int hctx_index) | ||
950 | { | 1371 | { |
951 | kfree(hctx); | 1372 | return (size_t)PAGE_SIZE << order; |
952 | } | 1373 | } |
953 | EXPORT_SYMBOL(blk_mq_free_single_hw_queue); | ||
954 | 1374 | ||
955 | static void blk_mq_hctx_notify(void *data, unsigned long action, | 1375 | static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, |
956 | unsigned int cpu) | 1376 | unsigned int hctx_idx) |
1377 | { | ||
1378 | struct blk_mq_tags *tags; | ||
1379 | unsigned int i, j, entries_per_page, max_order = 4; | ||
1380 | size_t rq_size, left; | ||
1381 | |||
1382 | tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, | ||
1383 | set->numa_node); | ||
1384 | if (!tags) | ||
1385 | return NULL; | ||
1386 | |||
1387 | INIT_LIST_HEAD(&tags->page_list); | ||
1388 | |||
1389 | tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *), | ||
1390 | GFP_KERNEL, set->numa_node); | ||
1391 | if (!tags->rqs) { | ||
1392 | blk_mq_free_tags(tags); | ||
1393 | return NULL; | ||
1394 | } | ||
1395 | |||
1396 | /* | ||
1397 | * rq_size is the size of the request plus driver payload, rounded | ||
1398 | * to the cacheline size | ||
1399 | */ | ||
1400 | rq_size = round_up(sizeof(struct request) + set->cmd_size, | ||
1401 | cache_line_size()); | ||
1402 | left = rq_size * set->queue_depth; | ||
1403 | |||
1404 | for (i = 0; i < set->queue_depth; ) { | ||
1405 | int this_order = max_order; | ||
1406 | struct page *page; | ||
1407 | int to_do; | ||
1408 | void *p; | ||
1409 | |||
1410 | while (left < order_to_size(this_order - 1) && this_order) | ||
1411 | this_order--; | ||
1412 | |||
1413 | do { | ||
1414 | page = alloc_pages_node(set->numa_node, GFP_KERNEL, | ||
1415 | this_order); | ||
1416 | if (page) | ||
1417 | break; | ||
1418 | if (!this_order--) | ||
1419 | break; | ||
1420 | if (order_to_size(this_order) < rq_size) | ||
1421 | break; | ||
1422 | } while (1); | ||
1423 | |||
1424 | if (!page) | ||
1425 | goto fail; | ||
1426 | |||
1427 | page->private = this_order; | ||
1428 | list_add_tail(&page->lru, &tags->page_list); | ||
1429 | |||
1430 | p = page_address(page); | ||
1431 | entries_per_page = order_to_size(this_order) / rq_size; | ||
1432 | to_do = min(entries_per_page, set->queue_depth - i); | ||
1433 | left -= to_do * rq_size; | ||
1434 | for (j = 0; j < to_do; j++) { | ||
1435 | tags->rqs[i] = p; | ||
1436 | if (set->ops->init_request) { | ||
1437 | if (set->ops->init_request(set->driver_data, | ||
1438 | tags->rqs[i], hctx_idx, i, | ||
1439 | set->numa_node)) | ||
1440 | goto fail; | ||
1441 | } | ||
1442 | |||
1443 | p += rq_size; | ||
1444 | i++; | ||
1445 | } | ||
1446 | } | ||
1447 | |||
1448 | return tags; | ||
1449 | |||
1450 | fail: | ||
1451 | pr_warn("%s: failed to allocate requests\n", __func__); | ||
1452 | blk_mq_free_rq_map(set, tags, hctx_idx); | ||
1453 | return NULL; | ||
1454 | } | ||
1455 | |||
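blk_mq_init_rq_map() above carves the per-hw-queue request structures out of whole pages: it rounds the request size (struct request plus the driver's cmd_size) up to a cache line, walks the allocation order down from 4 while a smaller chunk still covers what is left, and packs order_to_size(order) / rq_size requests into each page it gets. A userspace sketch of just that arithmetic, with malloc() standing in for alloc_pages_node() and made-up sizes:

```c
/* Sketch of the request-map carving arithmetic; userspace stand-in,
 * not kernel code. PAGE_SIZE, queue depth and cmd_size are assumptions. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE	4096UL
#define CACHE_LINE	64UL
#define MAX_ORDER	4

static size_t order_to_size(unsigned int order)
{
	return PAGE_SIZE << order;
}

static size_t round_up_to(size_t v, size_t a)
{
	return (v + a - 1) / a * a;
}

int main(void)
{
	unsigned int queue_depth = 256, cmd_size = 192, i = 0;
	size_t rq_size = round_up_to(320 /* "sizeof(struct request)", say */ + cmd_size,
				     CACHE_LINE);
	size_t left = rq_size * queue_depth;
	void *rqs[256];

	while (i < queue_depth) {
		int order = MAX_ORDER;

		/* Shrink the order while a smaller chunk still covers what's left. */
		while (order && left < order_to_size(order - 1))
			order--;

		/* alloc_pages_node() stand-in; intentionally leaked here, the
		 * kernel keeps these pages on tags->page_list for freeing. */
		char *p = malloc(order_to_size(order));
		if (!p)
			return 1;

		size_t per_page = order_to_size(order) / rq_size;
		size_t to_do = per_page < queue_depth - i ? per_page : queue_depth - i;

		left -= to_do * rq_size;
		for (size_t j = 0; j < to_do; j++, i++, p += rq_size)
			rqs[i] = p;
	}

	printf("carved %u requests of %zu bytes each\n", i, rq_size);
	return 0;
}
```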
1456 | static void blk_mq_free_bitmap(struct blk_mq_ctxmap *bitmap) | ||
1457 | { | ||
1458 | kfree(bitmap->map); | ||
1459 | } | ||
1460 | |||
1461 | static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node) | ||
1462 | { | ||
1463 | unsigned int bpw = 8, total, num_maps, i; | ||
1464 | |||
1465 | bitmap->bits_per_word = bpw; | ||
1466 | |||
1467 | num_maps = ALIGN(nr_cpu_ids, bpw) / bpw; | ||
1468 | bitmap->map = kzalloc_node(num_maps * sizeof(struct blk_align_bitmap), | ||
1469 | GFP_KERNEL, node); | ||
1470 | if (!bitmap->map) | ||
1471 | return -ENOMEM; | ||
1472 | |||
1473 | bitmap->map_size = num_maps; | ||
1474 | |||
1475 | total = nr_cpu_ids; | ||
1476 | for (i = 0; i < num_maps; i++) { | ||
1477 | bitmap->map[i].depth = min(total, bitmap->bits_per_word); | ||
1478 | total -= bitmap->map[i].depth; | ||
1479 | } | ||
1480 | |||
1481 | return 0; | ||
1482 | } | ||
1483 | |||
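blk_mq_alloc_bitmap() replaces the old dense unsigned-long ctx_map with a sparse one: the nr_cpu_ids pending bits are spread over small words of 8 bits each, one cacheline-aligned blk_align_bitmap per word, and the final word simply carries whatever depth is left over. A quick userspace computation of that layout for a hypothetical 20-CPU machine:

```c
/* Sketch of the blk_mq_ctxmap sizing; nr_cpu_ids is a made-up example. */
#include <stdio.h>

int main(void)
{
	unsigned int nr_cpu_ids = 20, bpw = 8;
	unsigned int num_maps = (nr_cpu_ids + bpw - 1) / bpw;	/* ALIGN()/bpw */
	unsigned int total = nr_cpu_ids;

	for (unsigned int i = 0; i < num_maps; i++) {
		unsigned int depth = total < bpw ? total : bpw;

		printf("map[%u]: depth %u\n", i, depth);
		total -= depth;
	}
	return 0;
}
```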
1484 | static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) | ||
957 | { | 1485 | { |
958 | struct blk_mq_hw_ctx *hctx = data; | ||
959 | struct request_queue *q = hctx->queue; | 1486 | struct request_queue *q = hctx->queue; |
960 | struct blk_mq_ctx *ctx; | 1487 | struct blk_mq_ctx *ctx; |
961 | LIST_HEAD(tmp); | 1488 | LIST_HEAD(tmp); |
962 | 1489 | ||
963 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) | ||
964 | return; | ||
965 | |||
966 | /* | 1490 | /* |
967 | * Move ctx entries to new CPU, if this one is going away. | 1491 | * Move ctx entries to new CPU, if this one is going away. |
968 | */ | 1492 | */ |
@@ -971,12 +1495,12 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, | |||
971 | spin_lock(&ctx->lock); | 1495 | spin_lock(&ctx->lock); |
972 | if (!list_empty(&ctx->rq_list)) { | 1496 | if (!list_empty(&ctx->rq_list)) { |
973 | list_splice_init(&ctx->rq_list, &tmp); | 1497 | list_splice_init(&ctx->rq_list, &tmp); |
974 | clear_bit(ctx->index_hw, hctx->ctx_map); | 1498 | blk_mq_hctx_clear_pending(hctx, ctx); |
975 | } | 1499 | } |
976 | spin_unlock(&ctx->lock); | 1500 | spin_unlock(&ctx->lock); |
977 | 1501 | ||
978 | if (list_empty(&tmp)) | 1502 | if (list_empty(&tmp)) |
979 | return; | 1503 | return NOTIFY_OK; |
980 | 1504 | ||
981 | ctx = blk_mq_get_ctx(q); | 1505 | ctx = blk_mq_get_ctx(q); |
982 | spin_lock(&ctx->lock); | 1506 | spin_lock(&ctx->lock); |
@@ -993,210 +1517,103 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, | |||
993 | blk_mq_hctx_mark_pending(hctx, ctx); | 1517 | blk_mq_hctx_mark_pending(hctx, ctx); |
994 | 1518 | ||
995 | spin_unlock(&ctx->lock); | 1519 | spin_unlock(&ctx->lock); |
996 | blk_mq_put_ctx(ctx); | ||
997 | 1520 | ||
998 | blk_mq_run_hw_queue(hctx, true); | 1521 | blk_mq_run_hw_queue(hctx, true); |
1522 | blk_mq_put_ctx(ctx); | ||
1523 | return NOTIFY_OK; | ||
999 | } | 1524 | } |
1000 | 1525 | ||
1001 | static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx, | 1526 | static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu) |
1002 | int (*init)(void *, struct blk_mq_hw_ctx *, | ||
1003 | struct request *, unsigned int), | ||
1004 | void *data) | ||
1005 | { | 1527 | { |
1006 | unsigned int i; | 1528 | struct request_queue *q = hctx->queue; |
1007 | int ret = 0; | 1529 | struct blk_mq_tag_set *set = q->tag_set; |
1008 | |||
1009 | for (i = 0; i < hctx->queue_depth; i++) { | ||
1010 | struct request *rq = hctx->rqs[i]; | ||
1011 | |||
1012 | ret = init(data, hctx, rq, i); | ||
1013 | if (ret) | ||
1014 | break; | ||
1015 | } | ||
1016 | |||
1017 | return ret; | ||
1018 | } | ||
1019 | 1530 | ||
1020 | int blk_mq_init_commands(struct request_queue *q, | 1531 | if (set->tags[hctx->queue_num]) |
1021 | int (*init)(void *, struct blk_mq_hw_ctx *, | 1532 | return NOTIFY_OK; |
1022 | struct request *, unsigned int), | ||
1023 | void *data) | ||
1024 | { | ||
1025 | struct blk_mq_hw_ctx *hctx; | ||
1026 | unsigned int i; | ||
1027 | int ret = 0; | ||
1028 | 1533 | ||
1029 | queue_for_each_hw_ctx(q, hctx, i) { | 1534 | set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num); |
1030 | ret = blk_mq_init_hw_commands(hctx, init, data); | 1535 | if (!set->tags[hctx->queue_num]) |
1031 | if (ret) | 1536 | return NOTIFY_STOP; |
1032 | break; | ||
1033 | } | ||
1034 | 1537 | ||
1035 | return ret; | 1538 | hctx->tags = set->tags[hctx->queue_num]; |
1539 | return NOTIFY_OK; | ||
1036 | } | 1540 | } |
1037 | EXPORT_SYMBOL(blk_mq_init_commands); | ||
1038 | 1541 | ||
1039 | static void blk_mq_free_hw_commands(struct blk_mq_hw_ctx *hctx, | 1542 | static int blk_mq_hctx_notify(void *data, unsigned long action, |
1040 | void (*free)(void *, struct blk_mq_hw_ctx *, | 1543 | unsigned int cpu) |
1041 | struct request *, unsigned int), | ||
1042 | void *data) | ||
1043 | { | 1544 | { |
1044 | unsigned int i; | 1545 | struct blk_mq_hw_ctx *hctx = data; |
1045 | 1546 | ||
1046 | for (i = 0; i < hctx->queue_depth; i++) { | 1547 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) |
1047 | struct request *rq = hctx->rqs[i]; | 1548 | return blk_mq_hctx_cpu_offline(hctx, cpu); |
1549 | else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | ||
1550 | return blk_mq_hctx_cpu_online(hctx, cpu); | ||
1048 | 1551 | ||
1049 | free(data, hctx, rq, i); | 1552 | return NOTIFY_OK; |
1050 | } | ||
1051 | } | 1553 | } |
1052 | 1554 | ||
1053 | void blk_mq_free_commands(struct request_queue *q, | 1555 | static void blk_mq_exit_hw_queues(struct request_queue *q, |
1054 | void (*free)(void *, struct blk_mq_hw_ctx *, | 1556 | struct blk_mq_tag_set *set, int nr_queue) |
1055 | struct request *, unsigned int), | ||
1056 | void *data) | ||
1057 | { | 1557 | { |
1058 | struct blk_mq_hw_ctx *hctx; | 1558 | struct blk_mq_hw_ctx *hctx; |
1059 | unsigned int i; | 1559 | unsigned int i; |
1060 | 1560 | ||
1061 | queue_for_each_hw_ctx(q, hctx, i) | 1561 | queue_for_each_hw_ctx(q, hctx, i) { |
1062 | blk_mq_free_hw_commands(hctx, free, data); | 1562 | if (i == nr_queue) |
1063 | } | 1563 | break; |
1064 | EXPORT_SYMBOL(blk_mq_free_commands); | ||
1065 | 1564 | ||
1066 | static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx) | 1565 | if (set->ops->exit_hctx) |
1067 | { | 1566 | set->ops->exit_hctx(hctx, i); |
1068 | struct page *page; | ||
1069 | 1567 | ||
1070 | while (!list_empty(&hctx->page_list)) { | 1568 | blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); |
1071 | page = list_first_entry(&hctx->page_list, struct page, lru); | 1569 | kfree(hctx->ctxs); |
1072 | list_del_init(&page->lru); | 1570 | blk_mq_free_bitmap(&hctx->ctx_map); |
1073 | __free_pages(page, page->private); | ||
1074 | } | 1571 | } |
1075 | 1572 | ||
1076 | kfree(hctx->rqs); | ||
1077 | |||
1078 | if (hctx->tags) | ||
1079 | blk_mq_free_tags(hctx->tags); | ||
1080 | } | ||
1081 | |||
1082 | static size_t order_to_size(unsigned int order) | ||
1083 | { | ||
1084 | size_t ret = PAGE_SIZE; | ||
1085 | |||
1086 | while (order--) | ||
1087 | ret *= 2; | ||
1088 | |||
1089 | return ret; | ||
1090 | } | 1573 | } |
1091 | 1574 | ||
1092 | static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, | 1575 | static void blk_mq_free_hw_queues(struct request_queue *q, |
1093 | unsigned int reserved_tags, int node) | 1576 | struct blk_mq_tag_set *set) |
1094 | { | 1577 | { |
1095 | unsigned int i, j, entries_per_page, max_order = 4; | 1578 | struct blk_mq_hw_ctx *hctx; |
1096 | size_t rq_size, left; | 1579 | unsigned int i; |
1097 | |||
1098 | INIT_LIST_HEAD(&hctx->page_list); | ||
1099 | |||
1100 | hctx->rqs = kmalloc_node(hctx->queue_depth * sizeof(struct request *), | ||
1101 | GFP_KERNEL, node); | ||
1102 | if (!hctx->rqs) | ||
1103 | return -ENOMEM; | ||
1104 | |||
1105 | /* | ||
1106 | * rq_size is the size of the request plus driver payload, rounded | ||
1107 | * to the cacheline size | ||
1108 | */ | ||
1109 | rq_size = round_up(sizeof(struct request) + hctx->cmd_size, | ||
1110 | cache_line_size()); | ||
1111 | left = rq_size * hctx->queue_depth; | ||
1112 | |||
1113 | for (i = 0; i < hctx->queue_depth;) { | ||
1114 | int this_order = max_order; | ||
1115 | struct page *page; | ||
1116 | int to_do; | ||
1117 | void *p; | ||
1118 | |||
1119 | while (left < order_to_size(this_order - 1) && this_order) | ||
1120 | this_order--; | ||
1121 | |||
1122 | do { | ||
1123 | page = alloc_pages_node(node, GFP_KERNEL, this_order); | ||
1124 | if (page) | ||
1125 | break; | ||
1126 | if (!this_order--) | ||
1127 | break; | ||
1128 | if (order_to_size(this_order) < rq_size) | ||
1129 | break; | ||
1130 | } while (1); | ||
1131 | |||
1132 | if (!page) | ||
1133 | break; | ||
1134 | |||
1135 | page->private = this_order; | ||
1136 | list_add_tail(&page->lru, &hctx->page_list); | ||
1137 | |||
1138 | p = page_address(page); | ||
1139 | entries_per_page = order_to_size(this_order) / rq_size; | ||
1140 | to_do = min(entries_per_page, hctx->queue_depth - i); | ||
1141 | left -= to_do * rq_size; | ||
1142 | for (j = 0; j < to_do; j++) { | ||
1143 | hctx->rqs[i] = p; | ||
1144 | blk_mq_rq_init(hctx, hctx->rqs[i]); | ||
1145 | p += rq_size; | ||
1146 | i++; | ||
1147 | } | ||
1148 | } | ||
1149 | |||
1150 | if (i < (reserved_tags + BLK_MQ_TAG_MIN)) | ||
1151 | goto err_rq_map; | ||
1152 | else if (i != hctx->queue_depth) { | ||
1153 | hctx->queue_depth = i; | ||
1154 | pr_warn("%s: queue depth set to %u because of low memory\n", | ||
1155 | __func__, i); | ||
1156 | } | ||
1157 | 1580 | ||
1158 | hctx->tags = blk_mq_init_tags(hctx->queue_depth, reserved_tags, node); | 1581 | queue_for_each_hw_ctx(q, hctx, i) { |
1159 | if (!hctx->tags) { | 1582 | free_cpumask_var(hctx->cpumask); |
1160 | err_rq_map: | 1583 | kfree(hctx); |
1161 | blk_mq_free_rq_map(hctx); | ||
1162 | return -ENOMEM; | ||
1163 | } | 1584 | } |
1164 | |||
1165 | return 0; | ||
1166 | } | 1585 | } |
1167 | 1586 | ||
1168 | static int blk_mq_init_hw_queues(struct request_queue *q, | 1587 | static int blk_mq_init_hw_queues(struct request_queue *q, |
1169 | struct blk_mq_reg *reg, void *driver_data) | 1588 | struct blk_mq_tag_set *set) |
1170 | { | 1589 | { |
1171 | struct blk_mq_hw_ctx *hctx; | 1590 | struct blk_mq_hw_ctx *hctx; |
1172 | unsigned int i, j; | 1591 | unsigned int i; |
1173 | 1592 | ||
1174 | /* | 1593 | /* |
1175 | * Initialize hardware queues | 1594 | * Initialize hardware queues |
1176 | */ | 1595 | */ |
1177 | queue_for_each_hw_ctx(q, hctx, i) { | 1596 | queue_for_each_hw_ctx(q, hctx, i) { |
1178 | unsigned int num_maps; | ||
1179 | int node; | 1597 | int node; |
1180 | 1598 | ||
1181 | node = hctx->numa_node; | 1599 | node = hctx->numa_node; |
1182 | if (node == NUMA_NO_NODE) | 1600 | if (node == NUMA_NO_NODE) |
1183 | node = hctx->numa_node = reg->numa_node; | 1601 | node = hctx->numa_node = set->numa_node; |
1184 | 1602 | ||
1185 | INIT_DELAYED_WORK(&hctx->delayed_work, blk_mq_work_fn); | 1603 | INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); |
1604 | INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn); | ||
1186 | spin_lock_init(&hctx->lock); | 1605 | spin_lock_init(&hctx->lock); |
1187 | INIT_LIST_HEAD(&hctx->dispatch); | 1606 | INIT_LIST_HEAD(&hctx->dispatch); |
1188 | hctx->queue = q; | 1607 | hctx->queue = q; |
1189 | hctx->queue_num = i; | 1608 | hctx->queue_num = i; |
1190 | hctx->flags = reg->flags; | 1609 | hctx->flags = set->flags; |
1191 | hctx->queue_depth = reg->queue_depth; | 1610 | hctx->cmd_size = set->cmd_size; |
1192 | hctx->cmd_size = reg->cmd_size; | ||
1193 | 1611 | ||
1194 | blk_mq_init_cpu_notifier(&hctx->cpu_notifier, | 1612 | blk_mq_init_cpu_notifier(&hctx->cpu_notifier, |
1195 | blk_mq_hctx_notify, hctx); | 1613 | blk_mq_hctx_notify, hctx); |
1196 | blk_mq_register_cpu_notifier(&hctx->cpu_notifier); | 1614 | blk_mq_register_cpu_notifier(&hctx->cpu_notifier); |
1197 | 1615 | ||
1198 | if (blk_mq_init_rq_map(hctx, reg->reserved_tags, node)) | 1616 | hctx->tags = set->tags[i]; |
1199 | break; | ||
1200 | 1617 | ||
1201 | /* | 1618 | /* |
1202 | * Allocate space for all possible cpus to avoid allocation in | 1619 | * Allocate space for all possible cpus to avoid allocation in |
@@ -1207,17 +1624,13 @@ static int blk_mq_init_hw_queues(struct request_queue *q, | |||
1207 | if (!hctx->ctxs) | 1624 | if (!hctx->ctxs) |
1208 | break; | 1625 | break; |
1209 | 1626 | ||
1210 | num_maps = ALIGN(nr_cpu_ids, BITS_PER_LONG) / BITS_PER_LONG; | 1627 | if (blk_mq_alloc_bitmap(&hctx->ctx_map, node)) |
1211 | hctx->ctx_map = kzalloc_node(num_maps * sizeof(unsigned long), | ||
1212 | GFP_KERNEL, node); | ||
1213 | if (!hctx->ctx_map) | ||
1214 | break; | 1628 | break; |
1215 | 1629 | ||
1216 | hctx->nr_ctx_map = num_maps; | ||
1217 | hctx->nr_ctx = 0; | 1630 | hctx->nr_ctx = 0; |
1218 | 1631 | ||
1219 | if (reg->ops->init_hctx && | 1632 | if (set->ops->init_hctx && |
1220 | reg->ops->init_hctx(hctx, driver_data, i)) | 1633 | set->ops->init_hctx(hctx, set->driver_data, i)) |
1221 | break; | 1634 | break; |
1222 | } | 1635 | } |
1223 | 1636 | ||
@@ -1227,17 +1640,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q, | |||
1227 | /* | 1640 | /* |
1228 | * Init failed | 1641 | * Init failed |
1229 | */ | 1642 | */ |
1230 | queue_for_each_hw_ctx(q, hctx, j) { | 1643 | blk_mq_exit_hw_queues(q, set, i); |
1231 | if (i == j) | ||
1232 | break; | ||
1233 | |||
1234 | if (reg->ops->exit_hctx) | ||
1235 | reg->ops->exit_hctx(hctx, j); | ||
1236 | |||
1237 | blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); | ||
1238 | blk_mq_free_rq_map(hctx); | ||
1239 | kfree(hctx->ctxs); | ||
1240 | } | ||
1241 | 1644 | ||
1242 | return 1; | 1645 | return 1; |
1243 | } | 1646 | } |
@@ -1258,12 +1661,13 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, | |||
1258 | __ctx->queue = q; | 1661 | __ctx->queue = q; |
1259 | 1662 | ||
1260 | /* If the cpu isn't online, the cpu is mapped to first hctx */ | 1663 | /* If the cpu isn't online, the cpu is mapped to first hctx */ |
1261 | hctx = q->mq_ops->map_queue(q, i); | ||
1262 | hctx->nr_ctx++; | ||
1263 | |||
1264 | if (!cpu_online(i)) | 1664 | if (!cpu_online(i)) |
1265 | continue; | 1665 | continue; |
1266 | 1666 | ||
1667 | hctx = q->mq_ops->map_queue(q, i); | ||
1668 | cpumask_set_cpu(i, hctx->cpumask); | ||
1669 | hctx->nr_ctx++; | ||
1670 | |||
1267 | /* | 1671 | /* |
1268 | * Set local node, IFF we have more than one hw queue. If | 1672 | * Set local node, IFF we have more than one hw queue. If |
1269 | * not, we remain on the home node of the device | 1673 | * not, we remain on the home node of the device |
@@ -1280,6 +1684,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) | |||
1280 | struct blk_mq_ctx *ctx; | 1684 | struct blk_mq_ctx *ctx; |
1281 | 1685 | ||
1282 | queue_for_each_hw_ctx(q, hctx, i) { | 1686 | queue_for_each_hw_ctx(q, hctx, i) { |
1687 | cpumask_clear(hctx->cpumask); | ||
1283 | hctx->nr_ctx = 0; | 1688 | hctx->nr_ctx = 0; |
1284 | } | 1689 | } |
1285 | 1690 | ||
@@ -1288,115 +1693,208 @@ static void blk_mq_map_swqueue(struct request_queue *q) | |||
1288 | */ | 1693 | */ |
1289 | queue_for_each_ctx(q, ctx, i) { | 1694 | queue_for_each_ctx(q, ctx, i) { |
1290 | /* If the cpu isn't online, the cpu is mapped to first hctx */ | 1695 | /* If the cpu isn't online, the cpu is mapped to first hctx */ |
1696 | if (!cpu_online(i)) | ||
1697 | continue; | ||
1698 | |||
1291 | hctx = q->mq_ops->map_queue(q, i); | 1699 | hctx = q->mq_ops->map_queue(q, i); |
1700 | cpumask_set_cpu(i, hctx->cpumask); | ||
1292 | ctx->index_hw = hctx->nr_ctx; | 1701 | ctx->index_hw = hctx->nr_ctx; |
1293 | hctx->ctxs[hctx->nr_ctx++] = ctx; | 1702 | hctx->ctxs[hctx->nr_ctx++] = ctx; |
1294 | } | 1703 | } |
1704 | |||
1705 | queue_for_each_hw_ctx(q, hctx, i) { | ||
1706 | /* | ||
1707 | * If not software queues are mapped to this hardware queue, | ||
1708 | * disable it and free the request entries | ||
1709 | */ | ||
1710 | if (!hctx->nr_ctx) { | ||
1711 | struct blk_mq_tag_set *set = q->tag_set; | ||
1712 | |||
1713 | if (set->tags[i]) { | ||
1714 | blk_mq_free_rq_map(set, set->tags[i], i); | ||
1715 | set->tags[i] = NULL; | ||
1716 | hctx->tags = NULL; | ||
1717 | } | ||
1718 | continue; | ||
1719 | } | ||
1720 | |||
1721 | /* | ||
1722 | * Initialize batch roundrobin counts | ||
1723 | */ | ||
1724 | hctx->next_cpu = cpumask_first(hctx->cpumask); | ||
1725 | hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; | ||
1726 | } | ||
1295 | } | 1727 | } |
1296 | 1728 | ||
1297 | struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, | 1729 | static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set) |
1298 | void *driver_data) | ||
1299 | { | 1730 | { |
1300 | struct blk_mq_hw_ctx **hctxs; | 1731 | struct blk_mq_hw_ctx *hctx; |
1301 | struct blk_mq_ctx *ctx; | ||
1302 | struct request_queue *q; | 1732 | struct request_queue *q; |
1733 | bool shared; | ||
1303 | int i; | 1734 | int i; |
1304 | 1735 | ||
1305 | if (!reg->nr_hw_queues || | 1736 | if (set->tag_list.next == set->tag_list.prev) |
1306 | !reg->ops->queue_rq || !reg->ops->map_queue || | 1737 | shared = false; |
1307 | !reg->ops->alloc_hctx || !reg->ops->free_hctx) | 1738 | else |
1308 | return ERR_PTR(-EINVAL); | 1739 | shared = true; |
1740 | |||
1741 | list_for_each_entry(q, &set->tag_list, tag_set_list) { | ||
1742 | blk_mq_freeze_queue(q); | ||
1309 | 1743 | ||
1310 | if (!reg->queue_depth) | 1744 | queue_for_each_hw_ctx(q, hctx, i) { |
1311 | reg->queue_depth = BLK_MQ_MAX_DEPTH; | 1745 | if (shared) |
1312 | else if (reg->queue_depth > BLK_MQ_MAX_DEPTH) { | 1746 | hctx->flags |= BLK_MQ_F_TAG_SHARED; |
1313 | pr_err("blk-mq: queuedepth too large (%u)\n", reg->queue_depth); | 1747 | else |
1314 | reg->queue_depth = BLK_MQ_MAX_DEPTH; | 1748 | hctx->flags &= ~BLK_MQ_F_TAG_SHARED; |
1749 | } | ||
1750 | blk_mq_unfreeze_queue(q); | ||
1315 | } | 1751 | } |
1752 | } | ||
1316 | 1753 | ||
1317 | if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) | 1754 | static void blk_mq_del_queue_tag_set(struct request_queue *q) |
1318 | return ERR_PTR(-EINVAL); | 1755 | { |
1756 | struct blk_mq_tag_set *set = q->tag_set; | ||
1757 | |||
1758 | blk_mq_freeze_queue(q); | ||
1759 | |||
1760 | mutex_lock(&set->tag_list_lock); | ||
1761 | list_del_init(&q->tag_set_list); | ||
1762 | blk_mq_update_tag_set_depth(set); | ||
1763 | mutex_unlock(&set->tag_list_lock); | ||
1764 | |||
1765 | blk_mq_unfreeze_queue(q); | ||
1766 | } | ||
1767 | |||
1768 | static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, | ||
1769 | struct request_queue *q) | ||
1770 | { | ||
1771 | q->tag_set = set; | ||
1772 | |||
1773 | mutex_lock(&set->tag_list_lock); | ||
1774 | list_add_tail(&q->tag_set_list, &set->tag_list); | ||
1775 | blk_mq_update_tag_set_depth(set); | ||
1776 | mutex_unlock(&set->tag_list_lock); | ||
1777 | } | ||
1778 | |||
1779 | struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) | ||
1780 | { | ||
1781 | struct blk_mq_hw_ctx **hctxs; | ||
1782 | struct blk_mq_ctx *ctx; | ||
1783 | struct request_queue *q; | ||
1784 | unsigned int *map; | ||
1785 | int i; | ||
1319 | 1786 | ||
1320 | ctx = alloc_percpu(struct blk_mq_ctx); | 1787 | ctx = alloc_percpu(struct blk_mq_ctx); |
1321 | if (!ctx) | 1788 | if (!ctx) |
1322 | return ERR_PTR(-ENOMEM); | 1789 | return ERR_PTR(-ENOMEM); |
1323 | 1790 | ||
1324 | hctxs = kmalloc_node(reg->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, | 1791 | hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, |
1325 | reg->numa_node); | 1792 | set->numa_node); |
1326 | 1793 | ||
1327 | if (!hctxs) | 1794 | if (!hctxs) |
1328 | goto err_percpu; | 1795 | goto err_percpu; |
1329 | 1796 | ||
1330 | for (i = 0; i < reg->nr_hw_queues; i++) { | 1797 | map = blk_mq_make_queue_map(set); |
1331 | hctxs[i] = reg->ops->alloc_hctx(reg, i); | 1798 | if (!map) |
1799 | goto err_map; | ||
1800 | |||
1801 | for (i = 0; i < set->nr_hw_queues; i++) { | ||
1802 | int node = blk_mq_hw_queue_to_node(map, i); | ||
1803 | |||
1804 | hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx), | ||
1805 | GFP_KERNEL, node); | ||
1332 | if (!hctxs[i]) | 1806 | if (!hctxs[i]) |
1333 | goto err_hctxs; | 1807 | goto err_hctxs; |
1334 | 1808 | ||
1335 | hctxs[i]->numa_node = NUMA_NO_NODE; | 1809 | if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL)) |
1810 | goto err_hctxs; | ||
1811 | |||
1812 | atomic_set(&hctxs[i]->nr_active, 0); | ||
1813 | hctxs[i]->numa_node = node; | ||
1336 | hctxs[i]->queue_num = i; | 1814 | hctxs[i]->queue_num = i; |
1337 | } | 1815 | } |
1338 | 1816 | ||
1339 | q = blk_alloc_queue_node(GFP_KERNEL, reg->numa_node); | 1817 | q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); |
1340 | if (!q) | 1818 | if (!q) |
1341 | goto err_hctxs; | 1819 | goto err_hctxs; |
1342 | 1820 | ||
1343 | q->mq_map = blk_mq_make_queue_map(reg); | 1821 | if (percpu_counter_init(&q->mq_usage_counter, 0)) |
1344 | if (!q->mq_map) | ||
1345 | goto err_map; | 1822 | goto err_map; |
1346 | 1823 | ||
1347 | setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); | 1824 | setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); |
1348 | blk_queue_rq_timeout(q, 30000); | 1825 | blk_queue_rq_timeout(q, 30000); |
1349 | 1826 | ||
1350 | q->nr_queues = nr_cpu_ids; | 1827 | q->nr_queues = nr_cpu_ids; |
1351 | q->nr_hw_queues = reg->nr_hw_queues; | 1828 | q->nr_hw_queues = set->nr_hw_queues; |
1829 | q->mq_map = map; | ||
1352 | 1830 | ||
1353 | q->queue_ctx = ctx; | 1831 | q->queue_ctx = ctx; |
1354 | q->queue_hw_ctx = hctxs; | 1832 | q->queue_hw_ctx = hctxs; |
1355 | 1833 | ||
1356 | q->mq_ops = reg->ops; | 1834 | q->mq_ops = set->ops; |
1357 | q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; | 1835 | q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; |
1358 | 1836 | ||
1837 | if (!(set->flags & BLK_MQ_F_SG_MERGE)) | ||
1838 | q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE; | ||
1839 | |||
1359 | q->sg_reserved_size = INT_MAX; | 1840 | q->sg_reserved_size = INT_MAX; |
1360 | 1841 | ||
1361 | blk_queue_make_request(q, blk_mq_make_request); | 1842 | INIT_WORK(&q->requeue_work, blk_mq_requeue_work); |
1362 | blk_queue_rq_timed_out(q, reg->ops->timeout); | 1843 | INIT_LIST_HEAD(&q->requeue_list); |
1363 | if (reg->timeout) | 1844 | spin_lock_init(&q->requeue_lock); |
1364 | blk_queue_rq_timeout(q, reg->timeout); | 1845 | |
1846 | if (q->nr_hw_queues > 1) | ||
1847 | blk_queue_make_request(q, blk_mq_make_request); | ||
1848 | else | ||
1849 | blk_queue_make_request(q, blk_sq_make_request); | ||
1850 | |||
1851 | blk_queue_rq_timed_out(q, blk_mq_rq_timed_out); | ||
1852 | if (set->timeout) | ||
1853 | blk_queue_rq_timeout(q, set->timeout); | ||
1854 | |||
1855 | /* | ||
1856 | * Do this after blk_queue_make_request() overrides it... | ||
1857 | */ | ||
1858 | q->nr_requests = set->queue_depth; | ||
1365 | 1859 | ||
1366 | if (reg->ops->complete) | 1860 | if (set->ops->complete) |
1367 | blk_queue_softirq_done(q, reg->ops->complete); | 1861 | blk_queue_softirq_done(q, set->ops->complete); |
1368 | 1862 | ||
1369 | blk_mq_init_flush(q); | 1863 | blk_mq_init_flush(q); |
1370 | blk_mq_init_cpu_queues(q, reg->nr_hw_queues); | 1864 | blk_mq_init_cpu_queues(q, set->nr_hw_queues); |
1371 | 1865 | ||
1372 | q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size, | 1866 | q->flush_rq = kzalloc(round_up(sizeof(struct request) + |
1373 | cache_line_size()), GFP_KERNEL); | 1867 | set->cmd_size, cache_line_size()), |
1868 | GFP_KERNEL); | ||
1374 | if (!q->flush_rq) | 1869 | if (!q->flush_rq) |
1375 | goto err_hw; | 1870 | goto err_hw; |
1376 | 1871 | ||
1377 | if (blk_mq_init_hw_queues(q, reg, driver_data)) | 1872 | if (blk_mq_init_hw_queues(q, set)) |
1378 | goto err_flush_rq; | 1873 | goto err_flush_rq; |
1379 | 1874 | ||
1380 | blk_mq_map_swqueue(q); | ||
1381 | |||
1382 | mutex_lock(&all_q_mutex); | 1875 | mutex_lock(&all_q_mutex); |
1383 | list_add_tail(&q->all_q_node, &all_q_list); | 1876 | list_add_tail(&q->all_q_node, &all_q_list); |
1384 | mutex_unlock(&all_q_mutex); | 1877 | mutex_unlock(&all_q_mutex); |
1385 | 1878 | ||
1879 | blk_mq_add_queue_tag_set(set, q); | ||
1880 | |||
1881 | blk_mq_map_swqueue(q); | ||
1882 | |||
1386 | return q; | 1883 | return q; |
1387 | 1884 | ||
1388 | err_flush_rq: | 1885 | err_flush_rq: |
1389 | kfree(q->flush_rq); | 1886 | kfree(q->flush_rq); |
1390 | err_hw: | 1887 | err_hw: |
1391 | kfree(q->mq_map); | ||
1392 | err_map: | ||
1393 | blk_cleanup_queue(q); | 1888 | blk_cleanup_queue(q); |
1394 | err_hctxs: | 1889 | err_hctxs: |
1395 | for (i = 0; i < reg->nr_hw_queues; i++) { | 1890 | kfree(map); |
1891 | for (i = 0; i < set->nr_hw_queues; i++) { | ||
1396 | if (!hctxs[i]) | 1892 | if (!hctxs[i]) |
1397 | break; | 1893 | break; |
1398 | reg->ops->free_hctx(hctxs[i], i); | 1894 | free_cpumask_var(hctxs[i]->cpumask); |
1895 | kfree(hctxs[i]); | ||
1399 | } | 1896 | } |
1897 | err_map: | ||
1400 | kfree(hctxs); | 1898 | kfree(hctxs); |
1401 | err_percpu: | 1899 | err_percpu: |
1402 | free_percpu(ctx); | 1900 | free_percpu(ctx); |
@@ -1406,18 +1904,14 @@ EXPORT_SYMBOL(blk_mq_init_queue); | |||
1406 | 1904 | ||
1407 | void blk_mq_free_queue(struct request_queue *q) | 1905 | void blk_mq_free_queue(struct request_queue *q) |
1408 | { | 1906 | { |
1409 | struct blk_mq_hw_ctx *hctx; | 1907 | struct blk_mq_tag_set *set = q->tag_set; |
1410 | int i; | ||
1411 | 1908 | ||
1412 | queue_for_each_hw_ctx(q, hctx, i) { | 1909 | blk_mq_del_queue_tag_set(q); |
1413 | kfree(hctx->ctx_map); | 1910 | |
1414 | kfree(hctx->ctxs); | 1911 | blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); |
1415 | blk_mq_free_rq_map(hctx); | 1912 | blk_mq_free_hw_queues(q, set); |
1416 | blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); | 1913 | |
1417 | if (q->mq_ops->exit_hctx) | 1914 | percpu_counter_destroy(&q->mq_usage_counter); |
1418 | q->mq_ops->exit_hctx(hctx, i); | ||
1419 | q->mq_ops->free_hctx(hctx, i); | ||
1420 | } | ||
1421 | 1915 | ||
1422 | free_percpu(q->queue_ctx); | 1916 | free_percpu(q->queue_ctx); |
1423 | kfree(q->queue_hw_ctx); | 1917 | kfree(q->queue_hw_ctx); |
@@ -1437,6 +1931,8 @@ static void blk_mq_queue_reinit(struct request_queue *q) | |||
1437 | { | 1931 | { |
1438 | blk_mq_freeze_queue(q); | 1932 | blk_mq_freeze_queue(q); |
1439 | 1933 | ||
1934 | blk_mq_sysfs_unregister(q); | ||
1935 | |||
1440 | blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues); | 1936 | blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues); |
1441 | 1937 | ||
1442 | /* | 1938 | /* |
@@ -1447,6 +1943,8 @@ static void blk_mq_queue_reinit(struct request_queue *q) | |||
1447 | 1943 | ||
1448 | blk_mq_map_swqueue(q); | 1944 | blk_mq_map_swqueue(q); |
1449 | 1945 | ||
1946 | blk_mq_sysfs_register(q); | ||
1947 | |||
1450 | blk_mq_unfreeze_queue(q); | 1948 | blk_mq_unfreeze_queue(q); |
1451 | } | 1949 | } |
1452 | 1950 | ||
@@ -1456,10 +1954,10 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, | |||
1456 | struct request_queue *q; | 1954 | struct request_queue *q; |
1457 | 1955 | ||
1458 | /* | 1956 | /* |
1459 | * Before new mapping is established, hotadded cpu might already start | 1957 | * Before new mappings are established, hotadded cpu might already |
1460 | * handling requests. This doesn't break anything as we map offline | 1958 | * start handling requests. This doesn't break anything as we map |
1461 | * CPUs to first hardware queue. We will re-init queue below to get | 1959 | * offline CPUs to first hardware queue. We will re-init the queue |
1462 | * optimal settings. | 1960 | * below to get optimal settings. |
1463 | */ | 1961 | */ |
1464 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN && | 1962 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN && |
1465 | action != CPU_ONLINE && action != CPU_ONLINE_FROZEN) | 1963 | action != CPU_ONLINE && action != CPU_ONLINE_FROZEN) |
@@ -1472,6 +1970,81 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, | |||
1472 | return NOTIFY_OK; | 1970 | return NOTIFY_OK; |
1473 | } | 1971 | } |
1474 | 1972 | ||
1973 | int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) | ||
1974 | { | ||
1975 | int i; | ||
1976 | |||
1977 | if (!set->nr_hw_queues) | ||
1978 | return -EINVAL; | ||
1979 | if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH) | ||
1980 | return -EINVAL; | ||
1981 | if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) | ||
1982 | return -EINVAL; | ||
1983 | |||
1984 | if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue) | ||
1985 | return -EINVAL; | ||
1986 | |||
1987 | |||
1988 | set->tags = kmalloc_node(set->nr_hw_queues * | ||
1989 | sizeof(struct blk_mq_tags *), | ||
1990 | GFP_KERNEL, set->numa_node); | ||
1991 | if (!set->tags) | ||
1992 | goto out; | ||
1993 | |||
1994 | for (i = 0; i < set->nr_hw_queues; i++) { | ||
1995 | set->tags[i] = blk_mq_init_rq_map(set, i); | ||
1996 | if (!set->tags[i]) | ||
1997 | goto out_unwind; | ||
1998 | } | ||
1999 | |||
2000 | mutex_init(&set->tag_list_lock); | ||
2001 | INIT_LIST_HEAD(&set->tag_list); | ||
2002 | |||
2003 | return 0; | ||
2004 | |||
2005 | out_unwind: | ||
2006 | while (--i >= 0) | ||
2007 | blk_mq_free_rq_map(set, set->tags[i], i); | ||
2008 | out: | ||
2009 | return -ENOMEM; | ||
2010 | } | ||
2011 | EXPORT_SYMBOL(blk_mq_alloc_tag_set); | ||
2012 | |||
2013 | void blk_mq_free_tag_set(struct blk_mq_tag_set *set) | ||
2014 | { | ||
2015 | int i; | ||
2016 | |||
2017 | for (i = 0; i < set->nr_hw_queues; i++) { | ||
2018 | if (set->tags[i]) | ||
2019 | blk_mq_free_rq_map(set, set->tags[i], i); | ||
2020 | } | ||
2021 | |||
2022 | kfree(set->tags); | ||
2023 | } | ||
2024 | EXPORT_SYMBOL(blk_mq_free_tag_set); | ||
2025 | |||
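With request allocation moved into the shared tag set, a driver now fills in a struct blk_mq_tag_set once, calls blk_mq_alloc_tag_set(), and creates its queue(s) from it with blk_mq_init_queue(). A rough sketch of that driver-side sequence against the interface in this patch; the mydrv_* names and struct mydrv_cmd are placeholders, not anything defined here, and error handling is trimmed:

```c
/* Driver-side sketch of the new tag-set API. "mydrv_*" and
 * struct mydrv_cmd are placeholders, not part of this patch. */
static struct blk_mq_tag_set mydrv_tag_set;
static struct request_queue *mydrv_queue;

static struct blk_mq_ops mydrv_mq_ops = {
	.queue_rq	= mydrv_queue_rq,	/* placeholder driver callback */
	.map_queue	= blk_mq_map_queue,
	.init_request	= mydrv_init_request,	/* optional per-request init */
};

static int mydrv_setup_queue(void *driver_data)
{
	int ret;

	mydrv_tag_set.ops		= &mydrv_mq_ops;
	mydrv_tag_set.nr_hw_queues	= 1;
	mydrv_tag_set.queue_depth	= 64;
	mydrv_tag_set.numa_node		= NUMA_NO_NODE;
	mydrv_tag_set.cmd_size		= sizeof(struct mydrv_cmd);
	mydrv_tag_set.flags		= BLK_MQ_F_SHOULD_MERGE;
	mydrv_tag_set.driver_data	= driver_data;

	/* Allocates the per-hw-queue request maps up front. */
	ret = blk_mq_alloc_tag_set(&mydrv_tag_set);
	if (ret)
		return ret;

	mydrv_queue = blk_mq_init_queue(&mydrv_tag_set);
	if (IS_ERR(mydrv_queue)) {
		blk_mq_free_tag_set(&mydrv_tag_set);
		return PTR_ERR(mydrv_queue);
	}

	return 0;
}
```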
2026 | int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) | ||
2027 | { | ||
2028 | struct blk_mq_tag_set *set = q->tag_set; | ||
2029 | struct blk_mq_hw_ctx *hctx; | ||
2030 | int i, ret; | ||
2031 | |||
2032 | if (!set || nr > set->queue_depth) | ||
2033 | return -EINVAL; | ||
2034 | |||
2035 | ret = 0; | ||
2036 | queue_for_each_hw_ctx(q, hctx, i) { | ||
2037 | ret = blk_mq_tag_update_depth(hctx->tags, nr); | ||
2038 | if (ret) | ||
2039 | break; | ||
2040 | } | ||
2041 | |||
2042 | if (!ret) | ||
2043 | q->nr_requests = nr; | ||
2044 | |||
2045 | return ret; | ||
2046 | } | ||
2047 | |||
1475 | void blk_mq_disable_hotplug(void) | 2048 | void blk_mq_disable_hotplug(void) |
1476 | { | 2049 | { |
1477 | mutex_lock(&all_q_mutex); | 2050 | mutex_lock(&all_q_mutex); |
diff --git a/block/blk-mq.h b/block/blk-mq.h index ebbe6bac9d61..de7b3bbd5bd6 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef INT_BLK_MQ_H | 1 | #ifndef INT_BLK_MQ_H |
2 | #define INT_BLK_MQ_H | 2 | #define INT_BLK_MQ_H |
3 | 3 | ||
4 | struct blk_mq_tag_set; | ||
5 | |||
4 | struct blk_mq_ctx { | 6 | struct blk_mq_ctx { |
5 | struct { | 7 | struct { |
6 | spinlock_t lock; | 8 | spinlock_t lock; |
@@ -9,7 +11,8 @@ struct blk_mq_ctx { | |||
9 | 11 | ||
10 | unsigned int cpu; | 12 | unsigned int cpu; |
11 | unsigned int index_hw; | 13 | unsigned int index_hw; |
12 | unsigned int ipi_redirect; | 14 | |
15 | unsigned int last_tag ____cacheline_aligned_in_smp; | ||
13 | 16 | ||
14 | /* incremented at dispatch time */ | 17 | /* incremented at dispatch time */ |
15 | unsigned long rq_dispatched[2]; | 18 | unsigned long rq_dispatched[2]; |
@@ -20,21 +23,23 @@ struct blk_mq_ctx { | |||
20 | 23 | ||
21 | struct request_queue *queue; | 24 | struct request_queue *queue; |
22 | struct kobject kobj; | 25 | struct kobject kobj; |
23 | }; | 26 | } ____cacheline_aligned_in_smp; |
24 | 27 | ||
25 | void __blk_mq_complete_request(struct request *rq); | 28 | void __blk_mq_complete_request(struct request *rq); |
26 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); | 29 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); |
27 | void blk_mq_init_flush(struct request_queue *q); | 30 | void blk_mq_init_flush(struct request_queue *q); |
28 | void blk_mq_drain_queue(struct request_queue *q); | 31 | void blk_mq_drain_queue(struct request_queue *q); |
29 | void blk_mq_free_queue(struct request_queue *q); | 32 | void blk_mq_free_queue(struct request_queue *q); |
30 | void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq); | 33 | void blk_mq_clone_flush_request(struct request *flush_rq, |
34 | struct request *orig_rq); | ||
35 | int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); | ||
31 | 36 | ||
32 | /* | 37 | /* |
33 | * CPU hotplug helpers | 38 | * CPU hotplug helpers |
34 | */ | 39 | */ |
35 | struct blk_mq_cpu_notifier; | 40 | struct blk_mq_cpu_notifier; |
36 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, | 41 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, |
37 | void (*fn)(void *, unsigned long, unsigned int), | 42 | int (*fn)(void *, unsigned long, unsigned int), |
38 | void *data); | 43 | void *data); |
39 | void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier); | 44 | void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier); |
40 | void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier); | 45 | void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier); |
@@ -45,10 +50,23 @@ void blk_mq_disable_hotplug(void); | |||
45 | /* | 50 | /* |
46 | * CPU -> queue mappings | 51 | * CPU -> queue mappings |
47 | */ | 52 | */ |
48 | struct blk_mq_reg; | 53 | extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set); |
49 | extern unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg); | ||
50 | extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues); | 54 | extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues); |
55 | extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int); | ||
51 | 56 | ||
52 | void blk_mq_add_timer(struct request *rq); | 57 | /* |
58 | * sysfs helpers | ||
59 | */ | ||
60 | extern int blk_mq_sysfs_register(struct request_queue *q); | ||
61 | extern void blk_mq_sysfs_unregister(struct request_queue *q); | ||
62 | |||
63 | /* | ||
64 | * Basic implementation of sparser bitmap, allowing the user to spread | ||
65 | * the bits over more cachelines. | ||
66 | */ | ||
67 | struct blk_align_bitmap { | ||
68 | unsigned long word; | ||
69 | unsigned long depth; | ||
70 | } ____cacheline_aligned_in_smp; | ||
53 | 71 | ||
54 | #endif | 72 | #endif |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 7500f876dae4..23321fbab293 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page) | |||
48 | static ssize_t | 48 | static ssize_t |
49 | queue_requests_store(struct request_queue *q, const char *page, size_t count) | 49 | queue_requests_store(struct request_queue *q, const char *page, size_t count) |
50 | { | 50 | { |
51 | struct request_list *rl; | ||
52 | unsigned long nr; | 51 | unsigned long nr; |
53 | int ret; | 52 | int ret, err; |
54 | 53 | ||
55 | if (!q->request_fn) | 54 | if (!q->request_fn && !q->mq_ops) |
56 | return -EINVAL; | 55 | return -EINVAL; |
57 | 56 | ||
58 | ret = queue_var_store(&nr, page, count); | 57 | ret = queue_var_store(&nr, page, count); |
@@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) | |||
62 | if (nr < BLKDEV_MIN_RQ) | 61 | if (nr < BLKDEV_MIN_RQ) |
63 | nr = BLKDEV_MIN_RQ; | 62 | nr = BLKDEV_MIN_RQ; |
64 | 63 | ||
65 | spin_lock_irq(q->queue_lock); | 64 | if (q->request_fn) |
66 | q->nr_requests = nr; | 65 | err = blk_update_nr_requests(q, nr); |
67 | blk_queue_congestion_threshold(q); | 66 | else |
68 | 67 | err = blk_mq_update_nr_requests(q, nr); | |
69 | /* congestion isn't cgroup aware and follows root blkcg for now */ | 68 | |
70 | rl = &q->root_rl; | 69 | if (err) |
71 | 70 | return err; | |
72 | if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) | ||
73 | blk_set_queue_congested(q, BLK_RW_SYNC); | ||
74 | else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) | ||
75 | blk_clear_queue_congested(q, BLK_RW_SYNC); | ||
76 | |||
77 | if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) | ||
78 | blk_set_queue_congested(q, BLK_RW_ASYNC); | ||
79 | else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) | ||
80 | blk_clear_queue_congested(q, BLK_RW_ASYNC); | ||
81 | |||
82 | blk_queue_for_each_rl(rl, q) { | ||
83 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { | ||
84 | blk_set_rl_full(rl, BLK_RW_SYNC); | ||
85 | } else { | ||
86 | blk_clear_rl_full(rl, BLK_RW_SYNC); | ||
87 | wake_up(&rl->wait[BLK_RW_SYNC]); | ||
88 | } | ||
89 | |||
90 | if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { | ||
91 | blk_set_rl_full(rl, BLK_RW_ASYNC); | ||
92 | } else { | ||
93 | blk_clear_rl_full(rl, BLK_RW_ASYNC); | ||
94 | wake_up(&rl->wait[BLK_RW_ASYNC]); | ||
95 | } | ||
96 | } | ||
97 | 71 | ||
98 | spin_unlock_irq(q->queue_lock); | ||
99 | return ret; | 72 | return ret; |
100 | } | 73 | } |
101 | 74 | ||
@@ -544,8 +517,6 @@ static void blk_release_queue(struct kobject *kobj) | |||
544 | if (q->queue_tags) | 517 | if (q->queue_tags) |
545 | __blk_queue_free_tags(q); | 518 | __blk_queue_free_tags(q); |
546 | 519 | ||
547 | percpu_counter_destroy(&q->mq_usage_counter); | ||
548 | |||
549 | if (q->mq_ops) | 520 | if (q->mq_ops) |
550 | blk_mq_free_queue(q); | 521 | blk_mq_free_queue(q); |
551 | 522 | ||
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 033745cd7fba..9353b4683359 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
@@ -744,7 +744,7 @@ static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw, | |||
744 | static bool throtl_slice_used(struct throtl_grp *tg, bool rw) | 744 | static bool throtl_slice_used(struct throtl_grp *tg, bool rw) |
745 | { | 745 | { |
746 | if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) | 746 | if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) |
747 | return 0; | 747 | return false; |
748 | 748 | ||
749 | return 1; | 749 | return 1; |
750 | } | 750 | } |
@@ -842,7 +842,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio, | |||
842 | if (tg->io_disp[rw] + 1 <= io_allowed) { | 842 | if (tg->io_disp[rw] + 1 <= io_allowed) { |
843 | if (wait) | 843 | if (wait) |
844 | *wait = 0; | 844 | *wait = 0; |
845 | return 1; | 845 | return true; |
846 | } | 846 | } |
847 | 847 | ||
848 | /* Calc approx time to dispatch */ | 848 | /* Calc approx time to dispatch */ |
@@ -880,7 +880,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, | |||
880 | if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) { | 880 | if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) { |
881 | if (wait) | 881 | if (wait) |
882 | *wait = 0; | 882 | *wait = 0; |
883 | return 1; | 883 | return true; |
884 | } | 884 | } |
885 | 885 | ||
886 | /* Calc approx time to dispatch */ | 886 | /* Calc approx time to dispatch */ |
@@ -923,7 +923,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, | |||
923 | if (tg->bps[rw] == -1 && tg->iops[rw] == -1) { | 923 | if (tg->bps[rw] == -1 && tg->iops[rw] == -1) { |
924 | if (wait) | 924 | if (wait) |
925 | *wait = 0; | 925 | *wait = 0; |
926 | return 1; | 926 | return true; |
927 | } | 927 | } |
928 | 928 | ||
929 | /* | 929 | /* |
@@ -1258,7 +1258,7 @@ out_unlock: | |||
1258 | * of throtl_data->service_queue. Those bio's are ready and issued by this | 1258 | * of throtl_data->service_queue. Those bio's are ready and issued by this |
1259 | * function. | 1259 | * function. |
1260 | */ | 1260 | */ |
1261 | void blk_throtl_dispatch_work_fn(struct work_struct *work) | 1261 | static void blk_throtl_dispatch_work_fn(struct work_struct *work) |
1262 | { | 1262 | { |
1263 | struct throtl_data *td = container_of(work, struct throtl_data, | 1263 | struct throtl_data *td = container_of(work, struct throtl_data, |
1264 | dispatch_work); | 1264 | dispatch_work); |
diff --git a/block/blk-timeout.c b/block/blk-timeout.c index d96f7061c6fd..95a09590ccfd 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c | |||
@@ -96,11 +96,7 @@ static void blk_rq_timed_out(struct request *req) | |||
96 | __blk_complete_request(req); | 96 | __blk_complete_request(req); |
97 | break; | 97 | break; |
98 | case BLK_EH_RESET_TIMER: | 98 | case BLK_EH_RESET_TIMER: |
99 | if (q->mq_ops) | 99 | blk_add_timer(req); |
100 | blk_mq_add_timer(req); | ||
101 | else | ||
102 | blk_add_timer(req); | ||
103 | |||
104 | blk_clear_rq_complete(req); | 100 | blk_clear_rq_complete(req); |
105 | break; | 101 | break; |
106 | case BLK_EH_NOT_HANDLED: | 102 | case BLK_EH_NOT_HANDLED: |
@@ -170,7 +166,26 @@ void blk_abort_request(struct request *req) | |||
170 | } | 166 | } |
171 | EXPORT_SYMBOL_GPL(blk_abort_request); | 167 | EXPORT_SYMBOL_GPL(blk_abort_request); |
172 | 168 | ||
173 | void __blk_add_timer(struct request *req, struct list_head *timeout_list) | 169 | unsigned long blk_rq_timeout(unsigned long timeout) |
170 | { | ||
171 | unsigned long maxt; | ||
172 | |||
173 | maxt = round_jiffies_up(jiffies + BLK_MAX_TIMEOUT); | ||
174 | if (time_after(timeout, maxt)) | ||
175 | timeout = maxt; | ||
176 | |||
177 | return timeout; | ||
178 | } | ||
179 | |||
180 | /** | ||
181 | * blk_add_timer - Start timeout timer for a single request | ||
182 | * @req: request that is about to start running. | ||
183 | * | ||
184 | * Notes: | ||
185 | * Each request has its own timer, and as it is added to the queue, we | ||
186 | * set up the timer. When the request completes, we cancel the timer. | ||
187 | */ | ||
188 | void blk_add_timer(struct request *req) | ||
174 | { | 189 | { |
175 | struct request_queue *q = req->q; | 190 | struct request_queue *q = req->q; |
176 | unsigned long expiry; | 191 | unsigned long expiry; |
@@ -188,32 +203,29 @@ void __blk_add_timer(struct request *req, struct list_head *timeout_list) | |||
188 | req->timeout = q->rq_timeout; | 203 | req->timeout = q->rq_timeout; |
189 | 204 | ||
190 | req->deadline = jiffies + req->timeout; | 205 | req->deadline = jiffies + req->timeout; |
191 | if (timeout_list) | 206 | if (!q->mq_ops) |
192 | list_add_tail(&req->timeout_list, timeout_list); | 207 | list_add_tail(&req->timeout_list, &req->q->timeout_list); |
193 | 208 | ||
194 | /* | 209 | /* |
195 | * If the timer isn't already pending or this timeout is earlier | 210 | * If the timer isn't already pending or this timeout is earlier |
196 | * than an existing one, modify the timer. Round up to next nearest | 211 | * than an existing one, modify the timer. Round up to next nearest |
197 | * second. | 212 | * second. |
198 | */ | 213 | */ |
199 | expiry = round_jiffies_up(req->deadline); | 214 | expiry = blk_rq_timeout(round_jiffies_up(req->deadline)); |
200 | 215 | ||
201 | if (!timer_pending(&q->timeout) || | 216 | if (!timer_pending(&q->timeout) || |
202 | time_before(expiry, q->timeout.expires)) | 217 | time_before(expiry, q->timeout.expires)) { |
203 | mod_timer(&q->timeout, expiry); | 218 | unsigned long diff = q->timeout.expires - expiry; |
204 | 219 | ||
205 | } | 220 | /* |
221 | * Due to added timer slack to group timers, the timer | ||
222 | * will often be a little in front of what we asked for. | ||
223 | * So apply some tolerance here too, otherwise we keep | ||
224 | * modifying the timer because expires for value X | ||
225 | * will be X + something. | ||
226 | */ | ||
227 | if (!timer_pending(&q->timeout) || (diff >= HZ / 2)) | ||
228 | mod_timer(&q->timeout, expiry); | ||
229 | } | ||
206 | 230 | ||
207 | /** | ||
208 | * blk_add_timer - Start timeout timer for a single request | ||
209 | * @req: request that is about to start running. | ||
210 | * | ||
211 | * Notes: | ||
212 | * Each request has its own timer, and as it is added to the queue, we | ||
213 | * set up the timer. When the request completes, we cancel the timer. | ||
214 | */ | ||
215 | void blk_add_timer(struct request *req) | ||
216 | { | ||
217 | __blk_add_timer(req, &req->q->timeout_list); | ||
218 | } | 231 | } |
219 | |||
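Two things are worth calling out in the reworked blk_add_timer(): it clamps the armed expiry to at most BLK_MAX_TIMEOUT (5*HZ) in the future via blk_rq_timeout(), and it only re-arms an already-pending queue timer when doing so moves it earlier by at least HZ/2, so timer slack does not cause endless mod_timer() calls. A userspace sketch of just those two decisions; HZ and the jiffies values are made up, and the round_jiffies_up() step is skipped:

```c
/* Sketch of the clamp + re-arm tolerance in the new blk_add_timer();
 * HZ and the jiffies values below are made-up examples. */
#include <stdio.h>
#include <stdbool.h>

#define HZ		1000UL
#define BLK_MAX_TIMEOUT	(5 * HZ)

/* Mirrors blk_rq_timeout(): never arm more than BLK_MAX_TIMEOUT out. */
static unsigned long clamp_timeout(unsigned long jiffies, unsigned long timeout)
{
	unsigned long maxt = jiffies + BLK_MAX_TIMEOUT;

	return timeout > maxt ? maxt : timeout;
}

static bool should_rearm(bool pending, unsigned long cur_expires, unsigned long expiry)
{
	if (!pending)
		return true;
	if (expiry >= cur_expires)		/* not earlier: leave the timer alone */
		return false;
	/* Tolerate timer slack: only move the timer if it buys >= HZ/2. */
	return cur_expires - expiry >= HZ / 2;
}

int main(void)
{
	unsigned long jiffies = 100000, deadline = jiffies + 30 * HZ;
	unsigned long expiry = clamp_timeout(jiffies, deadline);

	printf("deadline clamped from +%lus to +%lus\n",
	       (deadline - jiffies) / HZ, (expiry - jiffies) / HZ);
	printf("re-arm when existing expiry is 0.2s later: %d\n",
	       should_rearm(true, expiry + HZ / 5, expiry));
	printf("re-arm when existing expiry is 2s later:   %d\n",
	       should_rearm(true, expiry + 2 * HZ, expiry));
	return 0;
}
```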
diff --git a/block/blk.h b/block/blk.h index 1d880f1f957f..45385e9abf6f 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -9,6 +9,9 @@ | |||
9 | /* Number of requests a "batching" process may submit */ | 9 | /* Number of requests a "batching" process may submit */ |
10 | #define BLK_BATCH_REQ 32 | 10 | #define BLK_BATCH_REQ 32 |
11 | 11 | ||
12 | /* Max future timer expiry for timeouts */ | ||
13 | #define BLK_MAX_TIMEOUT (5 * HZ) | ||
14 | |||
12 | extern struct kmem_cache *blk_requestq_cachep; | 15 | extern struct kmem_cache *blk_requestq_cachep; |
13 | extern struct kmem_cache *request_cachep; | 16 | extern struct kmem_cache *request_cachep; |
14 | extern struct kobj_type blk_queue_ktype; | 17 | extern struct kobj_type blk_queue_ktype; |
@@ -37,9 +40,9 @@ bool __blk_end_bidi_request(struct request *rq, int error, | |||
37 | void blk_rq_timed_out_timer(unsigned long data); | 40 | void blk_rq_timed_out_timer(unsigned long data); |
38 | void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, | 41 | void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, |
39 | unsigned int *next_set); | 42 | unsigned int *next_set); |
40 | void __blk_add_timer(struct request *req, struct list_head *timeout_list); | 43 | unsigned long blk_rq_timeout(unsigned long timeout); |
44 | void blk_add_timer(struct request *req); | ||
41 | void blk_delete_timer(struct request *); | 45 | void blk_delete_timer(struct request *); |
42 | void blk_add_timer(struct request *); | ||
43 | 46 | ||
44 | 47 | ||
45 | bool bio_attempt_front_merge(struct request_queue *q, struct request *req, | 48 | bool bio_attempt_front_merge(struct request_queue *q, struct request *req, |
@@ -185,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) | |||
185 | return q->nr_congestion_off; | 188 | return q->nr_congestion_off; |
186 | } | 189 | } |
187 | 190 | ||
191 | extern int blk_update_nr_requests(struct request_queue *, unsigned int); | ||
192 | |||
188 | /* | 193 | /* |
189 | * Contribute to IO statistics IFF: | 194 | * Contribute to IO statistics IFF: |
190 | * | 195 | * |
diff --git a/mm/bounce.c b/block/bounce.c index 523918b8c6dc..523918b8c6dc 100644 --- a/mm/bounce.c +++ b/block/bounce.c | |||
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e0985f1955e7..22dffebc7c73 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -908,7 +908,7 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) | |||
908 | { | 908 | { |
909 | if (cfqd->busy_queues) { | 909 | if (cfqd->busy_queues) { |
910 | cfq_log(cfqd, "schedule dispatch"); | 910 | cfq_log(cfqd, "schedule dispatch"); |
911 | kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); | 911 | kblockd_schedule_work(&cfqd->unplug_work); |
912 | } | 912 | } |
913 | } | 913 | } |
914 | 914 | ||
@@ -4460,7 +4460,7 @@ out_free: | |||
4460 | static ssize_t | 4460 | static ssize_t |
4461 | cfq_var_show(unsigned int var, char *page) | 4461 | cfq_var_show(unsigned int var, char *page) |
4462 | { | 4462 | { |
4463 | return sprintf(page, "%d\n", var); | 4463 | return sprintf(page, "%u\n", var); |
4464 | } | 4464 | } |
4465 | 4465 | ||
4466 | static ssize_t | 4466 | static ssize_t |
diff --git a/fs/ioprio.c b/block/ioprio.c index e50170ca7c33..e50170ca7c33 100644 --- a/fs/ioprio.c +++ b/block/ioprio.c | |||
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 26487972ac54..9c28a5b38042 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c | |||
@@ -205,10 +205,6 @@ int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm) | |||
205 | if (capable(CAP_SYS_RAWIO)) | 205 | if (capable(CAP_SYS_RAWIO)) |
206 | return 0; | 206 | return 0; |
207 | 207 | ||
208 | /* if there's no filter set, assume we're filtering everything out */ | ||
209 | if (!filter) | ||
210 | return -EPERM; | ||
211 | |||
212 | /* Anybody who can open the device can do a read-safe command */ | 208 | /* Anybody who can open the device can do a read-safe command */ |
213 | if (test_bit(cmd[0], filter->read_ok)) | 209 | if (test_bit(cmd[0], filter->read_ok)) |
214 | return 0; | 210 | return 0; |
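With the early NULL-filter bailout gone, blk_verify_command() assumes a command filter is always installed. A hedged usage sketch for an SG_IO-style caller (cmd and mode are assumed to come from the ioctl path; example_* is hypothetical):

#include <linux/blkdev.h>
#include <linux/fs.h>

/* Hypothetical permission check before forwarding a user-supplied CDB. */
static int example_check_cdb(unsigned char *cmd, fmode_t mode)
{
	/* 0 if the opcode is allowed for this open mode, -EPERM otherwise;
	 * no NULL-filter special case remains. */
	return blk_verify_command(cmd, mode & FMODE_WRITE);
}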
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 748dea4f34dc..758da2287d9a 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c | |||
@@ -1406,7 +1406,7 @@ next_segment: | |||
1406 | 1406 | ||
1407 | track = block / (floppy->dtype->sects * floppy->type->sect_mult); | 1407 | track = block / (floppy->dtype->sects * floppy->type->sect_mult); |
1408 | sector = block % (floppy->dtype->sects * floppy->type->sect_mult); | 1408 | sector = block % (floppy->dtype->sects * floppy->type->sect_mult); |
1409 | data = rq->buffer + 512 * cnt; | 1409 | data = bio_data(rq->bio) + 512 * cnt; |
1410 | #ifdef DEBUG | 1410 | #ifdef DEBUG |
1411 | printk("access to track %d, sector %d, with buffer at " | 1411 | printk("access to track %d, sector %d, with buffer at " |
1412 | "0x%08lx\n", track, sector, data); | 1412 | "0x%08lx\n", track, sector, data); |
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index cfa64bdf01c9..2104b1b4ccda 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c | |||
@@ -1484,7 +1484,7 @@ repeat: | |||
1484 | ReqCnt = 0; | 1484 | ReqCnt = 0; |
1485 | ReqCmd = rq_data_dir(fd_request); | 1485 | ReqCmd = rq_data_dir(fd_request); |
1486 | ReqBlock = blk_rq_pos(fd_request); | 1486 | ReqBlock = blk_rq_pos(fd_request); |
1487 | ReqBuffer = fd_request->buffer; | 1487 | ReqBuffer = bio_data(fd_request->bio); |
1488 | setup_req_params( drive ); | 1488 | setup_req_params( drive ); |
1489 | do_fd_action( drive ); | 1489 | do_fd_action( drive ); |
1490 | 1490 | ||
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index fa9bb742df6e..dc3a41c82b38 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c | |||
@@ -2351,7 +2351,7 @@ static void rw_interrupt(void) | |||
2351 | } | 2351 | } |
2352 | 2352 | ||
2353 | if (CT(COMMAND) != FD_READ || | 2353 | if (CT(COMMAND) != FD_READ || |
2354 | raw_cmd->kernel_data == current_req->buffer) { | 2354 | raw_cmd->kernel_data == bio_data(current_req->bio)) { |
2355 | /* transfer directly from buffer */ | 2355 | /* transfer directly from buffer */ |
2356 | cont->done(1); | 2356 | cont->done(1); |
2357 | } else if (CT(COMMAND) == FD_READ) { | 2357 | } else if (CT(COMMAND) == FD_READ) { |
@@ -2640,7 +2640,7 @@ static int make_raw_rw_request(void) | |||
2640 | raw_cmd->flags &= ~FD_RAW_WRITE; | 2640 | raw_cmd->flags &= ~FD_RAW_WRITE; |
2641 | raw_cmd->flags |= FD_RAW_READ; | 2641 | raw_cmd->flags |= FD_RAW_READ; |
2642 | COMMAND = FM_MODE(_floppy, FD_READ); | 2642 | COMMAND = FM_MODE(_floppy, FD_READ); |
2643 | } else if ((unsigned long)current_req->buffer < MAX_DMA_ADDRESS) { | 2643 | } else if ((unsigned long)bio_data(current_req->bio) < MAX_DMA_ADDRESS) { |
2644 | unsigned long dma_limit; | 2644 | unsigned long dma_limit; |
2645 | int direct, indirect; | 2645 | int direct, indirect; |
2646 | 2646 | ||
@@ -2654,13 +2654,13 @@ static int make_raw_rw_request(void) | |||
2654 | */ | 2654 | */ |
2655 | max_size = buffer_chain_size(); | 2655 | max_size = buffer_chain_size(); |
2656 | dma_limit = (MAX_DMA_ADDRESS - | 2656 | dma_limit = (MAX_DMA_ADDRESS - |
2657 | ((unsigned long)current_req->buffer)) >> 9; | 2657 | ((unsigned long)bio_data(current_req->bio))) >> 9; |
2658 | if ((unsigned long)max_size > dma_limit) | 2658 | if ((unsigned long)max_size > dma_limit) |
2659 | max_size = dma_limit; | 2659 | max_size = dma_limit; |
2660 | /* 64 kb boundaries */ | 2660 | /* 64 kb boundaries */ |
2661 | if (CROSS_64KB(current_req->buffer, max_size << 9)) | 2661 | if (CROSS_64KB(bio_data(current_req->bio), max_size << 9)) |
2662 | max_size = (K_64 - | 2662 | max_size = (K_64 - |
2663 | ((unsigned long)current_req->buffer) % | 2663 | ((unsigned long)bio_data(current_req->bio)) % |
2664 | K_64) >> 9; | 2664 | K_64) >> 9; |
2665 | direct = transfer_size(ssize, max_sector, max_size) - fsector_t; | 2665 | direct = transfer_size(ssize, max_sector, max_size) - fsector_t; |
2666 | /* | 2666 | /* |
@@ -2677,7 +2677,7 @@ static int make_raw_rw_request(void) | |||
2677 | (DP->read_track & (1 << DRS->probed_format)))))) { | 2677 | (DP->read_track & (1 << DRS->probed_format)))))) { |
2678 | max_size = blk_rq_sectors(current_req); | 2678 | max_size = blk_rq_sectors(current_req); |
2679 | } else { | 2679 | } else { |
2680 | raw_cmd->kernel_data = current_req->buffer; | 2680 | raw_cmd->kernel_data = bio_data(current_req->bio); |
2681 | raw_cmd->length = current_count_sectors << 9; | 2681 | raw_cmd->length = current_count_sectors << 9; |
2682 | if (raw_cmd->length == 0) { | 2682 | if (raw_cmd->length == 0) { |
2683 | DPRINT("%s: zero dma transfer attempted\n", __func__); | 2683 | DPRINT("%s: zero dma transfer attempted\n", __func__); |
@@ -2731,7 +2731,7 @@ static int make_raw_rw_request(void) | |||
2731 | raw_cmd->length = ((raw_cmd->length - 1) | (ssize - 1)) + 1; | 2731 | raw_cmd->length = ((raw_cmd->length - 1) | (ssize - 1)) + 1; |
2732 | raw_cmd->length <<= 9; | 2732 | raw_cmd->length <<= 9; |
2733 | if ((raw_cmd->length < current_count_sectors << 9) || | 2733 | if ((raw_cmd->length < current_count_sectors << 9) || |
2734 | (raw_cmd->kernel_data != current_req->buffer && | 2734 | (raw_cmd->kernel_data != bio_data(current_req->bio) && |
2735 | CT(COMMAND) == FD_WRITE && | 2735 | CT(COMMAND) == FD_WRITE && |
2736 | (aligned_sector_t + (raw_cmd->length >> 9) > buffer_max || | 2736 | (aligned_sector_t + (raw_cmd->length >> 9) > buffer_max || |
2737 | aligned_sector_t < buffer_min)) || | 2737 | aligned_sector_t < buffer_min)) || |
@@ -2739,7 +2739,7 @@ static int make_raw_rw_request(void) | |||
2739 | raw_cmd->length <= 0 || current_count_sectors <= 0) { | 2739 | raw_cmd->length <= 0 || current_count_sectors <= 0) { |
2740 | DPRINT("fractionary current count b=%lx s=%lx\n", | 2740 | DPRINT("fractionary current count b=%lx s=%lx\n", |
2741 | raw_cmd->length, current_count_sectors); | 2741 | raw_cmd->length, current_count_sectors); |
2742 | if (raw_cmd->kernel_data != current_req->buffer) | 2742 | if (raw_cmd->kernel_data != bio_data(current_req->bio)) |
2743 | pr_info("addr=%d, length=%ld\n", | 2743 | pr_info("addr=%d, length=%ld\n", |
2744 | (int)((raw_cmd->kernel_data - | 2744 | (int)((raw_cmd->kernel_data - |
2745 | floppy_track_buffer) >> 9), | 2745 | floppy_track_buffer) >> 9), |
@@ -2756,7 +2756,7 @@ static int make_raw_rw_request(void) | |||
2756 | return 0; | 2756 | return 0; |
2757 | } | 2757 | } |
2758 | 2758 | ||
2759 | if (raw_cmd->kernel_data != current_req->buffer) { | 2759 | if (raw_cmd->kernel_data != bio_data(current_req->bio)) { |
2760 | if (raw_cmd->kernel_data < floppy_track_buffer || | 2760 | if (raw_cmd->kernel_data < floppy_track_buffer || |
2761 | current_count_sectors < 0 || | 2761 | current_count_sectors < 0 || |
2762 | raw_cmd->length < 0 || | 2762 | raw_cmd->length < 0 || |
diff --git a/drivers/block/hd.c b/drivers/block/hd.c index bf397bf108b7..8a290c08262f 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c | |||
@@ -464,11 +464,11 @@ static void read_intr(void) | |||
464 | 464 | ||
465 | ok_to_read: | 465 | ok_to_read: |
466 | req = hd_req; | 466 | req = hd_req; |
467 | insw(HD_DATA, req->buffer, 256); | 467 | insw(HD_DATA, bio_data(req->bio), 256); |
468 | #ifdef DEBUG | 468 | #ifdef DEBUG |
469 | printk("%s: read: sector %ld, remaining = %u, buffer=%p\n", | 469 | printk("%s: read: sector %ld, remaining = %u, buffer=%p\n", |
470 | req->rq_disk->disk_name, blk_rq_pos(req) + 1, | 470 | req->rq_disk->disk_name, blk_rq_pos(req) + 1, |
471 | blk_rq_sectors(req) - 1, req->buffer+512); | 471 | blk_rq_sectors(req) - 1, bio_data(req->bio)+512); |
472 | #endif | 472 | #endif |
473 | if (hd_end_request(0, 512)) { | 473 | if (hd_end_request(0, 512)) { |
474 | SET_HANDLER(&read_intr); | 474 | SET_HANDLER(&read_intr); |
@@ -505,7 +505,7 @@ static void write_intr(void) | |||
505 | ok_to_write: | 505 | ok_to_write: |
506 | if (hd_end_request(0, 512)) { | 506 | if (hd_end_request(0, 512)) { |
507 | SET_HANDLER(&write_intr); | 507 | SET_HANDLER(&write_intr); |
508 | outsw(HD_DATA, req->buffer, 256); | 508 | outsw(HD_DATA, bio_data(req->bio), 256); |
509 | return; | 509 | return; |
510 | } | 510 | } |
511 | 511 | ||
@@ -624,7 +624,7 @@ repeat: | |||
624 | printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", | 624 | printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", |
625 | req->rq_disk->disk_name, | 625 | req->rq_disk->disk_name, |
626 | req_data_dir(req) == READ ? "read" : "writ", | 626 | req_data_dir(req) == READ ? "read" : "writ", |
627 | cyl, head, sec, nsect, req->buffer); | 627 | cyl, head, sec, nsect, bio_data(req->bio)); |
628 | #endif | 628 | #endif |
629 | if (req->cmd_type == REQ_TYPE_FS) { | 629 | if (req->cmd_type == REQ_TYPE_FS) { |
630 | switch (rq_data_dir(req)) { | 630 | switch (rq_data_dir(req)) { |
@@ -643,7 +643,7 @@ repeat: | |||
643 | bad_rw_intr(); | 643 | bad_rw_intr(); |
644 | goto repeat; | 644 | goto repeat; |
645 | } | 645 | } |
646 | outsw(HD_DATA, req->buffer, 256); | 646 | outsw(HD_DATA, bio_data(req->bio), 256); |
647 | break; | 647 | break; |
648 | default: | 648 | default: |
649 | printk("unknown hd-command\n"); | 649 | printk("unknown hd-command\n"); |
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index eb59b1241366..e352cac707e8 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c | |||
@@ -479,7 +479,7 @@ static unsigned int mg_out(struct mg_host *host, | |||
479 | 479 | ||
480 | static void mg_read_one(struct mg_host *host, struct request *req) | 480 | static void mg_read_one(struct mg_host *host, struct request *req) |
481 | { | 481 | { |
482 | u16 *buff = (u16 *)req->buffer; | 482 | u16 *buff = (u16 *)bio_data(req->bio); |
483 | u32 i; | 483 | u32 i; |
484 | 484 | ||
485 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) | 485 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) |
@@ -496,7 +496,7 @@ static void mg_read(struct request *req) | |||
496 | mg_bad_rw_intr(host); | 496 | mg_bad_rw_intr(host); |
497 | 497 | ||
498 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", | 498 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", |
499 | blk_rq_sectors(req), blk_rq_pos(req), req->buffer); | 499 | blk_rq_sectors(req), blk_rq_pos(req), bio_data(req->bio)); |
500 | 500 | ||
501 | do { | 501 | do { |
502 | if (mg_wait(host, ATA_DRQ, | 502 | if (mg_wait(host, ATA_DRQ, |
@@ -514,7 +514,7 @@ static void mg_read(struct request *req) | |||
514 | 514 | ||
515 | static void mg_write_one(struct mg_host *host, struct request *req) | 515 | static void mg_write_one(struct mg_host *host, struct request *req) |
516 | { | 516 | { |
517 | u16 *buff = (u16 *)req->buffer; | 517 | u16 *buff = (u16 *)bio_data(req->bio); |
518 | u32 i; | 518 | u32 i; |
519 | 519 | ||
520 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) | 520 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) |
@@ -534,7 +534,7 @@ static void mg_write(struct request *req) | |||
534 | } | 534 | } |
535 | 535 | ||
536 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", | 536 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", |
537 | rem, blk_rq_pos(req), req->buffer); | 537 | rem, blk_rq_pos(req), bio_data(req->bio)); |
538 | 538 | ||
539 | if (mg_wait(host, ATA_DRQ, | 539 | if (mg_wait(host, ATA_DRQ, |
540 | MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { | 540 | MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { |
@@ -585,7 +585,7 @@ ok_to_read: | |||
585 | mg_read_one(host, req); | 585 | mg_read_one(host, req); |
586 | 586 | ||
587 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", | 587 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", |
588 | blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer); | 588 | blk_rq_pos(req), blk_rq_sectors(req) - 1, bio_data(req->bio)); |
589 | 589 | ||
590 | /* send read confirm */ | 590 | /* send read confirm */ |
591 | outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); | 591 | outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); |
@@ -624,7 +624,7 @@ ok_to_write: | |||
624 | /* write 1 sector and set handler if remains */ | 624 | /* write 1 sector and set handler if remains */ |
625 | mg_write_one(host, req); | 625 | mg_write_one(host, req); |
626 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", | 626 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", |
627 | blk_rq_pos(req), blk_rq_sectors(req), req->buffer); | 627 | blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio)); |
628 | host->mg_do_intr = mg_write_intr; | 628 | host->mg_do_intr = mg_write_intr; |
629 | mod_timer(&host->timer, jiffies + 3 * HZ); | 629 | mod_timer(&host->timer, jiffies + 3 * HZ); |
630 | } | 630 | } |
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 091b9ea14feb..b40af63a5476 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c | |||
@@ -32,6 +32,7 @@ struct nullb { | |||
32 | unsigned int index; | 32 | unsigned int index; |
33 | struct request_queue *q; | 33 | struct request_queue *q; |
34 | struct gendisk *disk; | 34 | struct gendisk *disk; |
35 | struct blk_mq_tag_set tag_set; | ||
35 | struct hrtimer timer; | 36 | struct hrtimer timer; |
36 | unsigned int queue_depth; | 37 | unsigned int queue_depth; |
37 | spinlock_t lock; | 38 | spinlock_t lock; |
@@ -226,7 +227,7 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) | |||
226 | 227 | ||
227 | static void null_softirq_done_fn(struct request *rq) | 228 | static void null_softirq_done_fn(struct request *rq) |
228 | { | 229 | { |
229 | end_cmd(rq->special); | 230 | end_cmd(blk_mq_rq_to_pdu(rq)); |
230 | } | 231 | } |
231 | 232 | ||
232 | static inline void null_handle_cmd(struct nullb_cmd *cmd) | 233 | static inline void null_handle_cmd(struct nullb_cmd *cmd) |
@@ -311,7 +312,7 @@ static void null_request_fn(struct request_queue *q) | |||
311 | 312 | ||
312 | static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) | 313 | static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) |
313 | { | 314 | { |
314 | struct nullb_cmd *cmd = rq->special; | 315 | struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); |
315 | 316 | ||
316 | cmd->rq = rq; | 317 | cmd->rq = rq; |
317 | cmd->nq = hctx->driver_data; | 318 | cmd->nq = hctx->driver_data; |
@@ -320,46 +321,6 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) | |||
320 | return BLK_MQ_RQ_QUEUE_OK; | 321 | return BLK_MQ_RQ_QUEUE_OK; |
321 | } | 322 | } |
322 | 323 | ||
323 | static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index) | ||
324 | { | ||
325 | int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes); | ||
326 | int tip = (reg->nr_hw_queues % nr_online_nodes); | ||
327 | int node = 0, i, n; | ||
328 | |||
329 | /* | ||
330 | * Split submit queues evenly wrt to the number of nodes. If uneven, | ||
331 | * fill the first buckets with one extra, until the rest is filled with | ||
332 | * no extra. | ||
333 | */ | ||
334 | for (i = 0, n = 1; i < hctx_index; i++, n++) { | ||
335 | if (n % b_size == 0) { | ||
336 | n = 0; | ||
337 | node++; | ||
338 | |||
339 | tip--; | ||
340 | if (!tip) | ||
341 | b_size = reg->nr_hw_queues / nr_online_nodes; | ||
342 | } | ||
343 | } | ||
344 | |||
345 | /* | ||
346 | * A node might not be online, therefore map the relative node id to the | ||
347 | * real node id. | ||
348 | */ | ||
349 | for_each_online_node(n) { | ||
350 | if (!node) | ||
351 | break; | ||
352 | node--; | ||
353 | } | ||
354 | |||
355 | return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n); | ||
356 | } | ||
357 | |||
358 | static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) | ||
359 | { | ||
360 | kfree(hctx); | ||
361 | } | ||
362 | |||
363 | static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) | 324 | static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) |
364 | { | 325 | { |
365 | BUG_ON(!nullb); | 326 | BUG_ON(!nullb); |
@@ -389,19 +350,14 @@ static struct blk_mq_ops null_mq_ops = { | |||
389 | .complete = null_softirq_done_fn, | 350 | .complete = null_softirq_done_fn, |
390 | }; | 351 | }; |
391 | 352 | ||
392 | static struct blk_mq_reg null_mq_reg = { | ||
393 | .ops = &null_mq_ops, | ||
394 | .queue_depth = 64, | ||
395 | .cmd_size = sizeof(struct nullb_cmd), | ||
396 | .flags = BLK_MQ_F_SHOULD_MERGE, | ||
397 | }; | ||
398 | |||
399 | static void null_del_dev(struct nullb *nullb) | 353 | static void null_del_dev(struct nullb *nullb) |
400 | { | 354 | { |
401 | list_del_init(&nullb->list); | 355 | list_del_init(&nullb->list); |
402 | 356 | ||
403 | del_gendisk(nullb->disk); | 357 | del_gendisk(nullb->disk); |
404 | blk_cleanup_queue(nullb->q); | 358 | blk_cleanup_queue(nullb->q); |
359 | if (queue_mode == NULL_Q_MQ) | ||
360 | blk_mq_free_tag_set(&nullb->tag_set); | ||
405 | put_disk(nullb->disk); | 361 | put_disk(nullb->disk); |
406 | kfree(nullb); | 362 | kfree(nullb); |
407 | } | 363 | } |
@@ -506,7 +462,7 @@ static int null_add_dev(void) | |||
506 | 462 | ||
507 | nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); | 463 | nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); |
508 | if (!nullb) | 464 | if (!nullb) |
509 | return -ENOMEM; | 465 | goto out; |
510 | 466 | ||
511 | spin_lock_init(&nullb->lock); | 467 | spin_lock_init(&nullb->lock); |
512 | 468 | ||
@@ -514,49 +470,44 @@ static int null_add_dev(void) | |||
514 | submit_queues = nr_online_nodes; | 470 | submit_queues = nr_online_nodes; |
515 | 471 | ||
516 | if (setup_queues(nullb)) | 472 | if (setup_queues(nullb)) |
517 | goto err; | 473 | goto out_free_nullb; |
518 | 474 | ||
519 | if (queue_mode == NULL_Q_MQ) { | 475 | if (queue_mode == NULL_Q_MQ) { |
520 | null_mq_reg.numa_node = home_node; | 476 | nullb->tag_set.ops = &null_mq_ops; |
521 | null_mq_reg.queue_depth = hw_queue_depth; | 477 | nullb->tag_set.nr_hw_queues = submit_queues; |
522 | null_mq_reg.nr_hw_queues = submit_queues; | 478 | nullb->tag_set.queue_depth = hw_queue_depth; |
523 | 479 | nullb->tag_set.numa_node = home_node; | |
524 | if (use_per_node_hctx) { | 480 | nullb->tag_set.cmd_size = sizeof(struct nullb_cmd); |
525 | null_mq_reg.ops->alloc_hctx = null_alloc_hctx; | 481 | nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; |
526 | null_mq_reg.ops->free_hctx = null_free_hctx; | 482 | nullb->tag_set.driver_data = nullb; |
527 | } else { | 483 | |
528 | null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue; | 484 | if (blk_mq_alloc_tag_set(&nullb->tag_set)) |
529 | null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue; | 485 | goto out_cleanup_queues; |
530 | } | 486 | |
531 | 487 | nullb->q = blk_mq_init_queue(&nullb->tag_set); | |
532 | nullb->q = blk_mq_init_queue(&null_mq_reg, nullb); | 488 | if (!nullb->q) |
489 | goto out_cleanup_tags; | ||
533 | } else if (queue_mode == NULL_Q_BIO) { | 490 | } else if (queue_mode == NULL_Q_BIO) { |
534 | nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); | 491 | nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); |
492 | if (!nullb->q) | ||
493 | goto out_cleanup_queues; | ||
535 | blk_queue_make_request(nullb->q, null_queue_bio); | 494 | blk_queue_make_request(nullb->q, null_queue_bio); |
536 | init_driver_queues(nullb); | 495 | init_driver_queues(nullb); |
537 | } else { | 496 | } else { |
538 | nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); | 497 | nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); |
498 | if (!nullb->q) | ||
499 | goto out_cleanup_queues; | ||
539 | blk_queue_prep_rq(nullb->q, null_rq_prep_fn); | 500 | blk_queue_prep_rq(nullb->q, null_rq_prep_fn); |
540 | if (nullb->q) | 501 | blk_queue_softirq_done(nullb->q, null_softirq_done_fn); |
541 | blk_queue_softirq_done(nullb->q, null_softirq_done_fn); | ||
542 | init_driver_queues(nullb); | 502 | init_driver_queues(nullb); |
543 | } | 503 | } |
544 | 504 | ||
545 | if (!nullb->q) | ||
546 | goto queue_fail; | ||
547 | |||
548 | nullb->q->queuedata = nullb; | 505 | nullb->q->queuedata = nullb; |
549 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); | 506 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); |
550 | 507 | ||
551 | disk = nullb->disk = alloc_disk_node(1, home_node); | 508 | disk = nullb->disk = alloc_disk_node(1, home_node); |
552 | if (!disk) { | 509 | if (!disk) |
553 | queue_fail: | 510 | goto out_cleanup_blk_queue; |
554 | blk_cleanup_queue(nullb->q); | ||
555 | cleanup_queues(nullb); | ||
556 | err: | ||
557 | kfree(nullb); | ||
558 | return -ENOMEM; | ||
559 | } | ||
560 | 511 | ||
561 | mutex_lock(&lock); | 512 | mutex_lock(&lock); |
562 | list_add_tail(&nullb->list, &nullb_list); | 513 | list_add_tail(&nullb->list, &nullb_list); |
@@ -579,6 +530,18 @@ err: | |||
579 | sprintf(disk->disk_name, "nullb%d", nullb->index); | 530 | sprintf(disk->disk_name, "nullb%d", nullb->index); |
580 | add_disk(disk); | 531 | add_disk(disk); |
581 | return 0; | 532 | return 0; |
533 | |||
534 | out_cleanup_blk_queue: | ||
535 | blk_cleanup_queue(nullb->q); | ||
536 | out_cleanup_tags: | ||
537 | if (queue_mode == NULL_Q_MQ) | ||
538 | blk_mq_free_tag_set(&nullb->tag_set); | ||
539 | out_cleanup_queues: | ||
540 | cleanup_queues(nullb); | ||
541 | out_free_nullb: | ||
542 | kfree(nullb); | ||
543 | out: | ||
544 | return -ENOMEM; | ||
582 | } | 545 | } |
583 | 546 | ||
584 | static int __init null_init(void) | 547 | static int __init null_init(void) |
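null_blk is the template conversion: the static blk_mq_reg and the per-node alloc_hctx/free_hctx hooks go away in favour of a per-device blk_mq_tag_set, and per-request driver data moves from rq->special into the PDU returned by blk_mq_rq_to_pdu(). A condensed sketch of the new shape (all example_* names are hypothetical):

#include <linux/blk-mq.h>
#include <linux/string.h>
#include <linux/numa.h>

struct example_cmd {				/* per-request PDU */
	struct request *rq;
};

static int example_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct example_cmd *cmd = blk_mq_rq_to_pdu(rq);	/* was rq->special */

	cmd->rq = rq;
	/* ... hand off to hardware; completed immediately in this sketch ... */
	blk_mq_end_io(rq, 0);
	return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops example_mq_ops = {
	.queue_rq	= example_queue_rq,
	.map_queue	= blk_mq_map_queue,
};

/* Probe-time setup: a per-device tag set replaces the old static blk_mq_reg. */
static struct request_queue *example_init_queue(struct blk_mq_tag_set *set,
						void *driver_data)
{
	struct request_queue *q;

	memset(set, 0, sizeof(*set));
	set->ops = &example_mq_ops;
	set->nr_hw_queues = 1;
	set->queue_depth = 64;
	set->numa_node = NUMA_NO_NODE;
	set->cmd_size = sizeof(struct example_cmd);
	set->flags = BLK_MQ_F_SHOULD_MERGE;
	set->driver_data = driver_data;

	if (blk_mq_alloc_tag_set(set))
		return NULL;

	q = blk_mq_init_queue(set);
	if (!q)
		blk_mq_free_tag_set(set);
	return q;
}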
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index e76bdc074dbe..719cb1bc1640 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c | |||
@@ -747,7 +747,7 @@ static void do_pcd_request(struct request_queue * q) | |||
747 | pcd_current = cd; | 747 | pcd_current = cd; |
748 | pcd_sector = blk_rq_pos(pcd_req); | 748 | pcd_sector = blk_rq_pos(pcd_req); |
749 | pcd_count = blk_rq_cur_sectors(pcd_req); | 749 | pcd_count = blk_rq_cur_sectors(pcd_req); |
750 | pcd_buf = pcd_req->buffer; | 750 | pcd_buf = bio_data(pcd_req->bio); |
751 | pcd_busy = 1; | 751 | pcd_busy = 1; |
752 | ps_set_intr(do_pcd_read, NULL, 0, nice); | 752 | ps_set_intr(do_pcd_read, NULL, 0, nice); |
753 | return; | 753 | return; |
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 19ad8f0c83ef..fea7e76a00de 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c | |||
@@ -454,7 +454,7 @@ static enum action do_pd_io_start(void) | |||
454 | if (pd_block + pd_count > get_capacity(pd_req->rq_disk)) | 454 | if (pd_block + pd_count > get_capacity(pd_req->rq_disk)) |
455 | return Fail; | 455 | return Fail; |
456 | pd_run = blk_rq_sectors(pd_req); | 456 | pd_run = blk_rq_sectors(pd_req); |
457 | pd_buf = pd_req->buffer; | 457 | pd_buf = bio_data(pd_req->bio); |
458 | pd_retries = 0; | 458 | pd_retries = 0; |
459 | if (pd_cmd == READ) | 459 | if (pd_cmd == READ) |
460 | return do_pd_read_start(); | 460 | return do_pd_read_start(); |
@@ -485,7 +485,7 @@ static int pd_next_buf(void) | |||
485 | spin_lock_irqsave(&pd_lock, saved_flags); | 485 | spin_lock_irqsave(&pd_lock, saved_flags); |
486 | __blk_end_request_cur(pd_req, 0); | 486 | __blk_end_request_cur(pd_req, 0); |
487 | pd_count = blk_rq_cur_sectors(pd_req); | 487 | pd_count = blk_rq_cur_sectors(pd_req); |
488 | pd_buf = pd_req->buffer; | 488 | pd_buf = bio_data(pd_req->bio); |
489 | spin_unlock_irqrestore(&pd_lock, saved_flags); | 489 | spin_unlock_irqrestore(&pd_lock, saved_flags); |
490 | return 0; | 490 | return 0; |
491 | } | 491 | } |
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index f5c86d523ba0..9a15fd3c9349 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c | |||
@@ -795,7 +795,7 @@ repeat: | |||
795 | } | 795 | } |
796 | 796 | ||
797 | pf_cmd = rq_data_dir(pf_req); | 797 | pf_cmd = rq_data_dir(pf_req); |
798 | pf_buf = pf_req->buffer; | 798 | pf_buf = bio_data(pf_req->bio); |
799 | pf_retries = 0; | 799 | pf_retries = 0; |
800 | 800 | ||
801 | pf_busy = 1; | 801 | pf_busy = 1; |
@@ -827,7 +827,7 @@ static int pf_next_buf(void) | |||
827 | if (!pf_req) | 827 | if (!pf_req) |
828 | return 1; | 828 | return 1; |
829 | pf_count = blk_rq_cur_sectors(pf_req); | 829 | pf_count = blk_rq_cur_sectors(pf_req); |
830 | pf_buf = pf_req->buffer; | 830 | pf_buf = bio_data(pf_req->bio); |
831 | } | 831 | } |
832 | return 0; | 832 | return 0; |
833 | } | 833 | } |
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index a69dd93d1bd5..c48d9084c965 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c | |||
@@ -563,7 +563,6 @@ skd_prep_discard_cdb(struct skd_scsi_request *scsi_req, | |||
563 | 563 | ||
564 | req = skreq->req; | 564 | req = skreq->req; |
565 | blk_add_request_payload(req, page, len); | 565 | blk_add_request_payload(req, page, len); |
566 | req->buffer = buf; | ||
567 | } | 566 | } |
568 | 567 | ||
569 | static void skd_request_fn_not_online(struct request_queue *q); | 568 | static void skd_request_fn_not_online(struct request_queue *q); |
@@ -744,6 +743,7 @@ static void skd_request_fn(struct request_queue *q) | |||
744 | break; | 743 | break; |
745 | } | 744 | } |
746 | skreq->discard_page = 1; | 745 | skreq->discard_page = 1; |
746 | req->completion_data = page; | ||
747 | skd_prep_discard_cdb(scsi_req, skreq, page, lba, count); | 747 | skd_prep_discard_cdb(scsi_req, skreq, page, lba, count); |
748 | 748 | ||
749 | } else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) { | 749 | } else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) { |
@@ -858,8 +858,7 @@ static void skd_end_request(struct skd_device *skdev, | |||
858 | (skreq->discard_page == 1)) { | 858 | (skreq->discard_page == 1)) { |
859 | pr_debug("%s:%s:%d, free the page!", | 859 | pr_debug("%s:%s:%d, free the page!", |
860 | skdev->name, __func__, __LINE__); | 860 | skdev->name, __func__, __LINE__); |
861 | free_page((unsigned long)req->buffer); | 861 | __free_page(req->completion_data); |
862 | req->buffer = NULL; | ||
863 | } | 862 | } |
864 | 863 | ||
865 | if (unlikely(error)) { | 864 | if (unlikely(error)) { |
diff --git a/drivers/block/swim.c b/drivers/block/swim.c index b02d53a399f3..6b44bbe528b7 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c | |||
@@ -549,7 +549,7 @@ static void redo_fd_request(struct request_queue *q) | |||
549 | case READ: | 549 | case READ: |
550 | err = floppy_read_sectors(fs, blk_rq_pos(req), | 550 | err = floppy_read_sectors(fs, blk_rq_pos(req), |
551 | blk_rq_cur_sectors(req), | 551 | blk_rq_cur_sectors(req), |
552 | req->buffer); | 552 | bio_data(req->bio)); |
553 | break; | 553 | break; |
554 | } | 554 | } |
555 | done: | 555 | done: |
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index c74f7b56e7c4..523ee8fd4c15 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c | |||
@@ -342,7 +342,7 @@ static void start_request(struct floppy_state *fs) | |||
342 | swim3_dbg("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n", | 342 | swim3_dbg("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n", |
343 | req->rq_disk->disk_name, req->cmd, | 343 | req->rq_disk->disk_name, req->cmd, |
344 | (long)blk_rq_pos(req), blk_rq_sectors(req), | 344 | (long)blk_rq_pos(req), blk_rq_sectors(req), |
345 | req->buffer); | 345 | bio_data(req->bio)); |
346 | swim3_dbg(" errors=%d current_nr_sectors=%u\n", | 346 | swim3_dbg(" errors=%d current_nr_sectors=%u\n", |
347 | req->errors, blk_rq_cur_sectors(req)); | 347 | req->errors, blk_rq_cur_sectors(req)); |
348 | #endif | 348 | #endif |
@@ -479,11 +479,11 @@ static inline void setup_transfer(struct floppy_state *fs) | |||
479 | /* Set up 3 dma commands: write preamble, data, postamble */ | 479 | /* Set up 3 dma commands: write preamble, data, postamble */ |
480 | init_dma(cp, OUTPUT_MORE, write_preamble, sizeof(write_preamble)); | 480 | init_dma(cp, OUTPUT_MORE, write_preamble, sizeof(write_preamble)); |
481 | ++cp; | 481 | ++cp; |
482 | init_dma(cp, OUTPUT_MORE, req->buffer, 512); | 482 | init_dma(cp, OUTPUT_MORE, bio_data(req->bio), 512); |
483 | ++cp; | 483 | ++cp; |
484 | init_dma(cp, OUTPUT_LAST, write_postamble, sizeof(write_postamble)); | 484 | init_dma(cp, OUTPUT_LAST, write_postamble, sizeof(write_postamble)); |
485 | } else { | 485 | } else { |
486 | init_dma(cp, INPUT_LAST, req->buffer, n * 512); | 486 | init_dma(cp, INPUT_LAST, bio_data(req->bio), n * 512); |
487 | } | 487 | } |
488 | ++cp; | 488 | ++cp; |
489 | out_le16(&cp->command, DBDMA_STOP); | 489 | out_le16(&cp->command, DBDMA_STOP); |
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index cb9b1f8326c3..c8f286e8d80f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -30,6 +30,9 @@ struct virtio_blk | |||
30 | /* The disk structure for the kernel. */ | 30 | /* The disk structure for the kernel. */ |
31 | struct gendisk *disk; | 31 | struct gendisk *disk; |
32 | 32 | ||
33 | /* Block layer tags. */ | ||
34 | struct blk_mq_tag_set tag_set; | ||
35 | |||
33 | /* Process context for config space updates */ | 36 | /* Process context for config space updates */ |
34 | struct work_struct config_work; | 37 | struct work_struct config_work; |
35 | 38 | ||
@@ -112,7 +115,7 @@ static int __virtblk_add_req(struct virtqueue *vq, | |||
112 | 115 | ||
113 | static inline void virtblk_request_done(struct request *req) | 116 | static inline void virtblk_request_done(struct request *req) |
114 | { | 117 | { |
115 | struct virtblk_req *vbr = req->special; | 118 | struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); |
116 | int error = virtblk_result(vbr); | 119 | int error = virtblk_result(vbr); |
117 | 120 | ||
118 | if (req->cmd_type == REQ_TYPE_BLOCK_PC) { | 121 | if (req->cmd_type == REQ_TYPE_BLOCK_PC) { |
@@ -147,14 +150,14 @@ static void virtblk_done(struct virtqueue *vq) | |||
147 | 150 | ||
148 | /* In case queue is stopped waiting for more buffers. */ | 151 | /* In case queue is stopped waiting for more buffers. */ |
149 | if (req_done) | 152 | if (req_done) |
150 | blk_mq_start_stopped_hw_queues(vblk->disk->queue); | 153 | blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); |
151 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | 154 | spin_unlock_irqrestore(&vblk->vq_lock, flags); |
152 | } | 155 | } |
153 | 156 | ||
154 | static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | 157 | static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) |
155 | { | 158 | { |
156 | struct virtio_blk *vblk = hctx->queue->queuedata; | 159 | struct virtio_blk *vblk = hctx->queue->queuedata; |
157 | struct virtblk_req *vbr = req->special; | 160 | struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); |
158 | unsigned long flags; | 161 | unsigned long flags; |
159 | unsigned int num; | 162 | unsigned int num; |
160 | const bool last = (req->cmd_flags & REQ_END) != 0; | 163 | const bool last = (req->cmd_flags & REQ_END) != 0; |
@@ -480,33 +483,27 @@ static const struct device_attribute dev_attr_cache_type_rw = | |||
480 | __ATTR(cache_type, S_IRUGO|S_IWUSR, | 483 | __ATTR(cache_type, S_IRUGO|S_IWUSR, |
481 | virtblk_cache_type_show, virtblk_cache_type_store); | 484 | virtblk_cache_type_show, virtblk_cache_type_store); |
482 | 485 | ||
483 | static struct blk_mq_ops virtio_mq_ops = { | 486 | static int virtblk_init_request(void *data, struct request *rq, |
484 | .queue_rq = virtio_queue_rq, | 487 | unsigned int hctx_idx, unsigned int request_idx, |
485 | .map_queue = blk_mq_map_queue, | 488 | unsigned int numa_node) |
486 | .alloc_hctx = blk_mq_alloc_single_hw_queue, | ||
487 | .free_hctx = blk_mq_free_single_hw_queue, | ||
488 | .complete = virtblk_request_done, | ||
489 | }; | ||
490 | |||
491 | static struct blk_mq_reg virtio_mq_reg = { | ||
492 | .ops = &virtio_mq_ops, | ||
493 | .nr_hw_queues = 1, | ||
494 | .queue_depth = 0, /* Set in virtblk_probe */ | ||
495 | .numa_node = NUMA_NO_NODE, | ||
496 | .flags = BLK_MQ_F_SHOULD_MERGE, | ||
497 | }; | ||
498 | module_param_named(queue_depth, virtio_mq_reg.queue_depth, uint, 0444); | ||
499 | |||
500 | static int virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx, | ||
501 | struct request *rq, unsigned int nr) | ||
502 | { | 489 | { |
503 | struct virtio_blk *vblk = data; | 490 | struct virtio_blk *vblk = data; |
504 | struct virtblk_req *vbr = rq->special; | 491 | struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); |
505 | 492 | ||
506 | sg_init_table(vbr->sg, vblk->sg_elems); | 493 | sg_init_table(vbr->sg, vblk->sg_elems); |
507 | return 0; | 494 | return 0; |
508 | } | 495 | } |
509 | 496 | ||
497 | static struct blk_mq_ops virtio_mq_ops = { | ||
498 | .queue_rq = virtio_queue_rq, | ||
499 | .map_queue = blk_mq_map_queue, | ||
500 | .complete = virtblk_request_done, | ||
501 | .init_request = virtblk_init_request, | ||
502 | }; | ||
503 | |||
504 | static unsigned int virtblk_queue_depth; | ||
505 | module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); | ||
506 | |||
510 | static int virtblk_probe(struct virtio_device *vdev) | 507 | static int virtblk_probe(struct virtio_device *vdev) |
511 | { | 508 | { |
512 | struct virtio_blk *vblk; | 509 | struct virtio_blk *vblk; |
@@ -561,24 +558,34 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
561 | } | 558 | } |
562 | 559 | ||
563 | /* Default queue sizing is to fill the ring. */ | 560 | /* Default queue sizing is to fill the ring. */ |
564 | if (!virtio_mq_reg.queue_depth) { | 561 | if (!virtblk_queue_depth) { |
565 | virtio_mq_reg.queue_depth = vblk->vq->num_free; | 562 | virtblk_queue_depth = vblk->vq->num_free; |
566 | /* ... but without indirect descs, we use 2 descs per req */ | 563 | /* ... but without indirect descs, we use 2 descs per req */ |
567 | if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) | 564 | if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) |
568 | virtio_mq_reg.queue_depth /= 2; | 565 | virtblk_queue_depth /= 2; |
569 | } | 566 | } |
570 | virtio_mq_reg.cmd_size = | 567 | |
568 | memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); | ||
569 | vblk->tag_set.ops = &virtio_mq_ops; | ||
570 | vblk->tag_set.nr_hw_queues = 1; | ||
571 | vblk->tag_set.queue_depth = virtblk_queue_depth; | ||
572 | vblk->tag_set.numa_node = NUMA_NO_NODE; | ||
573 | vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; | ||
574 | vblk->tag_set.cmd_size = | ||
571 | sizeof(struct virtblk_req) + | 575 | sizeof(struct virtblk_req) + |
572 | sizeof(struct scatterlist) * sg_elems; | 576 | sizeof(struct scatterlist) * sg_elems; |
577 | vblk->tag_set.driver_data = vblk; | ||
573 | 578 | ||
574 | q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk); | 579 | err = blk_mq_alloc_tag_set(&vblk->tag_set); |
580 | if (err) | ||
581 | goto out_put_disk; | ||
582 | |||
583 | q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set); | ||
575 | if (!q) { | 584 | if (!q) { |
576 | err = -ENOMEM; | 585 | err = -ENOMEM; |
577 | goto out_put_disk; | 586 | goto out_free_tags; |
578 | } | 587 | } |
579 | 588 | ||
580 | blk_mq_init_commands(q, virtblk_init_vbr, vblk); | ||
581 | |||
582 | q->queuedata = vblk; | 589 | q->queuedata = vblk; |
583 | 590 | ||
584 | virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); | 591 | virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); |
@@ -679,6 +686,8 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
679 | out_del_disk: | 686 | out_del_disk: |
680 | del_gendisk(vblk->disk); | 687 | del_gendisk(vblk->disk); |
681 | blk_cleanup_queue(vblk->disk->queue); | 688 | blk_cleanup_queue(vblk->disk->queue); |
689 | out_free_tags: | ||
690 | blk_mq_free_tag_set(&vblk->tag_set); | ||
682 | out_put_disk: | 691 | out_put_disk: |
683 | put_disk(vblk->disk); | 692 | put_disk(vblk->disk); |
684 | out_free_vq: | 693 | out_free_vq: |
@@ -705,6 +714,8 @@ static void virtblk_remove(struct virtio_device *vdev) | |||
705 | del_gendisk(vblk->disk); | 714 | del_gendisk(vblk->disk); |
706 | blk_cleanup_queue(vblk->disk->queue); | 715 | blk_cleanup_queue(vblk->disk->queue); |
707 | 716 | ||
717 | blk_mq_free_tag_set(&vblk->tag_set); | ||
718 | |||
708 | /* Stop all the virtqueues. */ | 719 | /* Stop all the virtqueues. */ |
709 | vdev->config->reset(vdev); | 720 | vdev->config->reset(vdev); |
710 | 721 | ||
@@ -749,7 +760,7 @@ static int virtblk_restore(struct virtio_device *vdev) | |||
749 | vblk->config_enable = true; | 760 | vblk->config_enable = true; |
750 | ret = init_vq(vdev->priv); | 761 | ret = init_vq(vdev->priv); |
751 | if (!ret) | 762 | if (!ret) |
752 | blk_mq_start_stopped_hw_queues(vblk->disk->queue); | 763 | blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); |
753 | 764 | ||
754 | return ret; | 765 | return ret; |
755 | } | 766 | } |
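virtio-blk shows the second half of the API change: per-request initialisation moves from blk_mq_init_commands() into an ops->init_request callback run while the tag set's request maps are built, and blk_mq_start_stopped_hw_queues() gains an async flag. A hedged sketch of the callback shape (example_* names are hypothetical; .queue_rq and .complete are omitted for brevity):

#include <linux/blk-mq.h>
#include <linux/scatterlist.h>
#include <linux/kernel.h>

struct example_req {
	struct scatterlist sg[4];	/* driver state living in the PDU */
};

static int example_init_request(void *data, struct request *rq,
				unsigned int hctx_idx,
				unsigned int request_idx,
				unsigned int numa_node)
{
	struct example_req *er = blk_mq_rq_to_pdu(rq);

	/* Called once per preallocated request; 'data' is the value the
	 * driver placed in tag_set->driver_data. */
	sg_init_table(er->sg, ARRAY_SIZE(er->sg));
	return 0;
}

static struct blk_mq_ops example_ops = {
	.map_queue	= blk_mq_map_queue,
	.init_request	= example_init_request,
	/* a real driver also sets .queue_rq and .complete */
};

static void example_resume(struct request_queue *q)
{
	/* The new second argument requests an asynchronous (workqueue) run. */
	blk_mq_start_stopped_hw_queues(q, true);
}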
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index efe1b4761735..283a30e88287 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -612,10 +612,10 @@ static void do_blkif_request(struct request_queue *rq) | |||
612 | } | 612 | } |
613 | 613 | ||
614 | pr_debug("do_blk_req %p: cmd %p, sec %lx, " | 614 | pr_debug("do_blk_req %p: cmd %p, sec %lx, " |
615 | "(%u/%u) buffer:%p [%s]\n", | 615 | "(%u/%u) [%s]\n", |
616 | req, req->cmd, (unsigned long)blk_rq_pos(req), | 616 | req, req->cmd, (unsigned long)blk_rq_pos(req), |
617 | blk_rq_cur_sectors(req), blk_rq_sectors(req), | 617 | blk_rq_cur_sectors(req), blk_rq_sectors(req), |
618 | req->buffer, rq_data_dir(req) ? "write" : "read"); | 618 | rq_data_dir(req) ? "write" : "read"); |
619 | 619 | ||
620 | if (blkif_queue_request(req)) { | 620 | if (blkif_queue_request(req)) { |
621 | blk_requeue_request(rq, req); | 621 | blk_requeue_request(rq, req); |
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 1393b8871a28..ab3ea62e5dfc 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c | |||
@@ -661,7 +661,7 @@ static void ace_fsm_dostate(struct ace_device *ace) | |||
661 | rq_data_dir(req)); | 661 | rq_data_dir(req)); |
662 | 662 | ||
663 | ace->req = req; | 663 | ace->req = req; |
664 | ace->data_ptr = req->buffer; | 664 | ace->data_ptr = bio_data(req->bio); |
665 | ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR; | 665 | ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR; |
666 | ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF); | 666 | ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF); |
667 | 667 | ||
@@ -733,7 +733,7 @@ static void ace_fsm_dostate(struct ace_device *ace) | |||
733 | * blk_rq_sectors(ace->req), | 733 | * blk_rq_sectors(ace->req), |
734 | * blk_rq_cur_sectors(ace->req)); | 734 | * blk_rq_cur_sectors(ace->req)); |
735 | */ | 735 | */ |
736 | ace->data_ptr = ace->req->buffer; | 736 | ace->data_ptr = bio_data(ace->req->bio); |
737 | ace->data_count = blk_rq_cur_sectors(ace->req) * 16; | 737 | ace->data_count = blk_rq_cur_sectors(ace->req) * 16; |
738 | ace_fsm_yieldirq(ace); | 738 | ace_fsm_yieldirq(ace); |
739 | break; | 739 | break; |
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 27de5046708a..968f9e52effa 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c | |||
@@ -87,13 +87,15 @@ static void do_z2_request(struct request_queue *q) | |||
87 | while (len) { | 87 | while (len) { |
88 | unsigned long addr = start & Z2RAM_CHUNKMASK; | 88 | unsigned long addr = start & Z2RAM_CHUNKMASK; |
89 | unsigned long size = Z2RAM_CHUNKSIZE - addr; | 89 | unsigned long size = Z2RAM_CHUNKSIZE - addr; |
90 | void *buffer = bio_data(req->bio); | ||
91 | |||
90 | if (len < size) | 92 | if (len < size) |
91 | size = len; | 93 | size = len; |
92 | addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; | 94 | addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; |
93 | if (rq_data_dir(req) == READ) | 95 | if (rq_data_dir(req) == READ) |
94 | memcpy(req->buffer, (char *)addr, size); | 96 | memcpy(buffer, (char *)addr, size); |
95 | else | 97 | else |
96 | memcpy((char *)addr, req->buffer, size); | 98 | memcpy((char *)addr, buffer, size); |
97 | start += size; | 99 | start += size; |
98 | len -= size; | 100 | len -= size; |
99 | } | 101 | } |
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 51e75ad96422..584bc3126403 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c | |||
@@ -602,7 +602,7 @@ static void gdrom_readdisk_dma(struct work_struct *work) | |||
602 | spin_unlock(&gdrom_lock); | 602 | spin_unlock(&gdrom_lock); |
603 | block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET; | 603 | block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET; |
604 | block_cnt = blk_rq_sectors(req)/GD_TO_BLK; | 604 | block_cnt = blk_rq_sectors(req)/GD_TO_BLK; |
605 | __raw_writel(virt_to_phys(req->buffer), GDROM_DMA_STARTADDR_REG); | 605 | __raw_writel(virt_to_phys(bio_data(req->bio)), GDROM_DMA_STARTADDR_REG); |
606 | __raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG); | 606 | __raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG); |
607 | __raw_writel(1, GDROM_DMA_DIRECTION_REG); | 607 | __raw_writel(1, GDROM_DMA_DIRECTION_REG); |
608 | __raw_writel(1, GDROM_DMA_ENABLE_REG); | 608 | __raw_writel(1, GDROM_DMA_ENABLE_REG); |
diff --git a/drivers/char/random.c b/drivers/char/random.c index 102c50d38902..06cea7ff3a7c 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c | |||
@@ -902,6 +902,7 @@ void add_disk_randomness(struct gendisk *disk) | |||
902 | add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); | 902 | add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); |
903 | trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool)); | 903 | trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool)); |
904 | } | 904 | } |
905 | EXPORT_SYMBOL_GPL(add_disk_randomness); | ||
905 | #endif | 906 | #endif |
906 | 907 | ||
907 | /********************************************************************* | 908 | /********************************************************************* |
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 16f69be820c7..ee880382e3bc 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c | |||
@@ -188,10 +188,9 @@ static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq, | |||
188 | 188 | ||
189 | ledtrig_ide_activity(); | 189 | ledtrig_ide_activity(); |
190 | 190 | ||
191 | pr_debug("%s: %sing: block=%llu, sectors=%u, buffer=0x%08lx\n", | 191 | pr_debug("%s: %sing: block=%llu, sectors=%u\n", |
192 | drive->name, rq_data_dir(rq) == READ ? "read" : "writ", | 192 | drive->name, rq_data_dir(rq) == READ ? "read" : "writ", |
193 | (unsigned long long)block, blk_rq_sectors(rq), | 193 | (unsigned long long)block, blk_rq_sectors(rq)); |
194 | (unsigned long)rq->buffer); | ||
195 | 194 | ||
196 | if (hwif->rw_disk) | 195 | if (hwif->rw_disk) |
197 | hwif->rw_disk(drive, rq); | 196 | hwif->rw_disk(drive, rq); |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 455e64916498..6a71bc7c9133 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -1544,7 +1544,6 @@ static int setup_clone(struct request *clone, struct request *rq, | |||
1544 | clone->cmd = rq->cmd; | 1544 | clone->cmd = rq->cmd; |
1545 | clone->cmd_len = rq->cmd_len; | 1545 | clone->cmd_len = rq->cmd_len; |
1546 | clone->sense = rq->sense; | 1546 | clone->sense = rq->sense; |
1547 | clone->buffer = rq->buffer; | ||
1548 | clone->end_io = end_clone_request; | 1547 | clone->end_io = end_clone_request; |
1549 | clone->end_io_data = tio; | 1548 | clone->end_io_data = tio; |
1550 | 1549 | ||
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 0b2ccb68c0d0..4dbfaee9aa95 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c | |||
@@ -82,8 +82,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, | |||
82 | 82 | ||
83 | block = blk_rq_pos(req) << 9 >> tr->blkshift; | 83 | block = blk_rq_pos(req) << 9 >> tr->blkshift; |
84 | nsect = blk_rq_cur_bytes(req) >> tr->blkshift; | 84 | nsect = blk_rq_cur_bytes(req) >> tr->blkshift; |
85 | 85 | buf = bio_data(req->bio); | |
86 | buf = req->buffer; | ||
87 | 86 | ||
88 | if (req->cmd_type != REQ_TYPE_FS) | 87 | if (req->cmd_type != REQ_TYPE_FS) |
89 | return -EIO; | 88 | return -EIO; |
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 8d659e6a1b4c..20a667c95da4 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c | |||
@@ -253,7 +253,7 @@ static int do_ubiblock_request(struct ubiblock *dev, struct request *req) | |||
253 | * flash access anyway. | 253 | * flash access anyway. |
254 | */ | 254 | */ |
255 | mutex_lock(&dev->dev_mutex); | 255 | mutex_lock(&dev->dev_mutex); |
256 | ret = ubiblock_read(dev, req->buffer, sec, len); | 256 | ret = ubiblock_read(dev, bio_data(req->bio), sec, len); |
257 | mutex_unlock(&dev->dev_mutex); | 257 | mutex_unlock(&dev->dev_mutex); |
258 | 258 | ||
259 | return ret; | 259 | return ret; |
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index 4ccb5d869389..a40ee1e37486 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c | |||
@@ -207,7 +207,7 @@ static void jsfd_do_request(struct request_queue *q) | |||
207 | goto end; | 207 | goto end; |
208 | } | 208 | } |
209 | 209 | ||
210 | jsfd_read(req->buffer, jdp->dbase + offset, len); | 210 | jsfd_read(bio_data(req->bio), jdp->dbase + offset, len); |
211 | err = 0; | 211 | err = 0; |
212 | end: | 212 | end: |
213 | if (!__blk_end_request_cur(req, err)) | 213 | if (!__blk_end_request_cur(req, err)) |
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9db097a28a74..a0c95cac91f0 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c | |||
@@ -140,7 +140,7 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy) | |||
140 | cmd->result = 0; | 140 | cmd->result = 0; |
141 | spin_lock_irqsave(q->queue_lock, flags); | 141 | spin_lock_irqsave(q->queue_lock, flags); |
142 | blk_requeue_request(q, cmd->request); | 142 | blk_requeue_request(q, cmd->request); |
143 | kblockd_schedule_work(q, &device->requeue_work); | 143 | kblockd_schedule_work(&device->requeue_work); |
144 | spin_unlock_irqrestore(q->queue_lock, flags); | 144 | spin_unlock_irqrestore(q->queue_lock, flags); |
145 | } | 145 | } |
146 | 146 | ||
@@ -1019,8 +1019,6 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, | |||
1019 | return BLKPREP_DEFER; | 1019 | return BLKPREP_DEFER; |
1020 | } | 1020 | } |
1021 | 1021 | ||
1022 | req->buffer = NULL; | ||
1023 | |||
1024 | /* | 1022 | /* |
1025 | * Next, walk the list, and fill in the addresses and sizes of | 1023 | * Next, walk the list, and fill in the addresses and sizes of |
1026 | * each segment. | 1024 | * each segment. |
@@ -1158,7 +1156,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) | |||
1158 | BUG_ON(blk_rq_bytes(req)); | 1156 | BUG_ON(blk_rq_bytes(req)); |
1159 | 1157 | ||
1160 | memset(&cmd->sdb, 0, sizeof(cmd->sdb)); | 1158 | memset(&cmd->sdb, 0, sizeof(cmd->sdb)); |
1161 | req->buffer = NULL; | ||
1162 | } | 1159 | } |
1163 | 1160 | ||
1164 | cmd->cmd_len = req->cmd_len; | 1161 | cmd->cmd_len = req->cmd_len; |
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index efcbcd182863..96af195224f2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c | |||
@@ -737,16 +737,14 @@ static int sd_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq) | |||
737 | goto out; | 737 | goto out; |
738 | } | 738 | } |
739 | 739 | ||
740 | rq->completion_data = page; | ||
740 | blk_add_request_payload(rq, page, len); | 741 | blk_add_request_payload(rq, page, len); |
741 | ret = scsi_setup_blk_pc_cmnd(sdp, rq); | 742 | ret = scsi_setup_blk_pc_cmnd(sdp, rq); |
742 | rq->buffer = page_address(page); | ||
743 | rq->__data_len = nr_bytes; | 743 | rq->__data_len = nr_bytes; |
744 | 744 | ||
745 | out: | 745 | out: |
746 | if (ret != BLKPREP_OK) { | 746 | if (ret != BLKPREP_OK) |
747 | __free_page(page); | 747 | __free_page(page); |
748 | rq->buffer = NULL; | ||
749 | } | ||
750 | return ret; | 748 | return ret; |
751 | } | 749 | } |
752 | 750 | ||
@@ -842,10 +840,9 @@ static void sd_unprep_fn(struct request_queue *q, struct request *rq) | |||
842 | { | 840 | { |
843 | struct scsi_cmnd *SCpnt = rq->special; | 841 | struct scsi_cmnd *SCpnt = rq->special; |
844 | 842 | ||
845 | if (rq->cmd_flags & REQ_DISCARD) { | 843 | if (rq->cmd_flags & REQ_DISCARD) |
846 | free_page((unsigned long)rq->buffer); | 844 | __free_page(rq->completion_data); |
847 | rq->buffer = NULL; | 845 | |
848 | } | ||
849 | if (SCpnt->cmnd != rq->cmd) { | 846 | if (SCpnt->cmnd != rq->cmd) { |
850 | mempool_free(SCpnt->cmnd, sd_cdb_pool); | 847 | mempool_free(SCpnt->cmnd, sd_cdb_pool); |
851 | SCpnt->cmnd = NULL; | 848 | SCpnt->cmnd = NULL; |
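sd and skd stop reusing rq->buffer to remember the discard payload page; the page pointer now travels in rq->completion_data between prep and unprep/completion. A minimal sketch of that hand-off (example_* names are hypothetical; the payload length and error paths are assumptions of the sketch):

#include <linux/blkdev.h>
#include <linux/gfp.h>

/* Prep side: attach a zeroed payload page and remember it for completion. */
static int example_prep_discard(struct request_queue *q, struct request *rq)
{
	struct page *page = alloc_page(GFP_ATOMIC | __GFP_ZERO);

	if (!page)
		return BLKPREP_DEFER;

	rq->completion_data = page;		/* was rq->buffer = page_address(page) */
	blk_add_request_payload(rq, page, 512);	/* payload length assumed */
	return BLKPREP_OK;
}

/* Unprep/completion side: free the page stashed at prep time. */
static void example_unprep_discard(struct request_queue *q, struct request *rq)
{
	if (rq->cmd_flags & REQ_DISCARD)
		__free_page(rq->completion_data);
}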
diff --git a/fs/Makefile b/fs/Makefile index f9cb9876e466..4030cbfbc9af 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -14,14 +14,13 @@ obj-y := open.o read_write.o file_table.o super.o \ | |||
14 | stack.o fs_struct.o statfs.o | 14 | stack.o fs_struct.o statfs.o |
15 | 15 | ||
16 | ifeq ($(CONFIG_BLOCK),y) | 16 | ifeq ($(CONFIG_BLOCK),y) |
17 | obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o | 17 | obj-y += buffer.o block_dev.o direct-io.o mpage.o |
18 | else | 18 | else |
19 | obj-y += no-block.o | 19 | obj-y += no-block.o |
20 | endif | 20 | endif |
21 | 21 | ||
22 | obj-$(CONFIG_PROC_FS) += proc_namespace.o | 22 | obj-$(CONFIG_PROC_FS) += proc_namespace.o |
23 | 23 | ||
24 | obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o | ||
25 | obj-y += notify/ | 24 | obj-y += notify/ |
26 | obj-$(CONFIG_EPOLL) += eventpoll.o | 25 | obj-$(CONFIG_EPOLL) += eventpoll.o |
27 | obj-$(CONFIG_ANON_INODES) += anon_inodes.o | 26 | obj-$(CONFIG_ANON_INODES) += anon_inodes.o |
diff --git a/include/linux/bio.h b/include/linux/bio.h index bba550826921..5a645769f020 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
@@ -333,7 +333,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, | |||
333 | 333 | ||
334 | extern struct bio_set *bioset_create(unsigned int, unsigned int); | 334 | extern struct bio_set *bioset_create(unsigned int, unsigned int); |
335 | extern void bioset_free(struct bio_set *); | 335 | extern void bioset_free(struct bio_set *); |
336 | extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries); | 336 | extern mempool_t *biovec_create_pool(int pool_entries); |
337 | 337 | ||
338 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); | 338 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); |
339 | extern void bio_put(struct bio *); | 339 | extern void bio_put(struct bio *); |
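biovec_create_pool() loses its bio_set argument here, since the set is no longer needed to size the pool. A one-line usage sketch, assuming a caller that keeps a private bvec mempool:

#include <linux/bio.h>
#include <linux/mempool.h>

/* Hypothetical private bvec pool using the new argument list. */
static mempool_t *example_bvec_pool(int entries)
{
	return biovec_create_pool(entries);	/* bio_set pointer dropped */
}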
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0120451545d8..c15128833100 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h | |||
@@ -8,7 +8,13 @@ struct blk_mq_tags; | |||
8 | struct blk_mq_cpu_notifier { | 8 | struct blk_mq_cpu_notifier { |
9 | struct list_head list; | 9 | struct list_head list; |
10 | void *data; | 10 | void *data; |
11 | void (*notify)(void *data, unsigned long action, unsigned int cpu); | 11 | int (*notify)(void *data, unsigned long action, unsigned int cpu); |
12 | }; | ||
13 | |||
14 | struct blk_mq_ctxmap { | ||
15 | unsigned int map_size; | ||
16 | unsigned int bits_per_word; | ||
17 | struct blk_align_bitmap *map; | ||
12 | }; | 18 | }; |
13 | 19 | ||
14 | struct blk_mq_hw_ctx { | 20 | struct blk_mq_hw_ctx { |
@@ -18,7 +24,11 @@ struct blk_mq_hw_ctx { | |||
18 | } ____cacheline_aligned_in_smp; | 24 | } ____cacheline_aligned_in_smp; |
19 | 25 | ||
20 | unsigned long state; /* BLK_MQ_S_* flags */ | 26 | unsigned long state; /* BLK_MQ_S_* flags */ |
21 | struct delayed_work delayed_work; | 27 | struct delayed_work run_work; |
28 | struct delayed_work delay_work; | ||
29 | cpumask_var_t cpumask; | ||
30 | int next_cpu; | ||
31 | int next_cpu_batch; | ||
22 | 32 | ||
23 | unsigned long flags; /* BLK_MQ_F_* flags */ | 33 | unsigned long flags; /* BLK_MQ_F_* flags */ |
24 | 34 | ||
@@ -27,13 +37,13 @@ struct blk_mq_hw_ctx { | |||
27 | 37 | ||
28 | void *driver_data; | 38 | void *driver_data; |
29 | 39 | ||
40 | struct blk_mq_ctxmap ctx_map; | ||
41 | |||
30 | unsigned int nr_ctx; | 42 | unsigned int nr_ctx; |
31 | struct blk_mq_ctx **ctxs; | 43 | struct blk_mq_ctx **ctxs; |
32 | unsigned int nr_ctx_map; | ||
33 | unsigned long *ctx_map; | ||
34 | 44 | ||
35 | struct request **rqs; | 45 | unsigned int wait_index; |
36 | struct list_head page_list; | 46 | |
37 | struct blk_mq_tags *tags; | 47 | struct blk_mq_tags *tags; |
38 | 48 | ||
39 | unsigned long queued; | 49 | unsigned long queued; |
@@ -41,31 +51,40 @@ struct blk_mq_hw_ctx { | |||
41 | #define BLK_MQ_MAX_DISPATCH_ORDER 10 | 51 | #define BLK_MQ_MAX_DISPATCH_ORDER 10 |
42 | unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; | 52 | unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; |
43 | 53 | ||
44 | unsigned int queue_depth; | ||
45 | unsigned int numa_node; | 54 | unsigned int numa_node; |
46 | unsigned int cmd_size; /* per-request extra data */ | 55 | unsigned int cmd_size; /* per-request extra data */ |
47 | 56 | ||
57 | atomic_t nr_active; | ||
58 | |||
48 | struct blk_mq_cpu_notifier cpu_notifier; | 59 | struct blk_mq_cpu_notifier cpu_notifier; |
49 | struct kobject kobj; | 60 | struct kobject kobj; |
50 | }; | 61 | }; |
51 | 62 | ||
52 | struct blk_mq_reg { | 63 | struct blk_mq_tag_set { |
53 | struct blk_mq_ops *ops; | 64 | struct blk_mq_ops *ops; |
54 | unsigned int nr_hw_queues; | 65 | unsigned int nr_hw_queues; |
55 | unsigned int queue_depth; | 66 | unsigned int queue_depth; /* max hw supported */ |
56 | unsigned int reserved_tags; | 67 | unsigned int reserved_tags; |
57 | unsigned int cmd_size; /* per-request extra data */ | 68 | unsigned int cmd_size; /* per-request extra data */ |
58 | int numa_node; | 69 | int numa_node; |
59 | unsigned int timeout; | 70 | unsigned int timeout; |
60 | unsigned int flags; /* BLK_MQ_F_* */ | 71 | unsigned int flags; /* BLK_MQ_F_* */ |
72 | void *driver_data; | ||
73 | |||
74 | struct blk_mq_tags **tags; | ||
75 | |||
76 | struct mutex tag_list_lock; | ||
77 | struct list_head tag_list; | ||
61 | }; | 78 | }; |
62 | 79 | ||
63 | typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); | 80 | typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); |
64 | typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); | 81 | typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); |
65 | typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int); | ||
66 | typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); | ||
67 | typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); | 82 | typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); |
68 | typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); | 83 | typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); |
84 | typedef int (init_request_fn)(void *, struct request *, unsigned int, | ||
85 | unsigned int, unsigned int); | ||
86 | typedef void (exit_request_fn)(void *, struct request *, unsigned int, | ||
87 | unsigned int); | ||
69 | 88 | ||
70 | struct blk_mq_ops { | 89 | struct blk_mq_ops { |
71 | /* | 90 | /* |
@@ -86,18 +105,20 @@ struct blk_mq_ops { | |||
86 | softirq_done_fn *complete; | 105 | softirq_done_fn *complete; |
87 | 106 | ||
88 | /* | 107 | /* |
89 | * Override for hctx allocations (should probably go) | ||
90 | */ | ||
91 | alloc_hctx_fn *alloc_hctx; | ||
92 | free_hctx_fn *free_hctx; | ||
93 | |||
94 | /* | ||
95 | * Called when the block layer side of a hardware queue has been | 108 | * Called when the block layer side of a hardware queue has been |
96 | * set up, allowing the driver to allocate/init matching structures. | 109 | * set up, allowing the driver to allocate/init matching structures. |
97 | * Ditto for exit/teardown. | 110 | * Ditto for exit/teardown. |
98 | */ | 111 | */ |
99 | init_hctx_fn *init_hctx; | 112 | init_hctx_fn *init_hctx; |
100 | exit_hctx_fn *exit_hctx; | 113 | exit_hctx_fn *exit_hctx; |
114 | |||
115 | /* | ||
116 | * Called for every command allocated by the block layer to allow | ||
117 | * the driver to set up driver specific data. | ||
118 | * Ditto for exit/teardown. | ||
119 | */ | ||
120 | init_request_fn *init_request; | ||
121 | exit_request_fn *exit_request; | ||
101 | }; | 122 | }; |
102 | 123 | ||
103 | enum { | 124 | enum { |
@@ -107,18 +128,24 @@ enum { | |||
107 | 128 | ||
108 | BLK_MQ_F_SHOULD_MERGE = 1 << 0, | 129 | BLK_MQ_F_SHOULD_MERGE = 1 << 0, |
109 | BLK_MQ_F_SHOULD_SORT = 1 << 1, | 130 | BLK_MQ_F_SHOULD_SORT = 1 << 1, |
110 | BLK_MQ_F_SHOULD_IPI = 1 << 2, | 131 | BLK_MQ_F_TAG_SHARED = 1 << 2, |
132 | BLK_MQ_F_SG_MERGE = 1 << 3, | ||
133 | BLK_MQ_F_SYSFS_UP = 1 << 4, | ||
111 | 134 | ||
112 | BLK_MQ_S_STOPPED = 0, | 135 | BLK_MQ_S_STOPPED = 0, |
136 | BLK_MQ_S_TAG_ACTIVE = 1, | ||
113 | 137 | ||
114 | BLK_MQ_MAX_DEPTH = 2048, | 138 | BLK_MQ_MAX_DEPTH = 2048, |
139 | |||
140 | BLK_MQ_CPU_WORK_BATCH = 8, | ||
115 | }; | 141 | }; |
116 | 142 | ||
117 | struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); | 143 | struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); |
118 | int blk_mq_register_disk(struct gendisk *); | 144 | int blk_mq_register_disk(struct gendisk *); |
119 | void blk_mq_unregister_disk(struct gendisk *); | 145 | void blk_mq_unregister_disk(struct gendisk *); |
120 | int blk_mq_init_commands(struct request_queue *, int (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); | 146 | |
121 | void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); | 147 | int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); |
148 | void blk_mq_free_tag_set(struct blk_mq_tag_set *set); | ||
122 | 149 | ||
123 | void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); | 150 | void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); |
124 | 151 | ||
@@ -126,28 +153,28 @@ void blk_mq_insert_request(struct request *, bool, bool, bool); | |||
126 | void blk_mq_run_queues(struct request_queue *q, bool async); | 153 | void blk_mq_run_queues(struct request_queue *q, bool async); |
127 | void blk_mq_free_request(struct request *rq); | 154 | void blk_mq_free_request(struct request *rq); |
128 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *); | 155 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *); |
129 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp); | 156 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, |
130 | struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); | 157 | gfp_t gfp, bool reserved); |
131 | struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); | 158 | struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag); |
132 | 159 | ||
133 | struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); | 160 | struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); |
134 | struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int); | 161 | struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); |
135 | void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); | ||
136 | 162 | ||
137 | bool blk_mq_end_io_partial(struct request *rq, int error, | 163 | void blk_mq_end_io(struct request *rq, int error); |
138 | unsigned int nr_bytes); | 164 | void __blk_mq_end_io(struct request *rq, int error); |
139 | static inline void blk_mq_end_io(struct request *rq, int error) | ||
140 | { | ||
141 | bool done = !blk_mq_end_io_partial(rq, error, blk_rq_bytes(rq)); | ||
142 | BUG_ON(!done); | ||
143 | } | ||
144 | 165 | ||
166 | void blk_mq_requeue_request(struct request *rq); | ||
167 | void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); | ||
168 | void blk_mq_kick_requeue_list(struct request_queue *q); | ||
145 | void blk_mq_complete_request(struct request *rq); | 169 | void blk_mq_complete_request(struct request *rq); |
146 | 170 | ||
147 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); | 171 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); |
148 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); | 172 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); |
149 | void blk_mq_stop_hw_queues(struct request_queue *q); | 173 | void blk_mq_stop_hw_queues(struct request_queue *q); |
150 | void blk_mq_start_stopped_hw_queues(struct request_queue *q); | 174 | void blk_mq_start_hw_queues(struct request_queue *q); |
175 | void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); | ||
176 | void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); | ||
177 | void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data); | ||
151 | 178 | ||
152 | /* | 179 | /* |
153 | * Driver command data is immediately after the request. So subtract request | 180 | * Driver command data is immediately after the request. So subtract request |
@@ -162,12 +189,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) | |||
162 | return (void *) rq + sizeof(*rq); | 189 | return (void *) rq + sizeof(*rq); |
163 | } | 190 | } |
164 | 191 | ||
165 | static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, | ||
166 | unsigned int tag) | ||
167 | { | ||
168 | return hctx->rqs[tag]; | ||
169 | } | ||
170 | |||
171 | #define queue_for_each_hw_ctx(q, hctx, i) \ | 192 | #define queue_for_each_hw_ctx(q, hctx, i) \ |
172 | for ((i) = 0; (i) < (q)->nr_hw_queues && \ | 193 | for ((i) = 0; (i) < (q)->nr_hw_queues && \ |
173 | ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) | 194 | ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) |
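
The blk-mq.h hunks above replace the old blk_mq_reg / blk_mq_init_commands() interface with a persistent struct blk_mq_tag_set that a driver fills in once and registers via blk_mq_alloc_tag_set(), and move per-request driver-data setup into the new init_request/exit_request callbacks. The sketch below shows roughly how a driver would sit on top of the new interface, using only the prototypes visible in this diff; the driver name and my_* helpers are invented for illustration, BLK_MQ_RQ_QUEUE_OK and the NULL-on-failure convention of blk_mq_init_queue() are assumed from the blk-mq API of this kernel generation, and real hardware submission is elided. A minimal sketch, not the committed conversion of any in-tree driver:

#include <linux/module.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>

/* Hypothetical per-request driver data, allocated via tag_set.cmd_size. */
struct my_cmd {
	int hw_slot;
};

static int my_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

	/* Hardware submission would go here; complete immediately in the sketch. */
	cmd->hw_slot = rq->tag;
	blk_mq_end_io(rq, 0);
	return BLK_MQ_RQ_QUEUE_OK;
}

/* New in this series: called once for every pre-allocated request. */
static int my_init_request(void *data, struct request *rq,
			   unsigned int hctx_idx, unsigned int rq_idx,
			   unsigned int numa_node)
{
	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->hw_slot = -1;
	return 0;
}

static struct blk_mq_ops my_mq_ops = {
	.queue_rq	= my_queue_rq,
	.map_queue	= blk_mq_map_queue,
	.init_request	= my_init_request,
};

static struct blk_mq_tag_set my_tag_set;
static struct request_queue *my_queue;

static int __init my_init(void)
{
	int ret;

	my_tag_set.ops		= &my_mq_ops;
	my_tag_set.nr_hw_queues	= 1;
	my_tag_set.queue_depth	= 64;
	my_tag_set.numa_node	= NUMA_NO_NODE;
	my_tag_set.cmd_size	= sizeof(struct my_cmd);
	my_tag_set.flags	= BLK_MQ_F_SHOULD_MERGE;

	/* Replaces the old blk_mq_reg setup plus blk_mq_init_commands() pass. */
	ret = blk_mq_alloc_tag_set(&my_tag_set);
	if (ret)
		return ret;

	my_queue = blk_mq_init_queue(&my_tag_set);
	if (!my_queue) {
		blk_mq_free_tag_set(&my_tag_set);
		return -ENOMEM;
	}
	return 0;
}

static void __exit my_exit(void)
{
	blk_cleanup_queue(my_queue);
	blk_mq_free_tag_set(&my_tag_set);
}

module_init(my_init);
module_exit(my_exit);
MODULE_LICENSE("GPL");

Because the tag set now outlives any single queue, several queues created from the same set share its tags (see BLK_MQ_F_TAG_SHARED above), which is what the tag_list/tag_list_lock members in struct blk_mq_tag_set track.
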
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index aa0eaa2d0bd8..d8e4cea23a25 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -190,6 +190,7 @@ enum rq_flag_bits { | |||
190 | __REQ_PM, /* runtime pm request */ | 190 | __REQ_PM, /* runtime pm request */ |
191 | __REQ_END, /* last of chain of requests */ | 191 | __REQ_END, /* last of chain of requests */ |
192 | __REQ_HASHED, /* on IO scheduler merge hash */ | 192 | __REQ_HASHED, /* on IO scheduler merge hash */ |
193 | __REQ_MQ_INFLIGHT, /* track inflight for MQ */ | ||
193 | __REQ_NR_BITS, /* stops here */ | 194 | __REQ_NR_BITS, /* stops here */ |
194 | }; | 195 | }; |
195 | 196 | ||
@@ -243,5 +244,6 @@ enum rq_flag_bits { | |||
243 | #define REQ_PM (1ULL << __REQ_PM) | 244 | #define REQ_PM (1ULL << __REQ_PM) |
244 | #define REQ_END (1ULL << __REQ_END) | 245 | #define REQ_END (1ULL << __REQ_END) |
245 | #define REQ_HASHED (1ULL << __REQ_HASHED) | 246 | #define REQ_HASHED (1ULL << __REQ_HASHED) |
247 | #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) | ||
246 | 248 | ||
247 | #endif /* __LINUX_BLK_TYPES_H */ | 249 | #endif /* __LINUX_BLK_TYPES_H */ |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0d84981ee03f..695b9fd41efe 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -90,15 +90,15 @@ enum rq_cmd_type_bits { | |||
90 | #define BLK_MAX_CDB 16 | 90 | #define BLK_MAX_CDB 16 |
91 | 91 | ||
92 | /* | 92 | /* |
93 | * try to put the fields that are referenced together in the same cacheline. | 93 | * Try to put the fields that are referenced together in the same cacheline. |
94 | * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init() | 94 | * |
95 | * as well! | 95 | * If you modify this structure, make sure to update blk_rq_init() and |
96 | * especially blk_mq_rq_ctx_init() to take care of the added fields. | ||
96 | */ | 97 | */ |
97 | struct request { | 98 | struct request { |
98 | struct list_head queuelist; | 99 | struct list_head queuelist; |
99 | union { | 100 | union { |
100 | struct call_single_data csd; | 101 | struct call_single_data csd; |
101 | struct work_struct mq_flush_work; | ||
102 | unsigned long fifo_time; | 102 | unsigned long fifo_time; |
103 | }; | 103 | }; |
104 | 104 | ||
@@ -178,7 +178,6 @@ struct request { | |||
178 | unsigned short ioprio; | 178 | unsigned short ioprio; |
179 | 179 | ||
180 | void *special; /* opaque pointer available for LLD use */ | 180 | void *special; /* opaque pointer available for LLD use */ |
181 | char *buffer; /* kaddr of the current segment if available */ | ||
182 | 181 | ||
183 | int tag; | 182 | int tag; |
184 | int errors; | 183 | int errors; |
@@ -463,6 +462,10 @@ struct request_queue { | |||
463 | struct request *flush_rq; | 462 | struct request *flush_rq; |
464 | spinlock_t mq_flush_lock; | 463 | spinlock_t mq_flush_lock; |
465 | 464 | ||
465 | struct list_head requeue_list; | ||
466 | spinlock_t requeue_lock; | ||
467 | struct work_struct requeue_work; | ||
468 | |||
466 | struct mutex sysfs_lock; | 469 | struct mutex sysfs_lock; |
467 | 470 | ||
468 | int bypass_depth; | 471 | int bypass_depth; |
@@ -481,6 +484,9 @@ struct request_queue { | |||
481 | wait_queue_head_t mq_freeze_wq; | 484 | wait_queue_head_t mq_freeze_wq; |
482 | struct percpu_counter mq_usage_counter; | 485 | struct percpu_counter mq_usage_counter; |
483 | struct list_head all_q_node; | 486 | struct list_head all_q_node; |
487 | |||
488 | struct blk_mq_tag_set *tag_set; | ||
489 | struct list_head tag_set_list; | ||
484 | }; | 490 | }; |
485 | 491 | ||
486 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ | 492 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |
@@ -504,6 +510,7 @@ struct request_queue { | |||
504 | #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ | 510 | #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ |
505 | #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ | 511 | #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ |
506 | #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ | 512 | #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ |
513 | #define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ | ||
507 | 514 | ||
508 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ | 515 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ |
509 | (1 << QUEUE_FLAG_STACKABLE) | \ | 516 | (1 << QUEUE_FLAG_STACKABLE) | \ |
@@ -937,6 +944,7 @@ extern struct request *blk_fetch_request(struct request_queue *q); | |||
937 | */ | 944 | */ |
938 | extern bool blk_update_request(struct request *rq, int error, | 945 | extern bool blk_update_request(struct request *rq, int error, |
939 | unsigned int nr_bytes); | 946 | unsigned int nr_bytes); |
947 | extern void blk_finish_request(struct request *rq, int error); | ||
940 | extern bool blk_end_request(struct request *rq, int error, | 948 | extern bool blk_end_request(struct request *rq, int error, |
941 | unsigned int nr_bytes); | 949 | unsigned int nr_bytes); |
942 | extern void blk_end_request_all(struct request *rq, int error); | 950 | extern void blk_end_request_all(struct request *rq, int error); |
@@ -1053,7 +1061,6 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} | |||
1053 | * schedule() where blk_schedule_flush_plug() is called. | 1061 | * schedule() where blk_schedule_flush_plug() is called. |
1054 | */ | 1062 | */ |
1055 | struct blk_plug { | 1063 | struct blk_plug { |
1056 | unsigned long magic; /* detect uninitialized use-cases */ | ||
1057 | struct list_head list; /* requests */ | 1064 | struct list_head list; /* requests */ |
1058 | struct list_head mq_list; /* blk-mq requests */ | 1065 | struct list_head mq_list; /* blk-mq requests */ |
1059 | struct list_head cb_list; /* md requires an unplug callback */ | 1066 | struct list_head cb_list; /* md requires an unplug callback */ |
@@ -1102,7 +1109,8 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) | |||
1102 | /* | 1109 | /* |
1103 | * tag stuff | 1110 | * tag stuff |
1104 | */ | 1111 | */ |
1105 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) | 1112 | #define blk_rq_tagged(rq) \ |
1113 | ((rq)->mq_ctx || ((rq)->cmd_flags & REQ_QUEUED)) | ||
1106 | extern int blk_queue_start_tag(struct request_queue *, struct request *); | 1114 | extern int blk_queue_start_tag(struct request_queue *, struct request *); |
1107 | extern struct request *blk_queue_find_tag(struct request_queue *, int); | 1115 | extern struct request *blk_queue_find_tag(struct request_queue *, int); |
1108 | extern void blk_queue_end_tag(struct request_queue *, struct request *); | 1116 | extern void blk_queue_end_tag(struct request_queue *, struct request *); |
@@ -1370,8 +1378,9 @@ static inline void put_dev_sector(Sector p) | |||
1370 | } | 1378 | } |
1371 | 1379 | ||
1372 | struct work_struct; | 1380 | struct work_struct; |
1373 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); | 1381 | int kblockd_schedule_work(struct work_struct *work); |
1374 | int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay); | 1382 | int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); |
1383 | int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); | ||
1375 | 1384 | ||
1376 | #ifdef CONFIG_BLK_CGROUP | 1385 | #ifdef CONFIG_BLK_CGROUP |
1377 | /* | 1386 | /* |
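
The blkdev.h hunk above also drops the request_queue argument from the kblockd helpers and adds a CPU-pinned delayed variant. A small sketch of the new calling convention, with a hypothetical work item and callback that are not part of this patch:

#include <linux/blkdev.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static void my_kick_fn(struct work_struct *work)
{
	/* e.g. restart a stopped hardware queue */
}

static DECLARE_WORK(my_work, my_kick_fn);
static DECLARE_DELAYED_WORK(my_dwork, my_kick_fn);

static void my_kick_kblockd(void)
{
	/* Previously: kblockd_schedule_work(q, &my_work); */
	kblockd_schedule_work(&my_work);

	/* Delayed variant, likewise without the queue argument. */
	kblockd_schedule_delayed_work(&my_dwork, msecs_to_jiffies(10));
}
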
diff --git a/mm/Makefile b/mm/Makefile index b484452dac57..0173940407f6 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -30,7 +30,6 @@ endif | |||
30 | 30 | ||
31 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o | 31 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o |
32 | 32 | ||
33 | obj-$(CONFIG_BOUNCE) += bounce.o | ||
34 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o | 33 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o |
35 | obj-$(CONFIG_FRONTSWAP) += frontswap.o | 34 | obj-$(CONFIG_FRONTSWAP) += frontswap.o |
36 | obj-$(CONFIG_ZSWAP) += zswap.o | 35 | obj-$(CONFIG_ZSWAP) += zswap.o |