72 files changed, 3761 insertions(+), 2688 deletions(-)
diff --git a/Documentation/DocBook/filesystems.tmpl b/Documentation/DocBook/filesystems.tmpl
index 4f676838da06..bcdfdb9a9277 100644
--- a/Documentation/DocBook/filesystems.tmpl
+++ b/Documentation/DocBook/filesystems.tmpl
@@ -62,7 +62,7 @@
 !Efs/mpage.c
 !Efs/namei.c
 !Efs/buffer.c
-!Efs/bio.c
+!Eblock/bio.c
 !Efs/seq_file.c
 !Efs/filesystems.c
 !Efs/fs-writeback.c
diff --git a/block/Makefile b/block/Makefile
index 20645e88fb57..a2ce6ac935ec 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -2,13 +2,15 @@
 # Makefile for the kernel block layer
 #
 
-obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
+obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
 			blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \
 			blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
-			genhd.o scsi_ioctl.o partition-generic.o partitions/
+			genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
+			partitions/
 
+obj-$(CONFIG_BOUNCE)	+= bounce.o
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
@@ -20,3 +22,4 @@ obj-$(CONFIG_IOSCHED_CFQ)	+= cfq-iosched.o
 obj-$(CONFIG_BLOCK_COMPAT)	+= compat_ioctl.o
 obj-$(CONFIG_BLK_DEV_INTEGRITY)	+= blk-integrity.o
 obj-$(CONFIG_BLK_CMDLINE_PARSER)	+= cmdline-parser.o
+obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
diff --git a/fs/bio-integrity.c b/block/bio-integrity.c
index 1c2ce0c87711..9e241063a616 100644
--- a/fs/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -617,7 +617,7 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size)
 	if (!bs->bio_integrity_pool)
 		return -1;
 
-	bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
+	bs->bvec_integrity_pool = biovec_create_pool(pool_size);
 	if (!bs->bvec_integrity_pool) {
 		mempool_destroy(bs->bio_integrity_pool);
 		return -1;
diff --git a/fs/bio.c b/block/bio.c
index 6f0362b77806..96d28eee8a1e 100644
--- a/fs/bio.c
+++ b/block/bio.c
@@ -305,6 +305,8 @@ static void bio_chain_endio(struct bio *bio, int error)
 
 /**
  * bio_chain - chain bio completions
+ * @bio: the target bio
+ * @parent: the @bio's parent bio
  *
  * The caller won't have a bi_end_io called when @bio completes - instead,
  * @parent's bi_end_io won't be called until both @parent and @bio have
@@ -1011,8 +1013,7 @@ static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio,
 	bio->bi_private = bmd;
 }
 
-static struct bio_map_data *bio_alloc_map_data(int nr_segs,
-					       unsigned int iov_count,
+static struct bio_map_data *bio_alloc_map_data(unsigned int iov_count,
 					       gfp_t gfp_mask)
 {
 	if (iov_count > UIO_MAXIOV)
@@ -1154,7 +1155,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 	if (offset)
 		nr_pages++;
 
-	bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask);
+	bmd = bio_alloc_map_data(iov_count, gfp_mask);
 	if (!bmd)
 		return ERR_PTR(-ENOMEM);
 
@@ -1859,7 +1860,7 @@ EXPORT_SYMBOL_GPL(bio_trim);
  * create memory pools for biovec's in a bio_set.
  * use the global biovec slabs created for general use.
  */
-mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries)
+mempool_t *biovec_create_pool(int pool_entries)
 {
 	struct biovec_slab *bp = bvec_slabs + BIOVEC_MAX_IDX;
 
@@ -1922,7 +1923,7 @@ struct bio_set *bioset_create(unsigned int pool_size, unsigned int front_pad)
 	if (!bs->bio_pool)
 		goto bad;
 
-	bs->bvec_pool = biovec_create_pool(bs, pool_size);
+	bs->bvec_pool = biovec_create_pool(pool_size);
 	if (!bs->bvec_pool)
 		goto bad;
 
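The bio_chain() kerneldoc completed above describes the parent/child completion contract: the child's end_io is suppressed and the parent's end_io runs only once both bios have finished. As a hedged illustration (not code from this series; it assumes a kernel of this vintage where bio_split() and generic_make_request() are available, and submit_in_halves() is a hypothetical driver helper):

/*
 * Sketch only: split off the first half of a bio and chain it to the
 * remainder, so the parent's bi_end_io runs once both halves complete.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>

static void submit_in_halves(struct bio *parent, struct bio_set *bs)
{
	struct bio *half = bio_split(parent, bio_sectors(parent) / 2,
				     GFP_NOIO, bs);

	bio_chain(half, parent);	/* parent now also waits for @half */
	generic_make_request(half);
	generic_make_request(parent);	/* parent holds the remaining sectors */
}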
diff --git a/block/blk-core.c b/block/blk-core.c
index a0e3096c4bb5..d87be5b4e554 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -146,8 +146,8 @@ void blk_dump_rq_flags(struct request *rq, char *msg)
 	printk(KERN_INFO "  sector %llu, nr/cnr %u/%u\n",
 	       (unsigned long long)blk_rq_pos(rq),
 	       blk_rq_sectors(rq), blk_rq_cur_sectors(rq));
-	printk(KERN_INFO "  bio %p, biotail %p, buffer %p, len %u\n",
-	       rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq));
+	printk(KERN_INFO "  bio %p, biotail %p, len %u\n",
+	       rq->bio, rq->biotail, blk_rq_bytes(rq));
 
 	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
 		printk(KERN_INFO "  cdb: ");
@@ -251,8 +251,10 @@ void blk_sync_queue(struct request_queue *q)
 		struct blk_mq_hw_ctx *hctx;
 		int i;
 
-		queue_for_each_hw_ctx(q, hctx, i)
-			cancel_delayed_work_sync(&hctx->delayed_work);
+		queue_for_each_hw_ctx(q, hctx, i) {
+			cancel_delayed_work_sync(&hctx->run_work);
+			cancel_delayed_work_sync(&hctx->delay_work);
+		}
 	} else {
 		cancel_delayed_work_sync(&q->delay_work);
 	}
@@ -574,12 +576,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	if (!q)
 		return NULL;
 
-	if (percpu_counter_init(&q->mq_usage_counter, 0))
-		goto fail_q;
-
 	q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask);
 	if (q->id < 0)
-		goto fail_c;
+		goto fail_q;
 
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
@@ -637,8 +636,6 @@ fail_bdi:
 	bdi_destroy(&q->backing_dev_info);
 fail_id:
 	ida_simple_remove(&blk_queue_ida, q->id);
-fail_c:
-	percpu_counter_destroy(&q->mq_usage_counter);
 fail_q:
 	kmem_cache_free(blk_requestq_cachep, q);
 	return NULL;
@@ -846,6 +843,47 @@ static void freed_request(struct request_list *rl, unsigned int flags)
 		__freed_request(rl, sync ^ 1);
 }
 
+int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
+{
+	struct request_list *rl;
+
+	spin_lock_irq(q->queue_lock);
+	q->nr_requests = nr;
+	blk_queue_congestion_threshold(q);
+
+	/* congestion isn't cgroup aware and follows root blkcg for now */
+	rl = &q->root_rl;
+
+	if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
+		blk_set_queue_congested(q, BLK_RW_SYNC);
+	else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
+		blk_clear_queue_congested(q, BLK_RW_SYNC);
+
+	if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
+		blk_set_queue_congested(q, BLK_RW_ASYNC);
+	else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
+		blk_clear_queue_congested(q, BLK_RW_ASYNC);
+
+	blk_queue_for_each_rl(rl, q) {
+		if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
+			blk_set_rl_full(rl, BLK_RW_SYNC);
+		} else {
+			blk_clear_rl_full(rl, BLK_RW_SYNC);
+			wake_up(&rl->wait[BLK_RW_SYNC]);
+		}
+
+		if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) {
+			blk_set_rl_full(rl, BLK_RW_ASYNC);
+		} else {
+			blk_clear_rl_full(rl, BLK_RW_ASYNC);
+			wake_up(&rl->wait[BLK_RW_ASYNC]);
+		}
+	}
+
+	spin_unlock_irq(q->queue_lock);
+	return 0;
+}
+
 /*
  * Determine if elevator data should be initialized when allocating the
  * request associated with @bio.
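The new blk_update_nr_requests() above pulls queue-depth retuning — congestion on/off marks, per-list full flags, and waiter wakeups — into one helper under the queue lock. A hedged sketch of the intended caller; the real hookup lives in block/blk-sysfs.c outside this excerpt, and queue_var_store()/BLKDEV_MIN_RQ are existing helpers there:

static ssize_t
queue_requests_store(struct request_queue *q, const char *page, size_t count)
{
	unsigned long nr;
	ssize_t ret = queue_var_store(&nr, page, count);

	if (ret < 0)
		return ret;

	if (nr < BLKDEV_MIN_RQ)
		nr = BLKDEV_MIN_RQ;

	blk_update_nr_requests(q, nr);	/* rebalance thresholds, wake sleepers */
	return ret;
}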
@@ -1135,7 +1173,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
 	if (q->mq_ops)
-		return blk_mq_alloc_request(q, rw, gfp_mask);
+		return blk_mq_alloc_request(q, rw, gfp_mask, false);
 	else
 		return blk_old_get_request(q, rw, gfp_mask);
 }
@@ -1231,12 +1269,15 @@ static void add_acct_request(struct request_queue *q, struct request *rq,
 static void part_round_stats_single(int cpu, struct hd_struct *part,
 				    unsigned long now)
 {
+	int inflight;
+
 	if (now == part->stamp)
 		return;
 
-	if (part_in_flight(part)) {
+	inflight = part_in_flight(part);
+	if (inflight) {
 		__part_stat_add(cpu, part, time_in_queue,
-				part_in_flight(part) * (now - part->stamp));
+				inflight * (now - part->stamp));
 		__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
 	}
 	part->stamp = now;
@@ -1360,7 +1401,6 @@ void blk_add_request_payload(struct request *rq, struct page *page,
 
 	rq->__data_len = rq->resid_len = len;
 	rq->nr_phys_segments = 1;
-	rq->buffer = bio_data(bio);
 }
 EXPORT_SYMBOL_GPL(blk_add_request_payload);
 
@@ -1402,12 +1442,6 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
 	bio->bi_next = req->bio;
 	req->bio = bio;
 
-	/*
-	 * may not be valid. if the low level driver said
-	 * it didn't need a bounce buffer then it better
-	 * not touch req->buffer either...
-	 */
-	req->buffer = bio_data(bio);
 	req->__sector = bio->bi_iter.bi_sector;
 	req->__data_len += bio->bi_iter.bi_size;
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
@@ -1432,6 +1466,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
 * added on the elevator at this point. In addition, we don't have
 * reliable access to the elevator outside queue lock. Only check basic
 * merging parameters without querying the elevator.
+ *
+ * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 			    unsigned int *request_count)
@@ -1441,9 +1477,6 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 	bool ret = false;
 	struct list_head *plug_list;
 
-	if (blk_queue_nomerges(q))
-		goto out;
-
 	plug = current->plug;
 	if (!plug)
 		goto out;
@@ -1522,7 +1555,8 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio)
 	 * Check if we can merge with the plugged list before grabbing
 	 * any locks.
 	 */
-	if (blk_attempt_plug_merge(q, bio, &request_count))
+	if (!blk_queue_nomerges(q) &&
+	    blk_attempt_plug_merge(q, bio, &request_count))
 		return;
 
 	spin_lock_irq(q->queue_lock);
@@ -1654,7 +1688,7 @@ static int __init fail_make_request_debugfs(void)
 	struct dentry *dir = fault_create_debugfs_attr("fail_make_request",
 						NULL, &fail_make_request);
 
-	return IS_ERR(dir) ? PTR_ERR(dir) : 0;
+	return PTR_ERR_OR_ZERO(dir);
 }
 
 late_initcall(fail_make_request_debugfs);
@@ -2434,7 +2468,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 	}
 
 	req->__data_len -= total_bytes;
-	req->buffer = bio_data(req->bio);
 
 	/* update sector only for requests with clear definition of sector */
 	if (req->cmd_type == REQ_TYPE_FS)
@@ -2503,7 +2536,7 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
 /*
  * queue lock must be held
  */
-static void blk_finish_request(struct request *req, int error)
+void blk_finish_request(struct request *req, int error)
 {
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
@@ -2529,6 +2562,7 @@ static void blk_finish_request(struct request *req, int error)
 		__blk_put_request(req->q, req);
 	}
 }
+EXPORT_SYMBOL(blk_finish_request);
 
 /**
  * blk_end_bidi_request - Complete a bidi request
@@ -2752,10 +2786,9 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 	/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
 	rq->cmd_flags |= bio->bi_rw & REQ_WRITE;
 
-	if (bio_has_data(bio)) {
+	if (bio_has_data(bio))
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
-		rq->buffer = bio_data(bio);
-	}
+
 	rq->__data_len = bio->bi_iter.bi_size;
 	rq->bio = rq->biotail = bio;
 
@@ -2831,7 +2864,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone);
 
 /*
  * Copy attributes of the original request to the clone request.
- * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied.
+ * The actual data parts (e.g. ->cmd, ->sense) are not copied.
 */
 static void __blk_rq_prep_clone(struct request *dst, struct request *src)
 {
@@ -2857,7 +2890,7 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src)
 *
 * Description:
 *     Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq.
- *     The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense)
+ *     The actual data parts of @rq_src (e.g. ->cmd, ->sense)
 *     are not copied, and copying such parts is the caller's responsibility.
 *     Also, pages which the original bios are pointing to are not copied
 *     and the cloned bios just point same pages.
@@ -2904,19 +2937,26 @@ free_and_out:
 }
 EXPORT_SYMBOL_GPL(blk_rq_prep_clone);
 
-int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
+int kblockd_schedule_work(struct work_struct *work)
 {
 	return queue_work(kblockd_workqueue, work);
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
-int kblockd_schedule_delayed_work(struct request_queue *q,
-				  struct delayed_work *dwork, unsigned long delay)
+int kblockd_schedule_delayed_work(struct delayed_work *dwork,
+				  unsigned long delay)
 {
 	return queue_delayed_work(kblockd_workqueue, dwork, delay);
 }
 EXPORT_SYMBOL(kblockd_schedule_delayed_work);
 
+int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
+				     unsigned long delay)
+{
+	return queue_delayed_work_on(cpu, kblockd_workqueue, dwork, delay);
+}
+EXPORT_SYMBOL(kblockd_schedule_delayed_work_on);
+
 #define PLUG_MAGIC	0x91827364
 
 /**
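The kblockd helpers above lose their unused request_queue argument and gain a CPU-pinned delayed variant. A hedged usage sketch (poll_fn and the kick helpers are hypothetical driver names):

#include <linux/blkdev.h>
#include <linux/workqueue.h>

static void poll_fn(struct work_struct *work)
{
	/* hypothetical completion-polling body */
}

static DECLARE_DELAYED_WORK(poll_work, poll_fn);

static void kick_poll_on(int cpu)
{
	/* new variant: run poll_fn on @cpu after one jiffy */
	kblockd_schedule_delayed_work_on(cpu, &poll_work, 1);
}

static void kick_poll(void)
{
	/* old call site, minus the request_queue argument */
	kblockd_schedule_delayed_work(&poll_work, 1);
}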
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 43e6b4755e9a..ef608b35d9be 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -130,21 +130,13 @@ static void blk_flush_restore_request(struct request *rq)
 	blk_clear_rq_complete(rq);
 }
 
-static void mq_flush_run(struct work_struct *work)
-{
-	struct request *rq;
-
-	rq = container_of(work, struct request, mq_flush_work);
-
-	memset(&rq->csd, 0, sizeof(rq->csd));
-	blk_mq_insert_request(rq, false, true, false);
-}
-
 static bool blk_flush_queue_rq(struct request *rq, bool add_front)
 {
 	if (rq->q->mq_ops) {
-		INIT_WORK(&rq->mq_flush_work, mq_flush_run);
-		kblockd_schedule_work(rq->q, &rq->mq_flush_work);
+		struct request_queue *q = rq->q;
+
+		blk_mq_add_to_requeue_list(rq, add_front);
+		blk_mq_kick_requeue_list(q);
 		return false;
 	} else {
 		if (add_front)
@@ -306,23 +298,9 @@ static bool blk_kick_flush(struct request_queue *q)
 	 */
 	q->flush_pending_idx ^= 1;
 
-	if (q->mq_ops) {
-		struct blk_mq_ctx *ctx = first_rq->mq_ctx;
-		struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
-
-		blk_mq_rq_init(hctx, q->flush_rq);
-		q->flush_rq->mq_ctx = ctx;
-
-		/*
-		 * Reuse the tag value from the fist waiting request,
-		 * with blk-mq the tag is generated during request
-		 * allocation and drivers can rely on it being inside
-		 * the range they asked for.
-		 */
-		q->flush_rq->tag = first_rq->tag;
-	} else {
-		blk_rq_init(q, q->flush_rq);
-	}
+	blk_rq_init(q, q->flush_rq);
+	if (q->mq_ops)
+		blk_mq_clone_flush_request(q->flush_rq, first_rq);
 
 	q->flush_rq->cmd_type = REQ_TYPE_FS;
 	q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
index c11d24e379e2..d828b44a404b 100644
--- a/block/blk-iopoll.c
+++ b/block/blk-iopoll.c
@@ -64,12 +64,12 @@ EXPORT_SYMBOL(__blk_iopoll_complete);
 * iopoll handler will not be invoked again before blk_iopoll_sched_prep()
 * is called.
 **/
-void blk_iopoll_complete(struct blk_iopoll *iopoll)
+void blk_iopoll_complete(struct blk_iopoll *iop)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	__blk_iopoll_complete(iopoll);
+	__blk_iopoll_complete(iop);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(blk_iopoll_complete);
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 97a733cf3d5f..8411be3c19d3 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -226,8 +226,8 @@ EXPORT_SYMBOL(blkdev_issue_write_same);
 * Generate and issue number of bios with zerofiled pages.
 */
 
-int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
-			sector_t nr_sects, gfp_t gfp_mask)
+static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+				  sector_t nr_sects, gfp_t gfp_mask)
 {
 	int ret;
 	struct bio *bio;
diff --git a/block/blk-map.c b/block/blk-map.c
index f7b22bc21518..f890d4345b0c 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -155,7 +155,6 @@ int blk_rq_map_user(struct request_queue *q, struct request *rq,
 	if (!bio_flagged(bio, BIO_USER_MAPPED))
 		rq->cmd_flags |= REQ_COPY_USER;
 
-	rq->buffer = NULL;
 	return 0;
 unmap_rq:
 	blk_rq_unmap_user(bio);
@@ -238,7 +237,6 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 	blk_queue_bounce(q, &bio);
 	bio_get(bio);
 	blk_rq_bio_prep(q, rq, bio);
-	rq->buffer = NULL;
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_user_iov);
@@ -325,7 +323,6 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
 	}
 
 	blk_queue_bounce(q, &rq->bio);
-	rq->buffer = NULL;
 	return 0;
 }
 EXPORT_SYMBOL(blk_rq_map_kern);
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index 136ef8643bba..bb3ed488f7b5 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -1,3 +1,8 @@
+/*
+ * CPU notifier helper code for blk-mq
+ *
+ * Copyright (C) 2013-2014 Jens Axboe
+ */
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/init.h>
@@ -18,14 +23,18 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
 {
 	unsigned int cpu = (unsigned long) hcpu;
 	struct blk_mq_cpu_notifier *notify;
+	int ret = NOTIFY_OK;
 
 	raw_spin_lock(&blk_mq_cpu_notify_lock);
 
-	list_for_each_entry(notify, &blk_mq_cpu_notify_list, list)
-		notify->notify(notify->data, action, cpu);
+	list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
+		ret = notify->notify(notify->data, action, cpu);
+		if (ret != NOTIFY_OK)
+			break;
+	}
 
 	raw_spin_unlock(&blk_mq_cpu_notify_lock);
-	return NOTIFY_OK;
+	return ret;
 }
 
 void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
@@ -45,7 +54,7 @@ void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 }
 
 void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
-			      void (*fn)(void *, unsigned long, unsigned int),
+			      int (*fn)(void *, unsigned long, unsigned int),
 			      void *data)
 {
 	notifier->notify = fn;
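With the notify callback now returning int, blk_mq_main_cpu_notify() stops walking the chain on the first non-NOTIFY_OK result. A hedged sketch of a callback under the new signature (struct my_driver and drain_cpu_queue() are hypothetical stand-ins):

#include <linux/cpu.h>
#include <linux/notifier.h>

static int my_cpu_notify(void *data, unsigned long action, unsigned int cpu)
{
	struct my_driver *drv = data;

	if (action == CPU_DEAD)
		drain_cpu_queue(drv, cpu);	/* move pending work off @cpu */

	return NOTIFY_OK;	/* any other value stops the notifier walk */
}

/* registered via the updated helpers:
 *	blk_mq_init_cpu_notifier(&drv->cpu_notifier, my_cpu_notify, drv);
 *	blk_mq_register_cpu_notifier(&drv->cpu_notifier);
 */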
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 097921329619..1065d7c65fa1 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -1,3 +1,8 @@
+/*
+ * CPU <-> hardware queue mapping helpers
+ *
+ * Copyright (C) 2013-2014 Jens Axboe
+ */
 #include <linux/kernel.h>
 #include <linux/threads.h>
 #include <linux/module.h>
@@ -80,19 +85,35 @@ int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues)
 	return 0;
 }
 
-unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg)
+unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
 {
 	unsigned int *map;
 
 	/* If cpus are offline, map them to first hctx */
 	map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL,
-			   reg->numa_node);
+			   set->numa_node);
 	if (!map)
 		return NULL;
 
-	if (!blk_mq_update_queue_map(map, reg->nr_hw_queues))
+	if (!blk_mq_update_queue_map(map, set->nr_hw_queues))
 		return map;
 
 	kfree(map);
 	return NULL;
 }
+
+/*
+ * We have no quick way of doing reverse lookups. This is only used at
+ * queue init time, so runtime isn't important.
+ */
+int blk_mq_hw_queue_to_node(unsigned int *mq_map, unsigned int index)
+{
+	int i;
+
+	for_each_possible_cpu(i) {
+		if (index == mq_map[i])
+			return cpu_to_node(i);
+	}
+
+	return NUMA_NO_NODE;
+}
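blk_mq_hw_queue_to_node() above walks the whole cpu-to-queue map, which is acceptable since it only runs at queue init. A hedged sketch of the kind of caller it enables; the field names follow the blk-mq structures of this period, and error unwinding is elided:

static int alloc_hctx_near_its_cpus(struct request_queue *q, unsigned int i)
{
	int node = blk_mq_hw_queue_to_node(q->mq_map, i);
	struct blk_mq_hw_ctx *hctx;

	/* place the hardware context on the node that serves its CPUs */
	hctx = kzalloc_node(sizeof(*hctx), GFP_KERNEL, node);
	if (!hctx)
		return -ENOMEM;

	q->queue_hw_ctx[i] = hctx;
	return 0;
}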
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index b0ba264b0522..99a60a829e69 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -203,59 +203,24 @@ static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
 	return ret;
 }
 
-static ssize_t blk_mq_hw_sysfs_ipi_show(struct blk_mq_hw_ctx *hctx, char *page)
-{
-	ssize_t ret;
-
-	spin_lock(&hctx->lock);
-	ret = sprintf(page, "%u\n", !!(hctx->flags & BLK_MQ_F_SHOULD_IPI));
-	spin_unlock(&hctx->lock);
-
-	return ret;
-}
-
-static ssize_t blk_mq_hw_sysfs_ipi_store(struct blk_mq_hw_ctx *hctx,
-					 const char *page, size_t len)
+static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
 {
-	struct blk_mq_ctx *ctx;
-	unsigned long ret;
-	unsigned int i;
-
-	if (kstrtoul(page, 10, &ret)) {
-		pr_err("blk-mq-sysfs: invalid input '%s'\n", page);
-		return -EINVAL;
-	}
-
-	spin_lock(&hctx->lock);
-	if (ret)
-		hctx->flags |= BLK_MQ_F_SHOULD_IPI;
-	else
-		hctx->flags &= ~BLK_MQ_F_SHOULD_IPI;
-	spin_unlock(&hctx->lock);
-
-	hctx_for_each_ctx(hctx, ctx, i)
-		ctx->ipi_redirect = !!ret;
-
-	return len;
+	return blk_mq_tag_sysfs_show(hctx->tags, page);
 }
 
-static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
+static ssize_t blk_mq_hw_sysfs_active_show(struct blk_mq_hw_ctx *hctx, char *page)
 {
-	return blk_mq_tag_sysfs_show(hctx->tags, page);
+	return sprintf(page, "%u\n", atomic_read(&hctx->nr_active));
 }
 
 static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
 {
-	unsigned int i, queue_num, first = 1;
+	unsigned int i, first = 1;
 	ssize_t ret = 0;
 
 	blk_mq_disable_hotplug();
 
-	for_each_online_cpu(i) {
-		queue_num = hctx->queue->mq_map[i];
-		if (queue_num != hctx->queue_num)
-			continue;
-
+	for_each_cpu(i, hctx->cpumask) {
 		if (first)
 			ret += sprintf(ret + page, "%u", i);
 		else
@@ -307,15 +272,14 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = {
 	.attr = {.name = "dispatched", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_dispatched_show,
 };
+static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_active = {
+	.attr = {.name = "active", .mode = S_IRUGO },
+	.show = blk_mq_hw_sysfs_active_show,
+};
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
 	.attr = {.name = "pending", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_rq_list_show,
 };
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_ipi = {
-	.attr = {.name = "ipi_redirect", .mode = S_IRUGO | S_IWUSR},
-	.show = blk_mq_hw_sysfs_ipi_show,
-	.store = blk_mq_hw_sysfs_ipi_store,
-};
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
 	.attr = {.name = "tags", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_tags_show,
@@ -330,9 +294,9 @@ static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_run.attr,
 	&blk_mq_hw_sysfs_dispatched.attr,
 	&blk_mq_hw_sysfs_pending.attr,
-	&blk_mq_hw_sysfs_ipi.attr,
 	&blk_mq_hw_sysfs_tags.attr,
 	&blk_mq_hw_sysfs_cpus.attr,
+	&blk_mq_hw_sysfs_active.attr,
 	NULL,
 };
 
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 83ae96c51a27..d90c4aeb7dd3 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -1,78 +1,345 @@
+/*
+ * Fast and scalable bitmap tagging variant. Uses sparser bitmaps spread
+ * over multiple cachelines to avoid ping-pong between multiple submitters
+ * or submitter and completer. Uses rolling wakeups to avoid falling of
+ * the scaling cliff when we run out of tags and have to start putting
+ * submitters to sleep.
+ *
+ * Uses active queue tracking to support fairer distribution of tags
+ * between multiple submitters when a shared tag map is used.
+ *
+ * Copyright (C) 2013-2014 Jens Axboe
+ */
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/percpu_ida.h>
+#include <linux/random.h>
 
 #include <linux/blk-mq.h>
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
 
+static bool bt_has_free_tags(struct blk_mq_bitmap_tags *bt)
+{
+	int i;
+
+	for (i = 0; i < bt->map_nr; i++) {
+		struct blk_align_bitmap *bm = &bt->map[i];
+		int ret;
+
+		ret = find_first_zero_bit(&bm->word, bm->depth);
+		if (ret < bm->depth)
+			return true;
+	}
+
+	return false;
+}
+
+bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
+{
+	if (!tags)
+		return true;
+
+	return bt_has_free_tags(&tags->bitmap_tags);
+}
+
+static inline void bt_index_inc(unsigned int *index)
+{
+	*index = (*index + 1) & (BT_WAIT_QUEUES - 1);
+}
+
 /*
- * Per tagged queue (tag address space) map
+ * If a previously inactive queue goes active, bump the active user count.
 */
-struct blk_mq_tags {
-	unsigned int nr_tags;
-	unsigned int nr_reserved_tags;
-	unsigned int nr_batch_move;
-	unsigned int nr_max_cache;
+bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
+	    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		atomic_inc(&hctx->tags->active_queues);
 
-	struct percpu_ida free_tags;
-	struct percpu_ida reserved_tags;
-};
+	return true;
+}
 
-void blk_mq_wait_for_tags(struct blk_mq_tags *tags)
+/*
+ * Wakeup all potentially sleeping on normal (non-reserved) tags
+ */
+static void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags)
 {
-	int tag = blk_mq_get_tag(tags, __GFP_WAIT, false);
-	blk_mq_put_tag(tags, tag);
+	struct blk_mq_bitmap_tags *bt;
+	int i, wake_index;
+
+	bt = &tags->bitmap_tags;
+	wake_index = bt->wake_index;
+	for (i = 0; i < BT_WAIT_QUEUES; i++) {
+		struct bt_wait_state *bs = &bt->bs[wake_index];
+
+		if (waitqueue_active(&bs->wait))
+			wake_up(&bs->wait);
+
+		bt_index_inc(&wake_index);
+	}
 }
 
-bool blk_mq_has_free_tags(struct blk_mq_tags *tags)
+/*
+ * If a previously busy queue goes inactive, potential waiters could now
+ * be allowed to queue. Wake them up and check.
+ */
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+
+	if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return;
+
+	atomic_dec(&tags->active_queues);
+
+	blk_mq_tag_wakeup_all(tags);
+}
+
+/*
+ * For shared tag users, we track the number of currently active users
+ * and attempt to provide a fair share of the tag depth for each of them.
+ */
+static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
+				  struct blk_mq_bitmap_tags *bt)
+{
+	unsigned int depth, users;
+
+	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_SHARED))
+		return true;
+	if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+		return true;
+
+	/*
+	 * Don't try dividing an ant
+	 */
+	if (bt->depth == 1)
+		return true;
+
+	users = atomic_read(&hctx->tags->active_queues);
+	if (!users)
+		return true;
+
+	/*
+	 * Allow at least some tags
+	 */
+	depth = max((bt->depth + users - 1) / users, 4U);
+	return atomic_read(&hctx->nr_active) < depth;
+}
+
+static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
 {
-	return !tags ||
-		percpu_ida_free_tags(&tags->free_tags, nr_cpu_ids) != 0;
+	int tag, org_last_tag, end;
+
+	org_last_tag = last_tag;
+	end = bm->depth;
+	do {
+restart:
+		tag = find_next_zero_bit(&bm->word, end, last_tag);
+		if (unlikely(tag >= end)) {
+			/*
+			 * We started with an offset, start from 0 to
+			 * exhaust the map.
+			 */
+			if (org_last_tag && last_tag) {
+				end = last_tag;
+				last_tag = 0;
+				goto restart;
+			}
+			return -1;
+		}
+		last_tag = tag + 1;
+	} while (test_and_set_bit_lock(tag, &bm->word));
+
+	return tag;
 }
 
-static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp)
+/*
+ * Straight forward bitmap tag implementation, where each bit is a tag
+ * (cleared == free, and set == busy). The small twist is using per-cpu
+ * last_tag caches, which blk-mq stores in the blk_mq_ctx software queue
+ * contexts. This enables us to drastically limit the space searched,
+ * without dirtying an extra shared cacheline like we would if we stored
+ * the cache value inside the shared blk_mq_bitmap_tags structure. On top
+ * of that, each word of tags is in a separate cacheline. This means that
+ * multiple users will tend to stick to different cachelines, at least
+ * until the map is exhausted.
+ */
+static int __bt_get(struct blk_mq_hw_ctx *hctx, struct blk_mq_bitmap_tags *bt,
+		    unsigned int *tag_cache)
 {
+	unsigned int last_tag, org_last_tag;
+	int index, i, tag;
+
+	if (!hctx_may_queue(hctx, bt))
+		return -1;
+
+	last_tag = org_last_tag = *tag_cache;
+	index = TAG_TO_INDEX(bt, last_tag);
+
+	for (i = 0; i < bt->map_nr; i++) {
+		tag = __bt_get_word(&bt->map[index], TAG_TO_BIT(bt, last_tag));
+		if (tag != -1) {
+			tag += (index << bt->bits_per_word);
+			goto done;
+		}
+
+		last_tag = 0;
+		if (++index >= bt->map_nr)
+			index = 0;
+	}
+
+	*tag_cache = 0;
+	return -1;
+
+	/*
+	 * Only update the cache from the allocation path, if we ended
+	 * up using the specific cached tag.
+	 */
+done:
+	if (tag == org_last_tag) {
+		last_tag = tag + 1;
+		if (last_tag >= bt->depth - 1)
+			last_tag = 0;
+
+		*tag_cache = last_tag;
+	}
+
+	return tag;
+}
+
+static struct bt_wait_state *bt_wait_ptr(struct blk_mq_bitmap_tags *bt,
+					 struct blk_mq_hw_ctx *hctx)
+{
+	struct bt_wait_state *bs;
+
+	if (!hctx)
+		return &bt->bs[0];
+
+	bs = &bt->bs[hctx->wait_index];
+	bt_index_inc(&hctx->wait_index);
+	return bs;
+}
+
+static int bt_get(struct blk_mq_bitmap_tags *bt, struct blk_mq_hw_ctx *hctx,
+		  unsigned int *last_tag, gfp_t gfp)
+{
+	struct bt_wait_state *bs;
+	DEFINE_WAIT(wait);
 	int tag;
 
-	tag = percpu_ida_alloc(&tags->free_tags, (gfp & __GFP_WAIT) ?
-			       TASK_UNINTERRUPTIBLE : TASK_RUNNING);
-	if (tag < 0)
-		return BLK_MQ_TAG_FAIL;
-	return tag + tags->nr_reserved_tags;
+	tag = __bt_get(hctx, bt, last_tag);
+	if (tag != -1)
+		return tag;
+
+	if (!(gfp & __GFP_WAIT))
+		return -1;
+
+	bs = bt_wait_ptr(bt, hctx);
+	do {
+		bool was_empty;
+
+		was_empty = list_empty(&wait.task_list);
+		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
+
+		tag = __bt_get(hctx, bt, last_tag);
+		if (tag != -1)
+			break;
+
+		if (was_empty)
+			atomic_set(&bs->wait_cnt, bt->wake_cnt);
+
+		io_schedule();
+	} while (1);
+
+	finish_wait(&bs->wait, &wait);
+	return tag;
+}
+
+static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags,
+				     struct blk_mq_hw_ctx *hctx,
+				     unsigned int *last_tag, gfp_t gfp)
+{
+	int tag;
+
+	tag = bt_get(&tags->bitmap_tags, hctx, last_tag, gfp);
+	if (tag >= 0)
+		return tag + tags->nr_reserved_tags;
+
+	return BLK_MQ_TAG_FAIL;
 }
 
 static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags,
 					      gfp_t gfp)
 {
-	int tag;
+	int tag, zero = 0;
 
 	if (unlikely(!tags->nr_reserved_tags)) {
 		WARN_ON_ONCE(1);
 		return BLK_MQ_TAG_FAIL;
 	}
 
-	tag = percpu_ida_alloc(&tags->reserved_tags, (gfp & __GFP_WAIT) ?
-			       TASK_UNINTERRUPTIBLE : TASK_RUNNING);
+	tag = bt_get(&tags->breserved_tags, NULL, &zero, gfp);
 	if (tag < 0)
 		return BLK_MQ_TAG_FAIL;
+
 	return tag;
 }
 
-unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp, bool reserved)
+unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag,
+			    gfp_t gfp, bool reserved)
 {
 	if (!reserved)
-		return __blk_mq_get_tag(tags, gfp);
+		return __blk_mq_get_tag(hctx->tags, hctx, last_tag, gfp);
 
 	return __blk_mq_get_reserved_tag(tags, gfp);
+}
+
+static struct bt_wait_state *bt_wake_ptr(struct blk_mq_bitmap_tags *bt)
+{
+	int i, wake_index;
+
+	wake_index = bt->wake_index;
+	for (i = 0; i < BT_WAIT_QUEUES; i++) {
+		struct bt_wait_state *bs = &bt->bs[wake_index];
+
+		if (waitqueue_active(&bs->wait)) {
+			if (wake_index != bt->wake_index)
+				bt->wake_index = wake_index;
+
+			return bs;
+		}
+
+		bt_index_inc(&wake_index);
+	}
+
+	return NULL;
+}
+
+static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
+{
+	const int index = TAG_TO_INDEX(bt, tag);
+	struct bt_wait_state *bs;
+
+	/*
+	 * The unlock memory barrier need to order access to req in free
+	 * path and clearing tag bit
+	 */
+	clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word);
+
+	bs = bt_wake_ptr(bt);
+	if (bs && atomic_dec_and_test(&bs->wait_cnt)) {
+		atomic_set(&bs->wait_cnt, bt->wake_cnt);
+		bt_index_inc(&bt->wake_index);
+		wake_up(&bs->wait);
+	}
 }
 
 static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
 {
 	BUG_ON(tag >= tags->nr_tags);
 
-	percpu_ida_free(&tags->free_tags, tag - tags->nr_reserved_tags);
+	bt_clear_tag(&tags->bitmap_tags, tag);
 }
 
 static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
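To make the fair-share rule in hctx_may_queue() concrete, here is a standalone userspace rendering of the same arithmetic: each of the active queues on a shared map gets roughly depth/users tags, rounded up, with a floor of four (the illustrative depths below are arbitrary):

#include <stdio.h>

/* mirrors: depth = max((bt->depth + users - 1) / users, 4U); */
static unsigned int fair_depth(unsigned int depth, unsigned int users)
{
	unsigned int share = (depth + users - 1) / users;	/* round up */

	return share > 4 ? share : 4;
}

int main(void)
{
	printf("%u\n", fair_depth(256, 3));	/* 86: three queues split 256 tags */
	printf("%u\n", fair_depth(256, 100));	/* 4: floor keeps starved queues alive */
	return 0;
}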
@@ -80,22 +347,43 @@ static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
 {
 	BUG_ON(tag >= tags->nr_reserved_tags);
 
-	percpu_ida_free(&tags->reserved_tags, tag);
+	bt_clear_tag(&tags->breserved_tags, tag);
 }
 
-void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
+void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
+		    unsigned int *last_tag)
 {
-	if (tag >= tags->nr_reserved_tags)
-		__blk_mq_put_tag(tags, tag);
-	else
+	struct blk_mq_tags *tags = hctx->tags;
+
+	if (tag >= tags->nr_reserved_tags) {
+		const int real_tag = tag - tags->nr_reserved_tags;
+
+		__blk_mq_put_tag(tags, real_tag);
+		*last_tag = real_tag;
+	} else
 		__blk_mq_put_reserved_tag(tags, tag);
 }
 
-static int __blk_mq_tag_iter(unsigned id, void *data)
+static void bt_for_each_free(struct blk_mq_bitmap_tags *bt,
+			     unsigned long *free_map, unsigned int off)
 {
-	unsigned long *tag_map = data;
-	__set_bit(id, tag_map);
-	return 0;
+	int i;
+
+	for (i = 0; i < bt->map_nr; i++) {
+		struct blk_align_bitmap *bm = &bt->map[i];
+		int bit = 0;
+
+		do {
+			bit = find_next_zero_bit(&bm->word, bm->depth, bit);
+			if (bit >= bm->depth)
+				break;
+
+			__set_bit(bit + off, free_map);
+			bit++;
+		} while (1);
+
+		off += (1 << bt->bits_per_word);
+	}
 }
 
 void blk_mq_tag_busy_iter(struct blk_mq_tags *tags,
@@ -109,21 +397,128 @@ void blk_mq_tag_busy_iter(struct blk_mq_tags *tags,
 	if (!tag_map)
 		return;
 
-	percpu_ida_for_each_free(&tags->free_tags, __blk_mq_tag_iter, tag_map);
+	bt_for_each_free(&tags->bitmap_tags, tag_map, tags->nr_reserved_tags);
 	if (tags->nr_reserved_tags)
-		percpu_ida_for_each_free(&tags->reserved_tags, __blk_mq_tag_iter,
-					 tag_map);
+		bt_for_each_free(&tags->breserved_tags, tag_map, 0);
 
 	fn(data, tag_map);
 	kfree(tag_map);
 }
+EXPORT_SYMBOL(blk_mq_tag_busy_iter);
+
+static unsigned int bt_unused_tags(struct blk_mq_bitmap_tags *bt)
+{
+	unsigned int i, used;
+
+	for (i = 0, used = 0; i < bt->map_nr; i++) {
+		struct blk_align_bitmap *bm = &bt->map[i];
+
+		used += bitmap_weight(&bm->word, bm->depth);
+	}
+
+	return bt->depth - used;
+}
+
+static void bt_update_count(struct blk_mq_bitmap_tags *bt,
+			    unsigned int depth)
+{
+	unsigned int tags_per_word = 1U << bt->bits_per_word;
+	unsigned int map_depth = depth;
+
+	if (depth) {
+		int i;
+
+		for (i = 0; i < bt->map_nr; i++) {
+			bt->map[i].depth = min(map_depth, tags_per_word);
+			map_depth -= bt->map[i].depth;
+		}
+	}
+
+	bt->wake_cnt = BT_WAIT_BATCH;
+	if (bt->wake_cnt > depth / 4)
+		bt->wake_cnt = max(1U, depth / 4);
+
+	bt->depth = depth;
+}
+
+static int bt_alloc(struct blk_mq_bitmap_tags *bt, unsigned int depth,
+		    int node, bool reserved)
+{
+	int i;
+
+	bt->bits_per_word = ilog2(BITS_PER_LONG);
+
+	/*
+	 * Depth can be zero for reserved tags, that's not a failure
+	 * condition.
+	 */
+	if (depth) {
+		unsigned int nr, tags_per_word;
+
+		tags_per_word = (1 << bt->bits_per_word);
+
+		/*
+		 * If the tag space is small, shrink the number of tags
+		 * per word so we spread over a few cachelines, at least.
+		 * If less than 4 tags, just forget about it, it's not
+		 * going to work optimally anyway.
+		 */
+		if (depth >= 4) {
+			while (tags_per_word * 4 > depth) {
+				bt->bits_per_word--;
+				tags_per_word = (1 << bt->bits_per_word);
+			}
+		}
+
+		nr = ALIGN(depth, tags_per_word) / tags_per_word;
+		bt->map = kzalloc_node(nr * sizeof(struct blk_align_bitmap),
+						GFP_KERNEL, node);
+		if (!bt->map)
+			return -ENOMEM;
+
+		bt->map_nr = nr;
+	}
+
+	bt->bs = kzalloc(BT_WAIT_QUEUES * sizeof(*bt->bs), GFP_KERNEL);
+	if (!bt->bs) {
+		kfree(bt->map);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < BT_WAIT_QUEUES; i++)
+		init_waitqueue_head(&bt->bs[i].wait);
+
+	bt_update_count(bt, depth);
+	return 0;
+}
+
+static void bt_free(struct blk_mq_bitmap_tags *bt)
+{
+	kfree(bt->map);
+	kfree(bt->bs);
+}
+
+static struct blk_mq_tags *blk_mq_init_bitmap_tags(struct blk_mq_tags *tags,
+						   int node)
+{
+	unsigned int depth = tags->nr_tags - tags->nr_reserved_tags;
+
+	if (bt_alloc(&tags->bitmap_tags, depth, node, false))
+		goto enomem;
+	if (bt_alloc(&tags->breserved_tags, tags->nr_reserved_tags, node, true))
+		goto enomem;
+
+	return tags;
+enomem:
+	bt_free(&tags->bitmap_tags);
+	kfree(tags);
+	return NULL;
+}
 
 struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 				     unsigned int reserved_tags, int node)
 {
-	unsigned int nr_tags, nr_cache;
 	struct blk_mq_tags *tags;
-	int ret;
 
 	if (total_tags > BLK_MQ_TAG_MAX) {
 		pr_err("blk-mq: tag depth too large\n");
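The sizing loop in bt_alloc() above starts at BITS_PER_LONG tags per word and halves that until the depth spans at least four words, so concurrent allocators tend to land on different cachelines. The same rule, extracted into a runnable userspace demo (depth 42 is an arbitrary example value):

#include <stdio.h>

int main(void)
{
	unsigned int depth = 42;		/* tags to map (example value) */
	unsigned int bits_per_word = 6;		/* ilog2(64) on a 64-bit build */
	unsigned int tags_per_word = 1 << bits_per_word;

	/* shrink words until at least 4 of them cover the depth, as bt_alloc() does */
	if (depth >= 4) {
		while (tags_per_word * 4 > depth) {
			bits_per_word--;
			tags_per_word = 1 << bits_per_word;
		}
	}

	printf("%u tags/word across %u words\n", tags_per_word,
	       (depth + tags_per_word - 1) / tags_per_word);	/* 8 across 6 */
	return 0;
}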
@@ -134,73 +529,59 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, | |||
134 | if (!tags) | 529 | if (!tags) |
135 | return NULL; | 530 | return NULL; |
136 | 531 | ||
137 | nr_tags = total_tags - reserved_tags; | ||
138 | nr_cache = nr_tags / num_possible_cpus(); | ||
139 | |||
140 | if (nr_cache < BLK_MQ_TAG_CACHE_MIN) | ||
141 | nr_cache = BLK_MQ_TAG_CACHE_MIN; | ||
142 | else if (nr_cache > BLK_MQ_TAG_CACHE_MAX) | ||
143 | nr_cache = BLK_MQ_TAG_CACHE_MAX; | ||
144 | |||
145 | tags->nr_tags = total_tags; | 532 | tags->nr_tags = total_tags; |
146 | tags->nr_reserved_tags = reserved_tags; | 533 | tags->nr_reserved_tags = reserved_tags; |
147 | tags->nr_max_cache = nr_cache; | ||
148 | tags->nr_batch_move = max(1u, nr_cache / 2); | ||
149 | 534 | ||
150 | ret = __percpu_ida_init(&tags->free_tags, tags->nr_tags - | 535 | return blk_mq_init_bitmap_tags(tags, node); |
151 | tags->nr_reserved_tags, | 536 | } |
152 | tags->nr_max_cache, | ||
153 | tags->nr_batch_move); | ||
154 | if (ret) | ||
155 | goto err_free_tags; | ||
156 | 537 | ||
157 | if (reserved_tags) { | 538 | void blk_mq_free_tags(struct blk_mq_tags *tags) |
158 | /* | 539 | { |
159 | * With max_cache and batch set to 1, the allocator falls back to | 540 | bt_free(&tags->bitmap_tags); |
160 | * no caching. It's fine that reserved tag allocation is slow. | 541 | kfree(tags); |
161 | */ | 542 | kfree(tags); |
162 | ret = __percpu_ida_init(&tags->reserved_tags, reserved_tags, | 543 | } |
163 | 1, 1); | ||
164 | if (ret) | ||
165 | goto err_reserved_tags; | ||
166 | } | ||
167 | 544 | ||
168 | return tags; | 545 | void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *tag) |
546 | { | ||
547 | unsigned int depth = tags->nr_tags - tags->nr_reserved_tags; | ||
169 | 548 | ||
170 | err_reserved_tags: | 549 | *tag = prandom_u32() % depth; |
171 | percpu_ida_destroy(&tags->free_tags); | ||
172 | err_free_tags: | ||
173 | kfree(tags); | ||
174 | return NULL; | ||
175 | } | 550 | } |
176 | 551 | ||
177 | void blk_mq_free_tags(struct blk_mq_tags *tags) | 552 | int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth) |
178 | { | 553 | { |
179 | percpu_ida_destroy(&tags->free_tags); | 554 | tdepth -= tags->nr_reserved_tags; |
180 | percpu_ida_destroy(&tags->reserved_tags); | 555 | if (tdepth > tags->nr_tags) |
181 | kfree(tags); | 556 | return -EINVAL; |
557 | |||
558 | /* | ||
559 | * We don't need to (and can't) update reserved tags here; they remain | ||
560 | * static and should never need resizing. | ||
561 | */ | ||
562 | bt_update_count(&tags->bitmap_tags, tdepth); | ||
563 | blk_mq_tag_wakeup_all(tags); | ||
564 | return 0; | ||
182 | } | 565 | } |
183 | 566 | ||
184 | ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) | 567 | ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) |
185 | { | 568 | { |
186 | char *orig_page = page; | 569 | char *orig_page = page; |
187 | unsigned int cpu; | 570 | unsigned int free, res; |
188 | 571 | ||
189 | if (!tags) | 572 | if (!tags) |
190 | return 0; | 573 | return 0; |
191 | 574 | ||
192 | page += sprintf(page, "nr_tags=%u, reserved_tags=%u, batch_move=%u," | 575 | page += sprintf(page, "nr_tags=%u, reserved_tags=%u, " |
193 | " max_cache=%u\n", tags->nr_tags, tags->nr_reserved_tags, | 576 | "bits_per_word=%u\n", |
194 | tags->nr_batch_move, tags->nr_max_cache); | 577 | tags->nr_tags, tags->nr_reserved_tags, |
578 | tags->bitmap_tags.bits_per_word); | ||
195 | 579 | ||
196 | page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", | 580 | free = bt_unused_tags(&tags->bitmap_tags); |
197 | percpu_ida_free_tags(&tags->free_tags, nr_cpu_ids), | 581 | res = bt_unused_tags(&tags->breserved_tags); |
198 | percpu_ida_free_tags(&tags->reserved_tags, nr_cpu_ids)); | ||
199 | 582 | ||
200 | for_each_possible_cpu(cpu) { | 583 | page += sprintf(page, "nr_free=%u, nr_reserved=%u\n", free, res); |
201 | page += sprintf(page, " cpu%02u: nr_free=%u\n", cpu, | 584 | page += sprintf(page, "active_queues=%u\n", atomic_read(&tags->active_queues)); |
202 | percpu_ida_free_tags(&tags->free_tags, cpu)); | ||
203 | } | ||
204 | 585 | ||
205 | return page - orig_page; | 586 | return page - orig_page; |
206 | } | 587 | } |
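
For an idle queue with an illustrative depth of 128 and no reserved tags, reading the resulting sysfs attribute would produce output roughly like this (values made up; the exact numbers depend on in-flight I/O):

    nr_tags=128, reserved_tags=0, bits_per_word=5
    nr_free=128, nr_reserved=0
    active_queues=0
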
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h index 947ba2c6148e..c959de58d2a5 100644 --- a/block/blk-mq-tag.h +++ b/block/blk-mq-tag.h | |||
@@ -1,17 +1,59 @@ | |||
1 | #ifndef INT_BLK_MQ_TAG_H | 1 | #ifndef INT_BLK_MQ_TAG_H |
2 | #define INT_BLK_MQ_TAG_H | 2 | #define INT_BLK_MQ_TAG_H |
3 | 3 | ||
4 | struct blk_mq_tags; | 4 | #include "blk-mq.h" |
5 | |||
6 | enum { | ||
7 | BT_WAIT_QUEUES = 8, | ||
8 | BT_WAIT_BATCH = 8, | ||
9 | }; | ||
10 | |||
11 | struct bt_wait_state { | ||
12 | atomic_t wait_cnt; | ||
13 | wait_queue_head_t wait; | ||
14 | } ____cacheline_aligned_in_smp; | ||
15 | |||
16 | #define TAG_TO_INDEX(bt, tag) ((tag) >> (bt)->bits_per_word) | ||
17 | #define TAG_TO_BIT(bt, tag) ((tag) & ((1 << (bt)->bits_per_word) - 1)) | ||
18 | |||
19 | struct blk_mq_bitmap_tags { | ||
20 | unsigned int depth; | ||
21 | unsigned int wake_cnt; | ||
22 | unsigned int bits_per_word; | ||
23 | |||
24 | unsigned int map_nr; | ||
25 | struct blk_align_bitmap *map; | ||
26 | |||
27 | unsigned int wake_index; | ||
28 | struct bt_wait_state *bs; | ||
29 | }; | ||
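
TAG_TO_INDEX() and TAG_TO_BIT() above split a tag into its word index and bit offset; note that bits_per_word here acts as a shift, i.e. log2 of the tags per word. A compilable sketch with an assumed width of 4 (16 tags per word):

    #include <stdio.h>

    #define BITS_PER_WORD 4  /* assumed: 1 << 4 = 16 tags per word */

    #define TAG_TO_INDEX(tag) ((tag) >> BITS_PER_WORD)
    #define TAG_TO_BIT(tag)   ((tag) & ((1U << BITS_PER_WORD) - 1))

    int main(void)
    {
            unsigned int tags[] = { 0, 15, 16, 37 };
            unsigned int i;

            for (i = 0; i < 4; i++)
                    printf("tag %2u -> word %u, bit %u\n", tags[i],
                           TAG_TO_INDEX(tags[i]), TAG_TO_BIT(tags[i]));
            return 0;   /* e.g. tag 37 lands in word 2, bit 5 */
    }
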
30 | |||
31 | /* | ||
32 | * Tag address space map. | ||
33 | */ | ||
34 | struct blk_mq_tags { | ||
35 | unsigned int nr_tags; | ||
36 | unsigned int nr_reserved_tags; | ||
37 | |||
38 | atomic_t active_queues; | ||
39 | |||
40 | struct blk_mq_bitmap_tags bitmap_tags; | ||
41 | struct blk_mq_bitmap_tags breserved_tags; | ||
42 | |||
43 | struct request **rqs; | ||
44 | struct list_head page_list; | ||
45 | }; | ||
46 | |||
5 | 47 | ||
6 | extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node); | 48 | extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node); |
7 | extern void blk_mq_free_tags(struct blk_mq_tags *tags); | 49 | extern void blk_mq_free_tags(struct blk_mq_tags *tags); |
8 | 50 | ||
9 | extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp, bool reserved); | 51 | extern unsigned int blk_mq_get_tag(struct blk_mq_hw_ctx *hctx, unsigned int *last_tag, gfp_t gfp, bool reserved); |
10 | extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags); | 52 | extern void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, unsigned int *last_tag); |
11 | extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag); | ||
12 | extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data); | ||
13 | extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); | 53 | extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags); |
14 | extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); | 54 | extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page); |
55 | extern void blk_mq_tag_init_last_tag(struct blk_mq_tags *tags, unsigned int *last_tag); | ||
56 | extern int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int depth); | ||
15 | 57 | ||
16 | enum { | 58 | enum { |
17 | BLK_MQ_TAG_CACHE_MIN = 1, | 59 | BLK_MQ_TAG_CACHE_MIN = 1, |
@@ -24,4 +66,23 @@ enum { | |||
24 | BLK_MQ_TAG_MAX = BLK_MQ_TAG_FAIL - 1, | 66 | BLK_MQ_TAG_MAX = BLK_MQ_TAG_FAIL - 1, |
25 | }; | 67 | }; |
26 | 68 | ||
69 | extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *); | ||
70 | extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *); | ||
71 | |||
72 | static inline bool blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) | ||
73 | { | ||
74 | if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) | ||
75 | return false; | ||
76 | |||
77 | return __blk_mq_tag_busy(hctx); | ||
78 | } | ||
79 | |||
80 | static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx) | ||
81 | { | ||
82 | if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) | ||
83 | return; | ||
84 | |||
85 | __blk_mq_tag_idle(hctx); | ||
86 | } | ||
87 | |||
27 | #endif | 88 | #endif |
diff --git a/block/blk-mq.c b/block/blk-mq.c index 1d2a9bdbee57..f27fe44230c2 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -1,3 +1,9 @@ | |||
1 | /* | ||
2 | * Block multiqueue core code | ||
3 | * | ||
4 | * Copyright (C) 2013-2014 Jens Axboe | ||
5 | * Copyright (C) 2013-2014 Christoph Hellwig | ||
6 | */ | ||
1 | #include <linux/kernel.h> | 7 | #include <linux/kernel.h> |
2 | #include <linux/module.h> | 8 | #include <linux/module.h> |
3 | #include <linux/backing-dev.h> | 9 | #include <linux/backing-dev.h> |
@@ -56,38 +62,40 @@ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) | |||
56 | { | 62 | { |
57 | unsigned int i; | 63 | unsigned int i; |
58 | 64 | ||
59 | for (i = 0; i < hctx->nr_ctx_map; i++) | 65 | for (i = 0; i < hctx->ctx_map.map_size; i++) |
60 | if (hctx->ctx_map[i]) | 66 | if (hctx->ctx_map.map[i].word) |
61 | return true; | 67 | return true; |
62 | 68 | ||
63 | return false; | 69 | return false; |
64 | } | 70 | } |
65 | 71 | ||
72 | static inline struct blk_align_bitmap *get_bm(struct blk_mq_hw_ctx *hctx, | ||
73 | struct blk_mq_ctx *ctx) | ||
74 | { | ||
75 | return &hctx->ctx_map.map[ctx->index_hw / hctx->ctx_map.bits_per_word]; | ||
76 | } | ||
77 | |||
78 | #define CTX_TO_BIT(hctx, ctx) \ | ||
79 | ((ctx)->index_hw & ((hctx)->ctx_map.bits_per_word - 1)) | ||
80 | |||
66 | /* | 81 | /* |
67 | * Mark this ctx as having pending work in this hardware queue | 82 | * Mark this ctx as having pending work in this hardware queue |
68 | */ | 83 | */ |
69 | static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx, | 84 | static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx, |
70 | struct blk_mq_ctx *ctx) | 85 | struct blk_mq_ctx *ctx) |
71 | { | 86 | { |
72 | if (!test_bit(ctx->index_hw, hctx->ctx_map)) | 87 | struct blk_align_bitmap *bm = get_bm(hctx, ctx); |
73 | set_bit(ctx->index_hw, hctx->ctx_map); | 88 | |
89 | if (!test_bit(CTX_TO_BIT(hctx, ctx), &bm->word)) | ||
90 | set_bit(CTX_TO_BIT(hctx, ctx), &bm->word); | ||
74 | } | 91 | } |
75 | 92 | ||
76 | static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx, | 93 | static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, |
77 | gfp_t gfp, bool reserved) | 94 | struct blk_mq_ctx *ctx) |
78 | { | 95 | { |
79 | struct request *rq; | 96 | struct blk_align_bitmap *bm = get_bm(hctx, ctx); |
80 | unsigned int tag; | ||
81 | 97 | ||
82 | tag = blk_mq_get_tag(hctx->tags, gfp, reserved); | 98 | clear_bit(CTX_TO_BIT(hctx, ctx), &bm->word); |
83 | if (tag != BLK_MQ_TAG_FAIL) { | ||
84 | rq = hctx->rqs[tag]; | ||
85 | rq->tag = tag; | ||
86 | |||
87 | return rq; | ||
88 | } | ||
89 | |||
90 | return NULL; | ||
91 | } | 99 | } |
92 | 100 | ||
93 | static int blk_mq_queue_enter(struct request_queue *q) | 101 | static int blk_mq_queue_enter(struct request_queue *q) |
@@ -186,78 +194,109 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, | |||
186 | if (blk_queue_io_stat(q)) | 194 | if (blk_queue_io_stat(q)) |
187 | rw_flags |= REQ_IO_STAT; | 195 | rw_flags |= REQ_IO_STAT; |
188 | 196 | ||
197 | INIT_LIST_HEAD(&rq->queuelist); | ||
198 | /* csd/requeue_work/fifo_time is initialized before use */ | ||
199 | rq->q = q; | ||
189 | rq->mq_ctx = ctx; | 200 | rq->mq_ctx = ctx; |
190 | rq->cmd_flags = rw_flags; | 201 | rq->cmd_flags |= rw_flags; |
202 | rq->cmd_type = 0; | ||
203 | /* do not touch atomic flags, it needs atomic ops against the timer */ | ||
204 | rq->cpu = -1; | ||
205 | rq->__data_len = 0; | ||
206 | rq->__sector = (sector_t) -1; | ||
207 | rq->bio = NULL; | ||
208 | rq->biotail = NULL; | ||
209 | INIT_HLIST_NODE(&rq->hash); | ||
210 | RB_CLEAR_NODE(&rq->rb_node); | ||
211 | memset(&rq->flush, 0, max(sizeof(rq->flush), sizeof(rq->elv))); | ||
212 | rq->rq_disk = NULL; | ||
213 | rq->part = NULL; | ||
191 | rq->start_time = jiffies; | 214 | rq->start_time = jiffies; |
215 | #ifdef CONFIG_BLK_CGROUP | ||
216 | rq->rl = NULL; | ||
192 | set_start_time_ns(rq); | 217 | set_start_time_ns(rq); |
218 | rq->io_start_time_ns = 0; | ||
219 | #endif | ||
220 | rq->nr_phys_segments = 0; | ||
221 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | ||
222 | rq->nr_integrity_segments = 0; | ||
223 | #endif | ||
224 | rq->ioprio = 0; | ||
225 | rq->special = NULL; | ||
226 | /* tag was already set */ | ||
227 | rq->errors = 0; | ||
228 | memset(rq->__cmd, 0, sizeof(rq->__cmd)); | ||
229 | rq->cmd = rq->__cmd; | ||
230 | rq->cmd_len = BLK_MAX_CDB; | ||
231 | |||
232 | rq->extra_len = 0; | ||
233 | rq->sense_len = 0; | ||
234 | rq->resid_len = 0; | ||
235 | rq->sense = NULL; | ||
236 | |||
237 | rq->deadline = 0; | ||
238 | INIT_LIST_HEAD(&rq->timeout_list); | ||
239 | rq->timeout = 0; | ||
240 | rq->retries = 0; | ||
241 | rq->end_io = NULL; | ||
242 | rq->end_io_data = NULL; | ||
243 | rq->next_rq = NULL; | ||
244 | |||
193 | ctx->rq_dispatched[rw_is_sync(rw_flags)]++; | 245 | ctx->rq_dispatched[rw_is_sync(rw_flags)]++; |
194 | } | 246 | } |
195 | 247 | ||
196 | static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, | 248 | static struct request * |
197 | int rw, gfp_t gfp, | 249 | __blk_mq_alloc_request(struct request_queue *q, struct blk_mq_hw_ctx *hctx, |
198 | bool reserved) | 250 | struct blk_mq_ctx *ctx, int rw, gfp_t gfp, bool reserved) |
199 | { | 251 | { |
200 | struct request *rq; | 252 | struct request *rq; |
253 | unsigned int tag; | ||
201 | 254 | ||
202 | do { | 255 | tag = blk_mq_get_tag(hctx, &ctx->last_tag, gfp, reserved); |
203 | struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); | 256 | if (tag != BLK_MQ_TAG_FAIL) { |
204 | struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); | 257 | rq = hctx->tags->rqs[tag]; |
205 | 258 | ||
206 | rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved); | 259 | rq->cmd_flags = 0; |
207 | if (rq) { | 260 | if (blk_mq_tag_busy(hctx)) { |
208 | blk_mq_rq_ctx_init(q, ctx, rq, rw); | 261 | rq->cmd_flags = REQ_MQ_INFLIGHT; |
209 | break; | 262 | atomic_inc(&hctx->nr_active); |
210 | } | 263 | } |
211 | 264 | ||
212 | blk_mq_put_ctx(ctx); | 265 | rq->tag = tag; |
213 | if (!(gfp & __GFP_WAIT)) | 266 | blk_mq_rq_ctx_init(q, ctx, rq, rw); |
214 | break; | 267 | return rq; |
215 | 268 | } | |
216 | __blk_mq_run_hw_queue(hctx); | ||
217 | blk_mq_wait_for_tags(hctx->tags); | ||
218 | } while (1); | ||
219 | 269 | ||
220 | return rq; | 270 | return NULL; |
221 | } | 271 | } |
222 | 272 | ||
223 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp) | 273 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, |
274 | bool reserved) | ||
224 | { | 275 | { |
276 | struct blk_mq_ctx *ctx; | ||
277 | struct blk_mq_hw_ctx *hctx; | ||
225 | struct request *rq; | 278 | struct request *rq; |
226 | 279 | ||
227 | if (blk_mq_queue_enter(q)) | 280 | if (blk_mq_queue_enter(q)) |
228 | return NULL; | 281 | return NULL; |
229 | 282 | ||
230 | rq = blk_mq_alloc_request_pinned(q, rw, gfp, false); | 283 | ctx = blk_mq_get_ctx(q); |
231 | if (rq) | 284 | hctx = q->mq_ops->map_queue(q, ctx->cpu); |
232 | blk_mq_put_ctx(rq->mq_ctx); | ||
233 | return rq; | ||
234 | } | ||
235 | |||
236 | struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, | ||
237 | gfp_t gfp) | ||
238 | { | ||
239 | struct request *rq; | ||
240 | 285 | ||
241 | if (blk_mq_queue_enter(q)) | 286 | rq = __blk_mq_alloc_request(q, hctx, ctx, rw, gfp & ~__GFP_WAIT, |
242 | return NULL; | 287 | reserved); |
288 | if (!rq && (gfp & __GFP_WAIT)) { | ||
289 | __blk_mq_run_hw_queue(hctx); | ||
290 | blk_mq_put_ctx(ctx); | ||
243 | 291 | ||
244 | rq = blk_mq_alloc_request_pinned(q, rw, gfp, true); | 292 | ctx = blk_mq_get_ctx(q); |
245 | if (rq) | 293 | hctx = q->mq_ops->map_queue(q, ctx->cpu); |
246 | blk_mq_put_ctx(rq->mq_ctx); | 294 | rq = __blk_mq_alloc_request(q, hctx, ctx, rw, gfp, reserved); |
295 | } | ||
296 | blk_mq_put_ctx(ctx); | ||
247 | return rq; | 297 | return rq; |
248 | } | 298 | } |
249 | EXPORT_SYMBOL(blk_mq_alloc_reserved_request); | 299 | EXPORT_SYMBOL(blk_mq_alloc_request); |
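
Both normal and reserved allocations now go through this one entry point, selected by the trailing bool. A hedged usage sketch for driver code (the queue pointer and surrounding context are placeholders):

    /* Regular request; with __GFP_WAIT (part of GFP_KERNEL) the call may
     * kick the hardware queue and retry once before giving up. */
    struct request *rq = blk_mq_alloc_request(q, WRITE, GFP_KERNEL, false);

    /* Reserved-pool request, e.g. for an internal command that must not
     * compete with normal I/O for tags. */
    struct request *resv = blk_mq_alloc_request(q, READ, GFP_KERNEL, true);
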
250 | |||
251 | /* | ||
252 | * Re-init and set pdu, if we have it | ||
253 | */ | ||
254 | void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) | ||
255 | { | ||
256 | blk_rq_init(hctx->queue, rq); | ||
257 | |||
258 | if (hctx->cmd_size) | ||
259 | rq->special = blk_mq_rq_to_pdu(rq); | ||
260 | } | ||
261 | 300 | ||
262 | static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, | 301 | static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, |
263 | struct blk_mq_ctx *ctx, struct request *rq) | 302 | struct blk_mq_ctx *ctx, struct request *rq) |
@@ -265,9 +304,11 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, | |||
265 | const int tag = rq->tag; | 304 | const int tag = rq->tag; |
266 | struct request_queue *q = rq->q; | 305 | struct request_queue *q = rq->q; |
267 | 306 | ||
268 | blk_mq_rq_init(hctx, rq); | 307 | if (rq->cmd_flags & REQ_MQ_INFLIGHT) |
269 | blk_mq_put_tag(hctx->tags, tag); | 308 | atomic_dec(&hctx->nr_active); |
270 | 309 | ||
310 | clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); | ||
311 | blk_mq_put_tag(hctx, tag, &ctx->last_tag); | ||
271 | blk_mq_queue_exit(q); | 312 | blk_mq_queue_exit(q); |
272 | } | 313 | } |
273 | 314 | ||
@@ -283,20 +324,47 @@ void blk_mq_free_request(struct request *rq) | |||
283 | __blk_mq_free_request(hctx, ctx, rq); | 324 | __blk_mq_free_request(hctx, ctx, rq); |
284 | } | 325 | } |
285 | 326 | ||
286 | bool blk_mq_end_io_partial(struct request *rq, int error, unsigned int nr_bytes) | 327 | /* |
328 | * Clone all relevant state from a request that has been put on hold in | ||
329 | * the flush state machine into the preallocated flush request that hangs | ||
330 | * off the request queue. | ||
331 | * | ||
332 | * For a driver, the flush request should be invisible; that's why we are | ||
333 | * impersonating the original request here. | ||
334 | */ | ||
335 | void blk_mq_clone_flush_request(struct request *flush_rq, | ||
336 | struct request *orig_rq) | ||
287 | { | 337 | { |
288 | if (blk_update_request(rq, error, blk_rq_bytes(rq))) | 338 | struct blk_mq_hw_ctx *hctx = |
289 | return true; | 339 | orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu); |
290 | 340 | ||
341 | flush_rq->mq_ctx = orig_rq->mq_ctx; | ||
342 | flush_rq->tag = orig_rq->tag; | ||
343 | memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq), | ||
344 | hctx->cmd_size); | ||
345 | } | ||
346 | |||
347 | inline void __blk_mq_end_io(struct request *rq, int error) | ||
348 | { | ||
291 | blk_account_io_done(rq); | 349 | blk_account_io_done(rq); |
292 | 350 | ||
293 | if (rq->end_io) | 351 | if (rq->end_io) { |
294 | rq->end_io(rq, error); | 352 | rq->end_io(rq, error); |
295 | else | 353 | } else { |
354 | if (unlikely(blk_bidi_rq(rq))) | ||
355 | blk_mq_free_request(rq->next_rq); | ||
296 | blk_mq_free_request(rq); | 356 | blk_mq_free_request(rq); |
297 | return false; | 357 | } |
298 | } | 358 | } |
299 | EXPORT_SYMBOL(blk_mq_end_io_partial); | 359 | EXPORT_SYMBOL(__blk_mq_end_io); |
360 | |||
361 | void blk_mq_end_io(struct request *rq, int error) | ||
362 | { | ||
363 | if (blk_update_request(rq, error, blk_rq_bytes(rq))) | ||
364 | BUG(); | ||
365 | __blk_mq_end_io(rq, error); | ||
366 | } | ||
367 | EXPORT_SYMBOL(blk_mq_end_io); | ||
300 | 368 | ||
301 | static void __blk_mq_complete_request_remote(void *data) | 369 | static void __blk_mq_complete_request_remote(void *data) |
302 | { | 370 | { |
@@ -308,15 +376,19 @@ static void __blk_mq_complete_request_remote(void *data) | |||
308 | void __blk_mq_complete_request(struct request *rq) | 376 | void __blk_mq_complete_request(struct request *rq) |
309 | { | 377 | { |
310 | struct blk_mq_ctx *ctx = rq->mq_ctx; | 378 | struct blk_mq_ctx *ctx = rq->mq_ctx; |
379 | bool shared = false; | ||
311 | int cpu; | 380 | int cpu; |
312 | 381 | ||
313 | if (!ctx->ipi_redirect) { | 382 | if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) { |
314 | rq->q->softirq_done_fn(rq); | 383 | rq->q->softirq_done_fn(rq); |
315 | return; | 384 | return; |
316 | } | 385 | } |
317 | 386 | ||
318 | cpu = get_cpu(); | 387 | cpu = get_cpu(); |
319 | if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { | 388 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) |
389 | shared = cpus_share_cache(cpu, ctx->cpu); | ||
390 | |||
391 | if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { | ||
320 | rq->csd.func = __blk_mq_complete_request_remote; | 392 | rq->csd.func = __blk_mq_complete_request_remote; |
321 | rq->csd.info = rq; | 393 | rq->csd.info = rq; |
322 | rq->csd.flags = 0; | 394 | rq->csd.flags = 0; |
@@ -337,10 +409,16 @@ void __blk_mq_complete_request(struct request *rq) | |||
337 | **/ | 409 | **/ |
338 | void blk_mq_complete_request(struct request *rq) | 410 | void blk_mq_complete_request(struct request *rq) |
339 | { | 411 | { |
340 | if (unlikely(blk_should_fake_timeout(rq->q))) | 412 | struct request_queue *q = rq->q; |
413 | |||
414 | if (unlikely(blk_should_fake_timeout(q))) | ||
341 | return; | 415 | return; |
342 | if (!blk_mark_rq_complete(rq)) | 416 | if (!blk_mark_rq_complete(rq)) { |
343 | __blk_mq_complete_request(rq); | 417 | if (q->softirq_done_fn) |
418 | __blk_mq_complete_request(rq); | ||
419 | else | ||
420 | blk_mq_end_io(rq, rq->errors); | ||
421 | } | ||
344 | } | 422 | } |
345 | EXPORT_SYMBOL(blk_mq_complete_request); | 423 | EXPORT_SYMBOL(blk_mq_complete_request); |
346 | 424 | ||
@@ -350,13 +428,29 @@ static void blk_mq_start_request(struct request *rq, bool last) | |||
350 | 428 | ||
351 | trace_block_rq_issue(q, rq); | 429 | trace_block_rq_issue(q, rq); |
352 | 430 | ||
431 | rq->resid_len = blk_rq_bytes(rq); | ||
432 | if (unlikely(blk_bidi_rq(rq))) | ||
433 | rq->next_rq->resid_len = blk_rq_bytes(rq->next_rq); | ||
434 | |||
353 | /* | 435 | /* |
354 | * Just mark start time and set the started bit. Due to memory | 436 | * Just mark start time and set the started bit. Due to memory |
355 | * ordering, we know we'll see the correct deadline as long as | 437 | * ordering, we know we'll see the correct deadline as long as |
356 | * REQ_ATOM_STARTED is seen. | 438 | * REQ_ATOM_STARTED is seen. Use the default queue timeout, |
439 | * unless one has been set in the request. | ||
440 | */ | ||
441 | if (!rq->timeout) | ||
442 | rq->deadline = jiffies + q->rq_timeout; | ||
443 | else | ||
444 | rq->deadline = jiffies + rq->timeout; | ||
445 | |||
446 | /* | ||
447 | * Mark us as started and clear complete. Complete might have been | ||
448 | * set if requeue raced with timeout, which then marked it as | ||
449 | * complete. So be sure to clear complete again when we start | ||
450 | * the request; otherwise we'll ignore the completion event. | ||
357 | */ | 451 | */ |
358 | rq->deadline = jiffies + q->rq_timeout; | ||
359 | set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); | 452 | set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); |
453 | clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); | ||
360 | 454 | ||
361 | if (q->dma_drain_size && blk_rq_bytes(rq)) { | 455 | if (q->dma_drain_size && blk_rq_bytes(rq)) { |
362 | /* | 456 | /* |
@@ -378,7 +472,7 @@ static void blk_mq_start_request(struct request *rq, bool last) | |||
378 | rq->cmd_flags |= REQ_END; | 472 | rq->cmd_flags |= REQ_END; |
379 | } | 473 | } |
380 | 474 | ||
381 | static void blk_mq_requeue_request(struct request *rq) | 475 | static void __blk_mq_requeue_request(struct request *rq) |
382 | { | 476 | { |
383 | struct request_queue *q = rq->q; | 477 | struct request_queue *q = rq->q; |
384 | 478 | ||
@@ -391,6 +485,80 @@ static void blk_mq_requeue_request(struct request *rq) | |||
391 | rq->nr_phys_segments--; | 485 | rq->nr_phys_segments--; |
392 | } | 486 | } |
393 | 487 | ||
488 | void blk_mq_requeue_request(struct request *rq) | ||
489 | { | ||
490 | __blk_mq_requeue_request(rq); | ||
491 | blk_clear_rq_complete(rq); | ||
492 | |||
493 | BUG_ON(blk_queued_rq(rq)); | ||
494 | blk_mq_add_to_requeue_list(rq, true); | ||
495 | } | ||
496 | EXPORT_SYMBOL(blk_mq_requeue_request); | ||
497 | |||
498 | static void blk_mq_requeue_work(struct work_struct *work) | ||
499 | { | ||
500 | struct request_queue *q = | ||
501 | container_of(work, struct request_queue, requeue_work); | ||
502 | LIST_HEAD(rq_list); | ||
503 | struct request *rq, *next; | ||
504 | unsigned long flags; | ||
505 | |||
506 | spin_lock_irqsave(&q->requeue_lock, flags); | ||
507 | list_splice_init(&q->requeue_list, &rq_list); | ||
508 | spin_unlock_irqrestore(&q->requeue_lock, flags); | ||
509 | |||
510 | list_for_each_entry_safe(rq, next, &rq_list, queuelist) { | ||
511 | if (!(rq->cmd_flags & REQ_SOFTBARRIER)) | ||
512 | continue; | ||
513 | |||
514 | rq->cmd_flags &= ~REQ_SOFTBARRIER; | ||
515 | list_del_init(&rq->queuelist); | ||
516 | blk_mq_insert_request(rq, true, false, false); | ||
517 | } | ||
518 | |||
519 | while (!list_empty(&rq_list)) { | ||
520 | rq = list_entry(rq_list.next, struct request, queuelist); | ||
521 | list_del_init(&rq->queuelist); | ||
522 | blk_mq_insert_request(rq, false, false, false); | ||
523 | } | ||
524 | |||
525 | blk_mq_run_queues(q, false); | ||
526 | } | ||
527 | |||
528 | void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) | ||
529 | { | ||
530 | struct request_queue *q = rq->q; | ||
531 | unsigned long flags; | ||
532 | |||
533 | /* | ||
534 | * We abuse this flag, which is otherwise used by the I/O scheduler, to | ||
535 | * request head insertion from the workqueue. | ||
536 | */ | ||
537 | BUG_ON(rq->cmd_flags & REQ_SOFTBARRIER); | ||
538 | |||
539 | spin_lock_irqsave(&q->requeue_lock, flags); | ||
540 | if (at_head) { | ||
541 | rq->cmd_flags |= REQ_SOFTBARRIER; | ||
542 | list_add(&rq->queuelist, &q->requeue_list); | ||
543 | } else { | ||
544 | list_add_tail(&rq->queuelist, &q->requeue_list); | ||
545 | } | ||
546 | spin_unlock_irqrestore(&q->requeue_lock, flags); | ||
547 | } | ||
548 | EXPORT_SYMBOL(blk_mq_add_to_requeue_list); | ||
549 | |||
550 | void blk_mq_kick_requeue_list(struct request_queue *q) | ||
551 | { | ||
552 | kblockd_schedule_work(&q->requeue_work); | ||
553 | } | ||
554 | EXPORT_SYMBOL(blk_mq_kick_requeue_list); | ||
555 | |||
556 | struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag) | ||
557 | { | ||
558 | return tags->rqs[tag]; | ||
559 | } | ||
560 | EXPORT_SYMBOL(blk_mq_tag_to_rq); | ||
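
blk_mq_tag_to_rq() plus the requeue list give a driver what it needs to map a completion tag back to its request and, if the device asked for a retry, push it through another dispatch pass. A sketch of that pattern (the surrounding completion handler is assumed):

    /* Resolve the hardware completion back to its request. */
    struct request *rq = blk_mq_tag_to_rq(hctx->tags, tag);

    /* Device said "try again later": park the request on the queue's
     * requeue list and schedule the requeue work to re-insert it. */
    blk_mq_requeue_request(rq);
    blk_mq_kick_requeue_list(rq->q);
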
561 | |||
394 | struct blk_mq_timeout_data { | 562 | struct blk_mq_timeout_data { |
395 | struct blk_mq_hw_ctx *hctx; | 563 | struct blk_mq_hw_ctx *hctx; |
396 | unsigned long *next; | 564 | unsigned long *next; |
@@ -412,12 +580,13 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags) | |||
412 | do { | 580 | do { |
413 | struct request *rq; | 581 | struct request *rq; |
414 | 582 | ||
415 | tag = find_next_zero_bit(free_tags, hctx->queue_depth, tag); | 583 | tag = find_next_zero_bit(free_tags, hctx->tags->nr_tags, tag); |
416 | if (tag >= hctx->queue_depth) | 584 | if (tag >= hctx->tags->nr_tags) |
417 | break; | 585 | break; |
418 | 586 | ||
419 | rq = hctx->rqs[tag++]; | 587 | rq = blk_mq_tag_to_rq(hctx->tags, tag++); |
420 | 588 | if (rq->q != hctx->queue) | |
589 | continue; | ||
421 | if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) | 590 | if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) |
422 | continue; | 591 | continue; |
423 | 592 | ||
@@ -442,6 +611,28 @@ static void blk_mq_hw_ctx_check_timeout(struct blk_mq_hw_ctx *hctx, | |||
442 | blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data); | 611 | blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data); |
443 | } | 612 | } |
444 | 613 | ||
614 | static enum blk_eh_timer_return blk_mq_rq_timed_out(struct request *rq) | ||
615 | { | ||
616 | struct request_queue *q = rq->q; | ||
617 | |||
618 | /* | ||
619 | * We know that complete is set at this point. If STARTED isn't set | ||
620 | * anymore, then the request isn't active and the "timeout" should | ||
621 | * just be ignored. This can happen due to the bitflag ordering. | ||
622 | * Timeout first checks if STARTED is set, and if it is, assumes | ||
623 | * the request is active. But if we race with completion, then | ||
624 | * both flags will get cleared. So check here again, and ignore | ||
625 | * a timeout event with a request that isn't active. | ||
626 | */ | ||
627 | if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) | ||
628 | return BLK_EH_NOT_HANDLED; | ||
629 | |||
630 | if (!q->mq_ops->timeout) | ||
631 | return BLK_EH_RESET_TIMER; | ||
632 | |||
633 | return q->mq_ops->timeout(rq); | ||
634 | } | ||
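
A driver opts into this path by setting the timeout member of its blk_mq_ops. A minimal hedged sketch, where my_try_abort() stands in for whatever device-specific abort the driver implements:

    static enum blk_eh_timer_return my_timeout(struct request *rq)
    {
            /* my_try_abort() is a hypothetical driver helper. */
            if (my_try_abort(rq))
                    return BLK_EH_HANDLED;    /* driver finished the rq */

            return BLK_EH_RESET_TIMER;        /* grant the command more time */
    }
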
635 | |||
445 | static void blk_mq_rq_timer(unsigned long data) | 636 | static void blk_mq_rq_timer(unsigned long data) |
446 | { | 637 | { |
447 | struct request_queue *q = (struct request_queue *) data; | 638 | struct request_queue *q = (struct request_queue *) data; |
@@ -449,11 +640,24 @@ static void blk_mq_rq_timer(unsigned long data) | |||
449 | unsigned long next = 0; | 640 | unsigned long next = 0; |
450 | int i, next_set = 0; | 641 | int i, next_set = 0; |
451 | 642 | ||
452 | queue_for_each_hw_ctx(q, hctx, i) | 643 | queue_for_each_hw_ctx(q, hctx, i) { |
644 | /* | ||
645 | * If no software queues are currently mapped to this | ||
646 | * hardware queue, there's nothing to check. | ||
647 | */ | ||
648 | if (!hctx->nr_ctx || !hctx->tags) | ||
649 | continue; | ||
650 | |||
453 | blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set); | 651 | blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set); |
652 | } | ||
454 | 653 | ||
455 | if (next_set) | 654 | if (next_set) { |
456 | mod_timer(&q->timeout, round_jiffies_up(next)); | 655 | next = blk_rq_timeout(round_jiffies_up(next)); |
656 | mod_timer(&q->timeout, next); | ||
657 | } else { | ||
658 | queue_for_each_hw_ctx(q, hctx, i) | ||
659 | blk_mq_tag_idle(hctx); | ||
660 | } | ||
457 | } | 661 | } |
458 | 662 | ||
459 | /* | 663 | /* |
@@ -495,9 +699,38 @@ static bool blk_mq_attempt_merge(struct request_queue *q, | |||
495 | return false; | 699 | return false; |
496 | } | 700 | } |
497 | 701 | ||
498 | void blk_mq_add_timer(struct request *rq) | 702 | /* |
703 | * Process software queues that have been marked busy, splicing them | ||
704 | * to the for-dispatch list. | ||
705 | */ | ||
706 | static void flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list) | ||
499 | { | 707 | { |
500 | __blk_add_timer(rq, NULL); | 708 | struct blk_mq_ctx *ctx; |
709 | int i; | ||
710 | |||
711 | for (i = 0; i < hctx->ctx_map.map_size; i++) { | ||
712 | struct blk_align_bitmap *bm = &hctx->ctx_map.map[i]; | ||
713 | unsigned int off, bit; | ||
714 | |||
715 | if (!bm->word) | ||
716 | continue; | ||
717 | |||
718 | bit = 0; | ||
719 | off = i * hctx->ctx_map.bits_per_word; | ||
720 | do { | ||
721 | bit = find_next_bit(&bm->word, bm->depth, bit); | ||
722 | if (bit >= bm->depth) | ||
723 | break; | ||
724 | |||
725 | ctx = hctx->ctxs[bit + off]; | ||
726 | clear_bit(bit, &bm->word); | ||
727 | spin_lock(&ctx->lock); | ||
728 | list_splice_tail_init(&ctx->rq_list, list); | ||
729 | spin_unlock(&ctx->lock); | ||
730 | |||
731 | bit++; | ||
732 | } while (1); | ||
733 | } | ||
501 | } | 734 | } |
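
The pattern above — skip all-zero words, then walk only the set bits — is what keeps the sparse ctx map cheap to scan. A standalone approximation (plain unsigned longs instead of blk_align_bitmap, and a linear bit scan instead of find_next_bit):

    #include <stdio.h>

    int main(void)
    {
            /* Toy map: ctxs 1 and 3 pending in word 0, ctx 8 in word 1. */
            unsigned long map[2] = { 0x0a, 0x01 };
            unsigned int bits_per_word = 8, i, bit;

            for (i = 0; i < 2; i++) {
                    if (!map[i])
                            continue;       /* nothing pending here */

                    for (bit = 0; bit < bits_per_word; bit++)
                            if (map[i] & (1UL << bit))
                                    printf("flush ctx %u\n",
                                           i * bits_per_word + bit);
            }
            return 0;
    }
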
502 | 735 | ||
503 | /* | 736 | /* |
@@ -509,10 +742,11 @@ void blk_mq_add_timer(struct request *rq) | |||
509 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | 742 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) |
510 | { | 743 | { |
511 | struct request_queue *q = hctx->queue; | 744 | struct request_queue *q = hctx->queue; |
512 | struct blk_mq_ctx *ctx; | ||
513 | struct request *rq; | 745 | struct request *rq; |
514 | LIST_HEAD(rq_list); | 746 | LIST_HEAD(rq_list); |
515 | int bit, queued; | 747 | int queued; |
748 | |||
749 | WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)); | ||
516 | 750 | ||
517 | if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) | 751 | if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) |
518 | return; | 752 | return; |
@@ -522,15 +756,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
522 | /* | 756 | /* |
523 | * Touch any software queue that has pending entries. | 757 | * Touch any software queue that has pending entries. |
524 | */ | 758 | */ |
525 | for_each_set_bit(bit, hctx->ctx_map, hctx->nr_ctx) { | 759 | flush_busy_ctxs(hctx, &rq_list); |
526 | clear_bit(bit, hctx->ctx_map); | ||
527 | ctx = hctx->ctxs[bit]; | ||
528 | BUG_ON(bit != ctx->index_hw); | ||
529 | |||
530 | spin_lock(&ctx->lock); | ||
531 | list_splice_tail_init(&ctx->rq_list, &rq_list); | ||
532 | spin_unlock(&ctx->lock); | ||
533 | } | ||
534 | 760 | ||
535 | /* | 761 | /* |
536 | * If we have previous entries on our dispatch list, grab them | 762 | * If we have previous entries on our dispatch list, grab them |
@@ -544,13 +770,9 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
544 | } | 770 | } |
545 | 771 | ||
546 | /* | 772 | /* |
547 | * Delete and return all entries from our dispatch list | ||
548 | */ | ||
549 | queued = 0; | ||
550 | |||
551 | /* | ||
552 | * Now process all the entries, sending them to the driver. | 773 | * Now process all the entries, sending them to the driver. |
553 | */ | 774 | */ |
775 | queued = 0; | ||
554 | while (!list_empty(&rq_list)) { | 776 | while (!list_empty(&rq_list)) { |
555 | int ret; | 777 | int ret; |
556 | 778 | ||
@@ -565,13 +787,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
565 | queued++; | 787 | queued++; |
566 | continue; | 788 | continue; |
567 | case BLK_MQ_RQ_QUEUE_BUSY: | 789 | case BLK_MQ_RQ_QUEUE_BUSY: |
568 | /* | ||
569 | * FIXME: we should have a mechanism to stop the queue | ||
570 | * like blk_stop_queue, otherwise we will waste cpu | ||
571 | * time | ||
572 | */ | ||
573 | list_add(&rq->queuelist, &rq_list); | 790 | list_add(&rq->queuelist, &rq_list); |
574 | blk_mq_requeue_request(rq); | 791 | __blk_mq_requeue_request(rq); |
575 | break; | 792 | break; |
576 | default: | 793 | default: |
577 | pr_err("blk-mq: bad return on queue: %d\n", ret); | 794 | pr_err("blk-mq: bad return on queue: %d\n", ret); |
@@ -601,17 +818,44 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
601 | } | 818 | } |
602 | } | 819 | } |
603 | 820 | ||
821 | /* | ||
822 | * It'd be great if the workqueue API had a way to pass | ||
823 | * in a mask and had some smarts for more clever placement. | ||
824 | * For now we just round-robin here, switching for every | ||
825 | * BLK_MQ_CPU_WORK_BATCH queued items. | ||
826 | */ | ||
827 | static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) | ||
828 | { | ||
829 | int cpu = hctx->next_cpu; | ||
830 | |||
831 | if (--hctx->next_cpu_batch <= 0) { | ||
832 | int next_cpu; | ||
833 | |||
834 | next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask); | ||
835 | if (next_cpu >= nr_cpu_ids) | ||
836 | next_cpu = cpumask_first(hctx->cpumask); | ||
837 | |||
838 | hctx->next_cpu = next_cpu; | ||
839 | hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; | ||
840 | } | ||
841 | |||
842 | return cpu; | ||
843 | } | ||
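
Because the CPU only advances every BLK_MQ_CPU_WORK_BATCH calls, consecutive work items batch on one CPU before the rotation moves on. A standalone sketch with an array standing in for the hctx cpumask (the batch size of 8 is an assumed value of the kernel constant):

    #include <stdio.h>

    #define WORK_BATCH 8   /* assumed value of BLK_MQ_CPU_WORK_BATCH */

    int main(void)
    {
            int cpus[] = { 0, 2, 4, 6 };    /* CPUs mapped to this hctx */
            int nr = 4, cur = 0, batch = WORK_BATCH, i;

            for (i = 0; i < 20; i++) {
                    int cpu = cpus[cur];    /* current item runs here */

                    if (--batch <= 0) {     /* rotate, wrapping around */
                            cur = (cur + 1) % nr;
                            batch = WORK_BATCH;
                    }
                    printf("item %2d -> cpu %d\n", i, cpu);
            }
            return 0;   /* items 0-7 on cpu 0, 8-15 on cpu 2, ... */
    }
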
844 | |||
604 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) | 845 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) |
605 | { | 846 | { |
606 | if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) | 847 | if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) |
607 | return; | 848 | return; |
608 | 849 | ||
609 | if (!async) | 850 | if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask)) |
610 | __blk_mq_run_hw_queue(hctx); | 851 | __blk_mq_run_hw_queue(hctx); |
852 | else if (hctx->queue->nr_hw_queues == 1) | ||
853 | kblockd_schedule_delayed_work(&hctx->run_work, 0); | ||
611 | else { | 854 | else { |
612 | struct request_queue *q = hctx->queue; | 855 | unsigned int cpu; |
613 | 856 | ||
614 | kblockd_schedule_delayed_work(q, &hctx->delayed_work, 0); | 857 | cpu = blk_mq_hctx_next_cpu(hctx); |
858 | kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0); | ||
615 | } | 859 | } |
616 | } | 860 | } |
617 | 861 | ||
@@ -626,14 +870,17 @@ void blk_mq_run_queues(struct request_queue *q, bool async) | |||
626 | test_bit(BLK_MQ_S_STOPPED, &hctx->state)) | 870 | test_bit(BLK_MQ_S_STOPPED, &hctx->state)) |
627 | continue; | 871 | continue; |
628 | 872 | ||
873 | preempt_disable(); | ||
629 | blk_mq_run_hw_queue(hctx, async); | 874 | blk_mq_run_hw_queue(hctx, async); |
875 | preempt_enable(); | ||
630 | } | 876 | } |
631 | } | 877 | } |
632 | EXPORT_SYMBOL(blk_mq_run_queues); | 878 | EXPORT_SYMBOL(blk_mq_run_queues); |
633 | 879 | ||
634 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) | 880 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx) |
635 | { | 881 | { |
636 | cancel_delayed_work(&hctx->delayed_work); | 882 | cancel_delayed_work(&hctx->run_work); |
883 | cancel_delayed_work(&hctx->delay_work); | ||
637 | set_bit(BLK_MQ_S_STOPPED, &hctx->state); | 884 | set_bit(BLK_MQ_S_STOPPED, &hctx->state); |
638 | } | 885 | } |
639 | EXPORT_SYMBOL(blk_mq_stop_hw_queue); | 886 | EXPORT_SYMBOL(blk_mq_stop_hw_queue); |
@@ -651,11 +898,25 @@ EXPORT_SYMBOL(blk_mq_stop_hw_queues); | |||
651 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx) | 898 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx) |
652 | { | 899 | { |
653 | clear_bit(BLK_MQ_S_STOPPED, &hctx->state); | 900 | clear_bit(BLK_MQ_S_STOPPED, &hctx->state); |
901 | |||
902 | preempt_disable(); | ||
654 | __blk_mq_run_hw_queue(hctx); | 903 | __blk_mq_run_hw_queue(hctx); |
904 | preempt_enable(); | ||
655 | } | 905 | } |
656 | EXPORT_SYMBOL(blk_mq_start_hw_queue); | 906 | EXPORT_SYMBOL(blk_mq_start_hw_queue); |
657 | 907 | ||
658 | void blk_mq_start_stopped_hw_queues(struct request_queue *q) | 908 | void blk_mq_start_hw_queues(struct request_queue *q) |
909 | { | ||
910 | struct blk_mq_hw_ctx *hctx; | ||
911 | int i; | ||
912 | |||
913 | queue_for_each_hw_ctx(q, hctx, i) | ||
914 | blk_mq_start_hw_queue(hctx); | ||
915 | } | ||
916 | EXPORT_SYMBOL(blk_mq_start_hw_queues); | ||
917 | |||
918 | |||
919 | void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async) | ||
659 | { | 920 | { |
660 | struct blk_mq_hw_ctx *hctx; | 921 | struct blk_mq_hw_ctx *hctx; |
661 | int i; | 922 | int i; |
@@ -665,19 +926,47 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q) | |||
665 | continue; | 926 | continue; |
666 | 927 | ||
667 | clear_bit(BLK_MQ_S_STOPPED, &hctx->state); | 928 | clear_bit(BLK_MQ_S_STOPPED, &hctx->state); |
668 | blk_mq_run_hw_queue(hctx, true); | 929 | preempt_disable(); |
930 | blk_mq_run_hw_queue(hctx, async); | ||
931 | preempt_enable(); | ||
669 | } | 932 | } |
670 | } | 933 | } |
671 | EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues); | 934 | EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues); |
672 | 935 | ||
673 | static void blk_mq_work_fn(struct work_struct *work) | 936 | static void blk_mq_run_work_fn(struct work_struct *work) |
674 | { | 937 | { |
675 | struct blk_mq_hw_ctx *hctx; | 938 | struct blk_mq_hw_ctx *hctx; |
676 | 939 | ||
677 | hctx = container_of(work, struct blk_mq_hw_ctx, delayed_work.work); | 940 | hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work); |
941 | |||
678 | __blk_mq_run_hw_queue(hctx); | 942 | __blk_mq_run_hw_queue(hctx); |
679 | } | 943 | } |
680 | 944 | ||
945 | static void blk_mq_delay_work_fn(struct work_struct *work) | ||
946 | { | ||
947 | struct blk_mq_hw_ctx *hctx; | ||
948 | |||
949 | hctx = container_of(work, struct blk_mq_hw_ctx, delay_work.work); | ||
950 | |||
951 | if (test_and_clear_bit(BLK_MQ_S_STOPPED, &hctx->state)) | ||
952 | __blk_mq_run_hw_queue(hctx); | ||
953 | } | ||
954 | |||
955 | void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) | ||
956 | { | ||
957 | unsigned long tmo = msecs_to_jiffies(msecs); | ||
958 | |||
959 | if (hctx->queue->nr_hw_queues == 1) | ||
960 | kblockd_schedule_delayed_work(&hctx->delay_work, tmo); | ||
961 | else { | ||
962 | unsigned int cpu; | ||
963 | |||
964 | cpu = blk_mq_hctx_next_cpu(hctx); | ||
965 | kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo); | ||
966 | } | ||
967 | } | ||
968 | EXPORT_SYMBOL(blk_mq_delay_queue); | ||
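
blk_mq_delay_queue() pairs naturally with a temporarily resource-starved ->queue_rq(): stop the queue, schedule the delayed restart, and let the core requeue the request. A hedged sketch of the pattern (my_device_full() is a hypothetical driver-side check):

    static int my_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
    {
            if (my_device_full(hctx->driver_data)) {
                    blk_mq_stop_hw_queue(hctx);     /* stop dispatching   */
                    blk_mq_delay_queue(hctx, 3);    /* restart in ~3 msec */
                    return BLK_MQ_RQ_QUEUE_BUSY;    /* core requeues rq   */
            }

            /* ... issue the command to hardware ... */
            return BLK_MQ_RQ_QUEUE_OK;
    }
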
969 | |||
681 | static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, | 970 | static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, |
682 | struct request *rq, bool at_head) | 971 | struct request *rq, bool at_head) |
683 | { | 972 | { |
@@ -689,12 +978,13 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, | |||
689 | list_add(&rq->queuelist, &ctx->rq_list); | 978 | list_add(&rq->queuelist, &ctx->rq_list); |
690 | else | 979 | else |
691 | list_add_tail(&rq->queuelist, &ctx->rq_list); | 980 | list_add_tail(&rq->queuelist, &ctx->rq_list); |
981 | |||
692 | blk_mq_hctx_mark_pending(hctx, ctx); | 982 | blk_mq_hctx_mark_pending(hctx, ctx); |
693 | 983 | ||
694 | /* | 984 | /* |
695 | * We do this early, to ensure we are on the right CPU. | 985 | * We do this early, to ensure we are on the right CPU. |
696 | */ | 986 | */ |
697 | blk_mq_add_timer(rq); | 987 | blk_add_timer(rq); |
698 | } | 988 | } |
699 | 989 | ||
700 | void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, | 990 | void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, |
@@ -719,10 +1009,10 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, | |||
719 | spin_unlock(&ctx->lock); | 1009 | spin_unlock(&ctx->lock); |
720 | } | 1010 | } |
721 | 1011 | ||
722 | blk_mq_put_ctx(current_ctx); | ||
723 | |||
724 | if (run_queue) | 1012 | if (run_queue) |
725 | blk_mq_run_hw_queue(hctx, async); | 1013 | blk_mq_run_hw_queue(hctx, async); |
1014 | |||
1015 | blk_mq_put_ctx(current_ctx); | ||
726 | } | 1016 | } |
727 | 1017 | ||
728 | static void blk_mq_insert_requests(struct request_queue *q, | 1018 | static void blk_mq_insert_requests(struct request_queue *q, |
@@ -758,9 +1048,8 @@ static void blk_mq_insert_requests(struct request_queue *q, | |||
758 | } | 1048 | } |
759 | spin_unlock(&ctx->lock); | 1049 | spin_unlock(&ctx->lock); |
760 | 1050 | ||
761 | blk_mq_put_ctx(current_ctx); | ||
762 | |||
763 | blk_mq_run_hw_queue(hctx, from_schedule); | 1051 | blk_mq_run_hw_queue(hctx, from_schedule); |
1052 | blk_mq_put_ctx(current_ctx); | ||
764 | } | 1053 | } |
765 | 1054 | ||
766 | static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) | 1055 | static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) |
@@ -826,21 +1115,161 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) | |||
826 | blk_account_io_start(rq, 1); | 1115 | blk_account_io_start(rq, 1); |
827 | } | 1116 | } |
828 | 1117 | ||
829 | static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | 1118 | static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx, |
1119 | struct blk_mq_ctx *ctx, | ||
1120 | struct request *rq, struct bio *bio) | ||
830 | { | 1121 | { |
1122 | struct request_queue *q = hctx->queue; | ||
1123 | |||
1124 | if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) { | ||
1125 | blk_mq_bio_to_request(rq, bio); | ||
1126 | spin_lock(&ctx->lock); | ||
1127 | insert_rq: | ||
1128 | __blk_mq_insert_request(hctx, rq, false); | ||
1129 | spin_unlock(&ctx->lock); | ||
1130 | return false; | ||
1131 | } else { | ||
1132 | spin_lock(&ctx->lock); | ||
1133 | if (!blk_mq_attempt_merge(q, ctx, bio)) { | ||
1134 | blk_mq_bio_to_request(rq, bio); | ||
1135 | goto insert_rq; | ||
1136 | } | ||
1137 | |||
1138 | spin_unlock(&ctx->lock); | ||
1139 | __blk_mq_free_request(hctx, ctx, rq); | ||
1140 | return true; | ||
1141 | } | ||
1142 | } | ||
1143 | |||
1144 | struct blk_map_ctx { | ||
831 | struct blk_mq_hw_ctx *hctx; | 1145 | struct blk_mq_hw_ctx *hctx; |
832 | struct blk_mq_ctx *ctx; | 1146 | struct blk_mq_ctx *ctx; |
1147 | }; | ||
1148 | |||
1149 | static struct request *blk_mq_map_request(struct request_queue *q, | ||
1150 | struct bio *bio, | ||
1151 | struct blk_map_ctx *data) | ||
1152 | { | ||
1153 | struct blk_mq_hw_ctx *hctx; | ||
1154 | struct blk_mq_ctx *ctx; | ||
1155 | struct request *rq; | ||
1156 | int rw = bio_data_dir(bio); | ||
1157 | |||
1158 | if (unlikely(blk_mq_queue_enter(q))) { | ||
1159 | bio_endio(bio, -EIO); | ||
1160 | return NULL; | ||
1161 | } | ||
1162 | |||
1163 | ctx = blk_mq_get_ctx(q); | ||
1164 | hctx = q->mq_ops->map_queue(q, ctx->cpu); | ||
1165 | |||
1166 | if (rw_is_sync(bio->bi_rw)) | ||
1167 | rw |= REQ_SYNC; | ||
1168 | |||
1169 | trace_block_getrq(q, bio, rw); | ||
1170 | rq = __blk_mq_alloc_request(q, hctx, ctx, rw, GFP_ATOMIC, false); | ||
1171 | if (unlikely(!rq)) { | ||
1172 | __blk_mq_run_hw_queue(hctx); | ||
1173 | blk_mq_put_ctx(ctx); | ||
1174 | trace_block_sleeprq(q, bio, rw); | ||
1175 | |||
1176 | ctx = blk_mq_get_ctx(q); | ||
1177 | hctx = q->mq_ops->map_queue(q, ctx->cpu); | ||
1178 | rq = __blk_mq_alloc_request(q, hctx, ctx, rw, | ||
1179 | __GFP_WAIT|GFP_ATOMIC, false); | ||
1180 | } | ||
1181 | |||
1182 | hctx->queued++; | ||
1183 | data->hctx = hctx; | ||
1184 | data->ctx = ctx; | ||
1185 | return rq; | ||
1186 | } | ||
1187 | |||
1188 | /* | ||
1189 | * Multiple hardware queue variant. This will not use per-process plugs, | ||
1190 | * but will attempt to bypass the hctx queueing if we can go straight to | ||
1191 | * hardware for SYNC IO. | ||
1192 | */ | ||
1193 | static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | ||
1194 | { | ||
833 | const int is_sync = rw_is_sync(bio->bi_rw); | 1195 | const int is_sync = rw_is_sync(bio->bi_rw); |
834 | const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); | 1196 | const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); |
835 | int rw = bio_data_dir(bio); | 1197 | struct blk_map_ctx data; |
836 | struct request *rq; | 1198 | struct request *rq; |
1199 | |||
1200 | blk_queue_bounce(q, &bio); | ||
1201 | |||
1202 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { | ||
1203 | bio_endio(bio, -EIO); | ||
1204 | return; | ||
1205 | } | ||
1206 | |||
1207 | rq = blk_mq_map_request(q, bio, &data); | ||
1208 | if (unlikely(!rq)) | ||
1209 | return; | ||
1210 | |||
1211 | if (unlikely(is_flush_fua)) { | ||
1212 | blk_mq_bio_to_request(rq, bio); | ||
1213 | blk_insert_flush(rq); | ||
1214 | goto run_queue; | ||
1215 | } | ||
1216 | |||
1217 | if (is_sync) { | ||
1218 | int ret; | ||
1219 | |||
1220 | blk_mq_bio_to_request(rq, bio); | ||
1221 | blk_mq_start_request(rq, true); | ||
1222 | |||
1223 | /* | ||
1224 | * For an OK return, we are done. For an error, kill the request. | ||
1225 | * For busy, just add it back to our list, as we previously | ||
1226 | * would have done. | ||
1227 | */ | ||
1228 | ret = q->mq_ops->queue_rq(data.hctx, rq); | ||
1229 | if (ret == BLK_MQ_RQ_QUEUE_OK) | ||
1230 | goto done; | ||
1231 | else { | ||
1232 | __blk_mq_requeue_request(rq); | ||
1233 | |||
1234 | if (ret == BLK_MQ_RQ_QUEUE_ERROR) { | ||
1235 | rq->errors = -EIO; | ||
1236 | blk_mq_end_io(rq, rq->errors); | ||
1237 | goto done; | ||
1238 | } | ||
1239 | } | ||
1240 | } | ||
1241 | |||
1242 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { | ||
1243 | /* | ||
1244 | * For a SYNC request, send it to the hardware immediately. For | ||
1245 | * an ASYNC request, just ensure that we run it later on. The | ||
1246 | * latter allows for merging opportunities and more efficient | ||
1247 | * dispatching. | ||
1248 | */ | ||
1249 | run_queue: | ||
1250 | blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); | ||
1251 | } | ||
1252 | done: | ||
1253 | blk_mq_put_ctx(data.ctx); | ||
1254 | } | ||
1255 | |||
1256 | /* | ||
1257 | * Single hardware queue variant. This will attempt to use any per-process | ||
1258 | * plug for merging and IO deferral. | ||
1259 | */ | ||
1260 | static void blk_sq_make_request(struct request_queue *q, struct bio *bio) | ||
1261 | { | ||
1262 | const int is_sync = rw_is_sync(bio->bi_rw); | ||
1263 | const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA); | ||
837 | unsigned int use_plug, request_count = 0; | 1264 | unsigned int use_plug, request_count = 0; |
1265 | struct blk_map_ctx data; | ||
1266 | struct request *rq; | ||
838 | 1267 | ||
839 | /* | 1268 | /* |
840 | * If we have multiple hardware queues, just go directly to | 1269 | * If we have multiple hardware queues, just go directly to |
841 | * one of those for sync IO. | 1270 | * one of those for sync IO. |
842 | */ | 1271 | */ |
843 | use_plug = !is_flush_fua && ((q->nr_hw_queues == 1) || !is_sync); | 1272 | use_plug = !is_flush_fua && !is_sync; |
844 | 1273 | ||
845 | blk_queue_bounce(q, &bio); | 1274 | blk_queue_bounce(q, &bio); |
846 | 1275 | ||
@@ -849,37 +1278,14 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
849 | return; | 1278 | return; |
850 | } | 1279 | } |
851 | 1280 | ||
852 | if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) | 1281 | if (use_plug && !blk_queue_nomerges(q) && |
853 | return; | 1282 | blk_attempt_plug_merge(q, bio, &request_count)) |
854 | |||
855 | if (blk_mq_queue_enter(q)) { | ||
856 | bio_endio(bio, -EIO); | ||
857 | return; | 1283 | return; |
858 | } | ||
859 | 1284 | ||
860 | ctx = blk_mq_get_ctx(q); | 1285 | rq = blk_mq_map_request(q, bio, &data); |
861 | hctx = q->mq_ops->map_queue(q, ctx->cpu); | ||
862 | |||
863 | if (is_sync) | ||
864 | rw |= REQ_SYNC; | ||
865 | trace_block_getrq(q, bio, rw); | ||
866 | rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false); | ||
867 | if (likely(rq)) | ||
868 | blk_mq_rq_ctx_init(q, ctx, rq, rw); | ||
869 | else { | ||
870 | blk_mq_put_ctx(ctx); | ||
871 | trace_block_sleeprq(q, bio, rw); | ||
872 | rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC, | ||
873 | false); | ||
874 | ctx = rq->mq_ctx; | ||
875 | hctx = q->mq_ops->map_queue(q, ctx->cpu); | ||
876 | } | ||
877 | |||
878 | hctx->queued++; | ||
879 | 1286 | ||
880 | if (unlikely(is_flush_fua)) { | 1287 | if (unlikely(is_flush_fua)) { |
881 | blk_mq_bio_to_request(rq, bio); | 1288 | blk_mq_bio_to_request(rq, bio); |
882 | blk_mq_put_ctx(ctx); | ||
883 | blk_insert_flush(rq); | 1289 | blk_insert_flush(rq); |
884 | goto run_queue; | 1290 | goto run_queue; |
885 | } | 1291 | } |
@@ -901,31 +1307,23 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
901 | trace_block_plug(q); | 1307 | trace_block_plug(q); |
902 | } | 1308 | } |
903 | list_add_tail(&rq->queuelist, &plug->mq_list); | 1309 | list_add_tail(&rq->queuelist, &plug->mq_list); |
904 | blk_mq_put_ctx(ctx); | 1310 | blk_mq_put_ctx(data.ctx); |
905 | return; | 1311 | return; |
906 | } | 1312 | } |
907 | } | 1313 | } |
908 | 1314 | ||
909 | spin_lock(&ctx->lock); | 1315 | if (!blk_mq_merge_queue_io(data.hctx, data.ctx, rq, bio)) { |
910 | 1316 | /* | |
911 | if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) && | 1317 | * For a SYNC request, send it to the hardware immediately. For |
912 | blk_mq_attempt_merge(q, ctx, bio)) | 1318 | * an ASYNC request, just ensure that we run it later on. The |
913 | __blk_mq_free_request(hctx, ctx, rq); | 1319 | * latter allows for merging opportunities and more efficient |
914 | else { | 1320 | * dispatching. |
915 | blk_mq_bio_to_request(rq, bio); | 1321 | */ |
916 | __blk_mq_insert_request(hctx, rq, false); | 1322 | run_queue: |
1323 | blk_mq_run_hw_queue(data.hctx, !is_sync || is_flush_fua); | ||
917 | } | 1324 | } |
918 | 1325 | ||
919 | spin_unlock(&ctx->lock); | 1326 | blk_mq_put_ctx(data.ctx); |
920 | blk_mq_put_ctx(ctx); | ||
921 | |||
922 | /* | ||
923 | * For a SYNC request, send it to the hardware immediately. For an | ||
924 | * ASYNC request, just ensure that we run it later on. The latter | ||
925 | * allows for merging opportunities and more efficient dispatching. | ||
926 | */ | ||
927 | run_queue: | ||
928 | blk_mq_run_hw_queue(hctx, !is_sync || is_flush_fua); | ||
929 | } | 1327 | } |
930 | 1328 | ||
931 | /* | 1329 | /* |
@@ -937,32 +1335,153 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu) | |||
937 | } | 1335 | } |
938 | EXPORT_SYMBOL(blk_mq_map_queue); | 1336 | EXPORT_SYMBOL(blk_mq_map_queue); |
939 | 1337 | ||
940 | struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *reg, | 1338 | static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, |
941 | unsigned int hctx_index) | 1339 | struct blk_mq_tags *tags, unsigned int hctx_idx) |
942 | { | 1340 | { |
943 | return kmalloc_node(sizeof(struct blk_mq_hw_ctx), | 1341 | struct page *page; |
944 | GFP_KERNEL | __GFP_ZERO, reg->numa_node); | 1342 | |
1343 | if (tags->rqs && set->ops->exit_request) { | ||
1344 | int i; | ||
1345 | |||
1346 | for (i = 0; i < tags->nr_tags; i++) { | ||
1347 | if (!tags->rqs[i]) | ||
1348 | continue; | ||
1349 | set->ops->exit_request(set->driver_data, tags->rqs[i], | ||
1350 | hctx_idx, i); | ||
1351 | } | ||
1352 | } | ||
1353 | |||
1354 | while (!list_empty(&tags->page_list)) { | ||
1355 | page = list_first_entry(&tags->page_list, struct page, lru); | ||
1356 | list_del_init(&page->lru); | ||
1357 | __free_pages(page, page->private); | ||
1358 | } | ||
1359 | |||
1360 | kfree(tags->rqs); | ||
1361 | |||
1362 | blk_mq_free_tags(tags); | ||
945 | } | 1363 | } |
946 | EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue); | ||
947 | 1364 | ||
948 | void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *hctx, | 1365 | static size_t order_to_size(unsigned int order) |
949 | unsigned int hctx_index) | ||
950 | { | 1366 | { |
951 | kfree(hctx); | 1367 | return (size_t)PAGE_SIZE << order; |
952 | } | 1368 | } |
953 | EXPORT_SYMBOL(blk_mq_free_single_hw_queue); | ||
954 | 1369 | ||
955 | static void blk_mq_hctx_notify(void *data, unsigned long action, | 1370 | static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, |
956 | unsigned int cpu) | 1371 | unsigned int hctx_idx) |
1372 | { | ||
1373 | struct blk_mq_tags *tags; | ||
1374 | unsigned int i, j, entries_per_page, max_order = 4; | ||
1375 | size_t rq_size, left; | ||
1376 | |||
1377 | tags = blk_mq_init_tags(set->queue_depth, set->reserved_tags, | ||
1378 | set->numa_node); | ||
1379 | if (!tags) | ||
1380 | return NULL; | ||
1381 | |||
1382 | INIT_LIST_HEAD(&tags->page_list); | ||
1383 | |||
1384 | tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *), | ||
1385 | GFP_KERNEL, set->numa_node); | ||
1386 | if (!tags->rqs) { | ||
1387 | blk_mq_free_tags(tags); | ||
1388 | return NULL; | ||
1389 | } | ||
1390 | |||
1391 | /* | ||
1392 | * rq_size is the size of the request plus driver payload, rounded | ||
1393 | * to the cacheline size | ||
1394 | */ | ||
1395 | rq_size = round_up(sizeof(struct request) + set->cmd_size, | ||
1396 | cache_line_size()); | ||
1397 | left = rq_size * set->queue_depth; | ||
1398 | |||
1399 | for (i = 0; i < set->queue_depth; ) { | ||
1400 | int this_order = max_order; | ||
1401 | struct page *page; | ||
1402 | int to_do; | ||
1403 | void *p; | ||
1404 | |||
1405 | while (left < order_to_size(this_order - 1) && this_order) | ||
1406 | this_order--; | ||
1407 | |||
1408 | do { | ||
1409 | page = alloc_pages_node(set->numa_node, GFP_KERNEL, | ||
1410 | this_order); | ||
1411 | if (page) | ||
1412 | break; | ||
1413 | if (!this_order--) | ||
1414 | break; | ||
1415 | if (order_to_size(this_order) < rq_size) | ||
1416 | break; | ||
1417 | } while (1); | ||
1418 | |||
1419 | if (!page) | ||
1420 | goto fail; | ||
1421 | |||
1422 | page->private = this_order; | ||
1423 | list_add_tail(&page->lru, &tags->page_list); | ||
1424 | |||
1425 | p = page_address(page); | ||
1426 | entries_per_page = order_to_size(this_order) / rq_size; | ||
1427 | to_do = min(entries_per_page, set->queue_depth - i); | ||
1428 | left -= to_do * rq_size; | ||
1429 | for (j = 0; j < to_do; j++) { | ||
1430 | tags->rqs[i] = p; | ||
1431 | if (set->ops->init_request) { | ||
1432 | if (set->ops->init_request(set->driver_data, | ||
1433 | tags->rqs[i], hctx_idx, i, | ||
1434 | set->numa_node)) | ||
1435 | goto fail; | ||
1436 | } | ||
1437 | |||
1438 | p += rq_size; | ||
1439 | i++; | ||
1440 | } | ||
1441 | } | ||
1442 | |||
1443 | return tags; | ||
1444 | |||
1445 | fail: | ||
1446 | pr_warn("%s: failed to allocate requests\n", __func__); | ||
1447 | blk_mq_free_rq_map(set, tags, hctx_idx); | ||
1448 | return NULL; | ||
1449 | } | ||
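
The loop above carves requests out of the largest contiguous chunks it can get, backing the order down on allocation failure. The carving arithmetic can be checked in isolation (the request and payload sizes below are made up):

    #include <stdio.h>

    int main(void)
    {
            size_t page = 4096, cls = 64;
            size_t rq_struct = 320, cmd_size = 192;     /* assumed sizes */
            size_t rq_size = (rq_struct + cmd_size + cls - 1) / cls * cls;
            int order;

            /* entries_per_page for each allocation order tried */
            for (order = 4; order >= 0; order--) {
                    size_t chunk = page << order;
                    printf("order %d: %6zu bytes -> %3zu requests/chunk\n",
                           order, chunk, chunk / rq_size);
            }
            return 0;
    }
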
1450 | |||
1451 | static void blk_mq_free_bitmap(struct blk_mq_ctxmap *bitmap) | ||
1452 | { | ||
1453 | kfree(bitmap->map); | ||
1454 | } | ||
1455 | |||
1456 | static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node) | ||
1457 | { | ||
1458 | unsigned int bpw = 8, total, num_maps, i; | ||
1459 | |||
1460 | bitmap->bits_per_word = bpw; | ||
1461 | |||
1462 | num_maps = ALIGN(nr_cpu_ids, bpw) / bpw; | ||
1463 | bitmap->map = kzalloc_node(num_maps * sizeof(struct blk_align_bitmap), | ||
1464 | GFP_KERNEL, node); | ||
1465 | if (!bitmap->map) | ||
1466 | return -ENOMEM; | ||
1467 | |||
1468 | bitmap->map_size = num_maps; | ||
1469 | |||
1470 | total = nr_cpu_ids; | ||
1471 | for (i = 0; i < num_maps; i++) { | ||
1472 | bitmap->map[i].depth = min(total, bitmap->bits_per_word); | ||
1473 | total -= bitmap->map[i].depth; | ||
1474 | } | ||
1475 | |||
1476 | return 0; | ||
1477 | } | ||
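
Note that, unlike the tag map, this ctx map stores bits_per_word as a plain count (8), and the final word may be shallower than the rest; the per-word depth field bounds the bit search. A compilable sketch of the distribution (the CPU count is arbitrary):

    #include <stdio.h>

    int main(void)
    {
            unsigned int nr_cpu_ids = 11, bpw = 8;      /* assumed values */
            unsigned int num_maps = (nr_cpu_ids + bpw - 1) / bpw;
            unsigned int total = nr_cpu_ids, i;

            for (i = 0; i < num_maps; i++) {
                    unsigned int depth = total < bpw ? total : bpw;

                    total -= depth;
                    printf("word %u: depth %u\n", i, depth);
            }
            return 0;   /* 11 CPUs -> word 0 depth 8, word 1 depth 3 */
    }
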
1478 | |||
1479 | static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) | ||
957 | { | 1480 | { |
958 | struct blk_mq_hw_ctx *hctx = data; | ||
959 | struct request_queue *q = hctx->queue; | 1481 | struct request_queue *q = hctx->queue; |
960 | struct blk_mq_ctx *ctx; | 1482 | struct blk_mq_ctx *ctx; |
961 | LIST_HEAD(tmp); | 1483 | LIST_HEAD(tmp); |
962 | 1484 | ||
963 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) | ||
964 | return; | ||
965 | |||
966 | /* | 1485 | /* |
967 | * Move ctx entries to new CPU, if this one is going away. | 1486 | * Move ctx entries to new CPU, if this one is going away. |
968 | */ | 1487 | */ |
@@ -971,12 +1490,12 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, | |||
971 | spin_lock(&ctx->lock); | 1490 | spin_lock(&ctx->lock); |
972 | if (!list_empty(&ctx->rq_list)) { | 1491 | if (!list_empty(&ctx->rq_list)) { |
973 | list_splice_init(&ctx->rq_list, &tmp); | 1492 | list_splice_init(&ctx->rq_list, &tmp); |
974 | clear_bit(ctx->index_hw, hctx->ctx_map); | 1493 | blk_mq_hctx_clear_pending(hctx, ctx); |
975 | } | 1494 | } |
976 | spin_unlock(&ctx->lock); | 1495 | spin_unlock(&ctx->lock); |
977 | 1496 | ||
978 | if (list_empty(&tmp)) | 1497 | if (list_empty(&tmp)) |
979 | return; | 1498 | return NOTIFY_OK; |
980 | 1499 | ||
981 | ctx = blk_mq_get_ctx(q); | 1500 | ctx = blk_mq_get_ctx(q); |
982 | spin_lock(&ctx->lock); | 1501 | spin_lock(&ctx->lock); |
@@ -993,210 +1512,103 @@ static void blk_mq_hctx_notify(void *data, unsigned long action, | |||
993 | blk_mq_hctx_mark_pending(hctx, ctx); | 1512 | blk_mq_hctx_mark_pending(hctx, ctx); |
994 | 1513 | ||
995 | spin_unlock(&ctx->lock); | 1514 | spin_unlock(&ctx->lock); |
996 | blk_mq_put_ctx(ctx); | ||
997 | 1515 | ||
998 | blk_mq_run_hw_queue(hctx, true); | 1516 | blk_mq_run_hw_queue(hctx, true); |
1517 | blk_mq_put_ctx(ctx); | ||
1518 | return NOTIFY_OK; | ||
999 | } | 1519 | } |
1000 | 1520 | ||
1001 | static int blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx, | 1521 | static int blk_mq_hctx_cpu_online(struct blk_mq_hw_ctx *hctx, int cpu) |
1002 | int (*init)(void *, struct blk_mq_hw_ctx *, | ||
1003 | struct request *, unsigned int), | ||
1004 | void *data) | ||
1005 | { | 1522 | { |
1006 | unsigned int i; | 1523 | struct request_queue *q = hctx->queue; |
1007 | int ret = 0; | 1524 | struct blk_mq_tag_set *set = q->tag_set; |
1008 | |||
1009 | for (i = 0; i < hctx->queue_depth; i++) { | ||
1010 | struct request *rq = hctx->rqs[i]; | ||
1011 | |||
1012 | ret = init(data, hctx, rq, i); | ||
1013 | if (ret) | ||
1014 | break; | ||
1015 | } | ||
1016 | |||
1017 | return ret; | ||
1018 | } | ||
1019 | 1525 | ||
1020 | int blk_mq_init_commands(struct request_queue *q, | 1526 | if (set->tags[hctx->queue_num]) |
1021 | int (*init)(void *, struct blk_mq_hw_ctx *, | 1527 | return NOTIFY_OK; |
1022 | struct request *, unsigned int), | ||
1023 | void *data) | ||
1024 | { | ||
1025 | struct blk_mq_hw_ctx *hctx; | ||
1026 | unsigned int i; | ||
1027 | int ret = 0; | ||
1028 | 1528 | ||
1029 | queue_for_each_hw_ctx(q, hctx, i) { | 1529 | set->tags[hctx->queue_num] = blk_mq_init_rq_map(set, hctx->queue_num); |
1030 | ret = blk_mq_init_hw_commands(hctx, init, data); | 1530 | if (!set->tags[hctx->queue_num]) |
1031 | if (ret) | 1531 | return NOTIFY_STOP; |
1032 | break; | ||
1033 | } | ||
1034 | 1532 | ||
1035 | return ret; | 1533 | hctx->tags = set->tags[hctx->queue_num]; |
1534 | return NOTIFY_OK; | ||
1036 | } | 1535 | } |
1037 | EXPORT_SYMBOL(blk_mq_init_commands); | ||
1038 | 1536 | ||
1039 | static void blk_mq_free_hw_commands(struct blk_mq_hw_ctx *hctx, | 1537 | static int blk_mq_hctx_notify(void *data, unsigned long action, |
1040 | void (*free)(void *, struct blk_mq_hw_ctx *, | 1538 | unsigned int cpu) |
1041 | struct request *, unsigned int), | ||
1042 | void *data) | ||
1043 | { | 1539 | { |
1044 | unsigned int i; | 1540 | struct blk_mq_hw_ctx *hctx = data; |
1045 | 1541 | ||
1046 | for (i = 0; i < hctx->queue_depth; i++) { | 1542 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) |
1047 | struct request *rq = hctx->rqs[i]; | 1543 | return blk_mq_hctx_cpu_offline(hctx, cpu); |
1544 | else if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | ||
1545 | return blk_mq_hctx_cpu_online(hctx, cpu); | ||
1048 | 1546 | ||
1049 | free(data, hctx, rq, i); | 1547 | return NOTIFY_OK; |
1050 | } | ||
1051 | } | 1548 | } |
1052 | 1549 | ||
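
The notifier callback now returns a notifier-chain code instead of void: NOTIFY_OK lets the hotplug sequence continue, while the online path above returns NOTIFY_STOP when it cannot allocate a tag map for the incoming CPU. The wiring helper in blk-mq-cpu.c presumably just records the callback and its cookie; a sketch under that assumption:

    void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
                                  int (*fn)(void *, unsigned long, unsigned int),
                                  void *data)
    {
        notifier->notify = fn;
        notifier->data = data;
    }
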
1053 | void blk_mq_free_commands(struct request_queue *q, | 1550 | static void blk_mq_exit_hw_queues(struct request_queue *q, |
1054 | void (*free)(void *, struct blk_mq_hw_ctx *, | 1551 | struct blk_mq_tag_set *set, int nr_queue) |
1055 | struct request *, unsigned int), | ||
1056 | void *data) | ||
1057 | { | 1552 | { |
1058 | struct blk_mq_hw_ctx *hctx; | 1553 | struct blk_mq_hw_ctx *hctx; |
1059 | unsigned int i; | 1554 | unsigned int i; |
1060 | 1555 | ||
1061 | queue_for_each_hw_ctx(q, hctx, i) | 1556 | queue_for_each_hw_ctx(q, hctx, i) { |
1062 | blk_mq_free_hw_commands(hctx, free, data); | 1557 | if (i == nr_queue) |
1063 | } | 1558 | break; |
1064 | EXPORT_SYMBOL(blk_mq_free_commands); | ||
1065 | 1559 | ||
1066 | static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx) | 1560 | if (set->ops->exit_hctx) |
1067 | { | 1561 | set->ops->exit_hctx(hctx, i); |
1068 | struct page *page; | ||
1069 | 1562 | ||
1070 | while (!list_empty(&hctx->page_list)) { | 1563 | blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); |
1071 | page = list_first_entry(&hctx->page_list, struct page, lru); | 1564 | kfree(hctx->ctxs); |
1072 | list_del_init(&page->lru); | 1565 | blk_mq_free_bitmap(&hctx->ctx_map); |
1073 | __free_pages(page, page->private); | ||
1074 | } | 1566 | } |
1075 | 1567 | ||
1076 | kfree(hctx->rqs); | ||
1077 | |||
1078 | if (hctx->tags) | ||
1079 | blk_mq_free_tags(hctx->tags); | ||
1080 | } | ||
1081 | |||
1082 | static size_t order_to_size(unsigned int order) | ||
1083 | { | ||
1084 | size_t ret = PAGE_SIZE; | ||
1085 | |||
1086 | while (order--) | ||
1087 | ret *= 2; | ||
1088 | |||
1089 | return ret; | ||
1090 | } | 1568 | } |
1091 | 1569 | ||
1092 | static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, | 1570 | static void blk_mq_free_hw_queues(struct request_queue *q, |
1093 | unsigned int reserved_tags, int node) | 1571 | struct blk_mq_tag_set *set) |
1094 | { | 1572 | { |
1095 | unsigned int i, j, entries_per_page, max_order = 4; | 1573 | struct blk_mq_hw_ctx *hctx; |
1096 | size_t rq_size, left; | 1574 | unsigned int i; |
1097 | |||
1098 | INIT_LIST_HEAD(&hctx->page_list); | ||
1099 | |||
1100 | hctx->rqs = kmalloc_node(hctx->queue_depth * sizeof(struct request *), | ||
1101 | GFP_KERNEL, node); | ||
1102 | if (!hctx->rqs) | ||
1103 | return -ENOMEM; | ||
1104 | |||
1105 | /* | ||
1106 | * rq_size is the size of the request plus driver payload, rounded | ||
1107 | * to the cacheline size | ||
1108 | */ | ||
1109 | rq_size = round_up(sizeof(struct request) + hctx->cmd_size, | ||
1110 | cache_line_size()); | ||
1111 | left = rq_size * hctx->queue_depth; | ||
1112 | |||
1113 | for (i = 0; i < hctx->queue_depth;) { | ||
1114 | int this_order = max_order; | ||
1115 | struct page *page; | ||
1116 | int to_do; | ||
1117 | void *p; | ||
1118 | |||
1119 | while (left < order_to_size(this_order - 1) && this_order) | ||
1120 | this_order--; | ||
1121 | |||
1122 | do { | ||
1123 | page = alloc_pages_node(node, GFP_KERNEL, this_order); | ||
1124 | if (page) | ||
1125 | break; | ||
1126 | if (!this_order--) | ||
1127 | break; | ||
1128 | if (order_to_size(this_order) < rq_size) | ||
1129 | break; | ||
1130 | } while (1); | ||
1131 | |||
1132 | if (!page) | ||
1133 | break; | ||
1134 | |||
1135 | page->private = this_order; | ||
1136 | list_add_tail(&page->lru, &hctx->page_list); | ||
1137 | |||
1138 | p = page_address(page); | ||
1139 | entries_per_page = order_to_size(this_order) / rq_size; | ||
1140 | to_do = min(entries_per_page, hctx->queue_depth - i); | ||
1141 | left -= to_do * rq_size; | ||
1142 | for (j = 0; j < to_do; j++) { | ||
1143 | hctx->rqs[i] = p; | ||
1144 | blk_mq_rq_init(hctx, hctx->rqs[i]); | ||
1145 | p += rq_size; | ||
1146 | i++; | ||
1147 | } | ||
1148 | } | ||
1149 | |||
1150 | if (i < (reserved_tags + BLK_MQ_TAG_MIN)) | ||
1151 | goto err_rq_map; | ||
1152 | else if (i != hctx->queue_depth) { | ||
1153 | hctx->queue_depth = i; | ||
1154 | pr_warn("%s: queue depth set to %u because of low memory\n", | ||
1155 | __func__, i); | ||
1156 | } | ||
1157 | 1575 | ||
1158 | hctx->tags = blk_mq_init_tags(hctx->queue_depth, reserved_tags, node); | 1576 | queue_for_each_hw_ctx(q, hctx, i) { |
1159 | if (!hctx->tags) { | 1577 | free_cpumask_var(hctx->cpumask); |
1160 | err_rq_map: | 1578 | kfree(hctx); |
1161 | blk_mq_free_rq_map(hctx); | ||
1162 | return -ENOMEM; | ||
1163 | } | 1579 | } |
1164 | |||
1165 | return 0; | ||
1166 | } | 1580 | } |
1167 | 1581 | ||
1168 | static int blk_mq_init_hw_queues(struct request_queue *q, | 1582 | static int blk_mq_init_hw_queues(struct request_queue *q, |
1169 | struct blk_mq_reg *reg, void *driver_data) | 1583 | struct blk_mq_tag_set *set) |
1170 | { | 1584 | { |
1171 | struct blk_mq_hw_ctx *hctx; | 1585 | struct blk_mq_hw_ctx *hctx; |
1172 | unsigned int i, j; | 1586 | unsigned int i; |
1173 | 1587 | ||
1174 | /* | 1588 | /* |
1175 | * Initialize hardware queues | 1589 | * Initialize hardware queues |
1176 | */ | 1590 | */ |
1177 | queue_for_each_hw_ctx(q, hctx, i) { | 1591 | queue_for_each_hw_ctx(q, hctx, i) { |
1178 | unsigned int num_maps; | ||
1179 | int node; | 1592 | int node; |
1180 | 1593 | ||
1181 | node = hctx->numa_node; | 1594 | node = hctx->numa_node; |
1182 | if (node == NUMA_NO_NODE) | 1595 | if (node == NUMA_NO_NODE) |
1183 | node = hctx->numa_node = reg->numa_node; | 1596 | node = hctx->numa_node = set->numa_node; |
1184 | 1597 | ||
1185 | INIT_DELAYED_WORK(&hctx->delayed_work, blk_mq_work_fn); | 1598 | INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); |
1599 | INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn); | ||
1186 | spin_lock_init(&hctx->lock); | 1600 | spin_lock_init(&hctx->lock); |
1187 | INIT_LIST_HEAD(&hctx->dispatch); | 1601 | INIT_LIST_HEAD(&hctx->dispatch); |
1188 | hctx->queue = q; | 1602 | hctx->queue = q; |
1189 | hctx->queue_num = i; | 1603 | hctx->queue_num = i; |
1190 | hctx->flags = reg->flags; | 1604 | hctx->flags = set->flags; |
1191 | hctx->queue_depth = reg->queue_depth; | 1605 | hctx->cmd_size = set->cmd_size; |
1192 | hctx->cmd_size = reg->cmd_size; | ||
1193 | 1606 | ||
1194 | blk_mq_init_cpu_notifier(&hctx->cpu_notifier, | 1607 | blk_mq_init_cpu_notifier(&hctx->cpu_notifier, |
1195 | blk_mq_hctx_notify, hctx); | 1608 | blk_mq_hctx_notify, hctx); |
1196 | blk_mq_register_cpu_notifier(&hctx->cpu_notifier); | 1609 | blk_mq_register_cpu_notifier(&hctx->cpu_notifier); |
1197 | 1610 | ||
1198 | if (blk_mq_init_rq_map(hctx, reg->reserved_tags, node)) | 1611 | hctx->tags = set->tags[i]; |
1199 | break; | ||
1200 | 1612 | ||
1201 | /* | 1613 | /* |
1202 | * Allocate space for all possible cpus to avoid allocation in | 1614 | * Allocate space for all possible cpus to avoid allocation in |
@@ -1207,17 +1619,13 @@ static int blk_mq_init_hw_queues(struct request_queue *q, | |||
1207 | if (!hctx->ctxs) | 1619 | if (!hctx->ctxs) |
1208 | break; | 1620 | break; |
1209 | 1621 | ||
1210 | num_maps = ALIGN(nr_cpu_ids, BITS_PER_LONG) / BITS_PER_LONG; | 1622 | if (blk_mq_alloc_bitmap(&hctx->ctx_map, node)) |
1211 | hctx->ctx_map = kzalloc_node(num_maps * sizeof(unsigned long), | ||
1212 | GFP_KERNEL, node); | ||
1213 | if (!hctx->ctx_map) | ||
1214 | break; | 1623 | break; |
1215 | 1624 | ||
1216 | hctx->nr_ctx_map = num_maps; | ||
1217 | hctx->nr_ctx = 0; | 1625 | hctx->nr_ctx = 0; |
1218 | 1626 | ||
1219 | if (reg->ops->init_hctx && | 1627 | if (set->ops->init_hctx && |
1220 | reg->ops->init_hctx(hctx, driver_data, i)) | 1628 | set->ops->init_hctx(hctx, set->driver_data, i)) |
1221 | break; | 1629 | break; |
1222 | } | 1630 | } |
1223 | 1631 | ||
@@ -1227,17 +1635,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q, | |||
1227 | /* | 1635 | /* |
1228 | * Init failed | 1636 | * Init failed |
1229 | */ | 1637 | */ |
1230 | queue_for_each_hw_ctx(q, hctx, j) { | 1638 | blk_mq_exit_hw_queues(q, set, i); |
1231 | if (i == j) | ||
1232 | break; | ||
1233 | |||
1234 | if (reg->ops->exit_hctx) | ||
1235 | reg->ops->exit_hctx(hctx, j); | ||
1236 | |||
1237 | blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); | ||
1238 | blk_mq_free_rq_map(hctx); | ||
1239 | kfree(hctx->ctxs); | ||
1240 | } | ||
1241 | 1639 | ||
1242 | return 1; | 1640 | return 1; |
1243 | } | 1641 | } |
@@ -1258,12 +1656,13 @@ static void blk_mq_init_cpu_queues(struct request_queue *q, | |||
1258 | __ctx->queue = q; | 1656 | __ctx->queue = q; |
1259 | 1657 | ||
1260 | /* If the cpu isn't online, the cpu is mapped to first hctx */ | 1658 | /* If the cpu isn't online, the cpu is mapped to first hctx */ |
1261 | hctx = q->mq_ops->map_queue(q, i); | ||
1262 | hctx->nr_ctx++; | ||
1263 | |||
1264 | if (!cpu_online(i)) | 1659 | if (!cpu_online(i)) |
1265 | continue; | 1660 | continue; |
1266 | 1661 | ||
1662 | hctx = q->mq_ops->map_queue(q, i); | ||
1663 | cpumask_set_cpu(i, hctx->cpumask); | ||
1664 | hctx->nr_ctx++; | ||
1665 | |||
1267 | /* | 1666 | /* |
1268 | * Set local node, IFF we have more than one hw queue. If | 1667 | * Set local node, IFF we have more than one hw queue. If |
1269 | * not, we remain on the home node of the device | 1668 | * not, we remain on the home node of the device |
@@ -1280,6 +1679,7 @@ static void blk_mq_map_swqueue(struct request_queue *q) | |||
1280 | struct blk_mq_ctx *ctx; | 1679 | struct blk_mq_ctx *ctx; |
1281 | 1680 | ||
1282 | queue_for_each_hw_ctx(q, hctx, i) { | 1681 | queue_for_each_hw_ctx(q, hctx, i) { |
1682 | cpumask_clear(hctx->cpumask); | ||
1283 | hctx->nr_ctx = 0; | 1683 | hctx->nr_ctx = 0; |
1284 | } | 1684 | } |
1285 | 1685 | ||
@@ -1288,115 +1688,205 @@ static void blk_mq_map_swqueue(struct request_queue *q) | |||
1288 | */ | 1688 | */ |
1289 | queue_for_each_ctx(q, ctx, i) { | 1689 | queue_for_each_ctx(q, ctx, i) { |
1290 | /* If the cpu isn't online, the cpu is mapped to first hctx */ | 1690 | /* If the cpu isn't online, the cpu is mapped to first hctx */ |
1691 | if (!cpu_online(i)) | ||
1692 | continue; | ||
1693 | |||
1291 | hctx = q->mq_ops->map_queue(q, i); | 1694 | hctx = q->mq_ops->map_queue(q, i); |
1695 | cpumask_set_cpu(i, hctx->cpumask); | ||
1292 | ctx->index_hw = hctx->nr_ctx; | 1696 | ctx->index_hw = hctx->nr_ctx; |
1293 | hctx->ctxs[hctx->nr_ctx++] = ctx; | 1697 | hctx->ctxs[hctx->nr_ctx++] = ctx; |
1294 | } | 1698 | } |
1699 | |||
1700 | queue_for_each_hw_ctx(q, hctx, i) { | ||
1701 | /* | ||
1702 | * If no software queues are mapped to this hardware queue, | ||
1703 | * disable it and free the request entries | ||
1704 | */ | ||
1705 | if (!hctx->nr_ctx) { | ||
1706 | struct blk_mq_tag_set *set = q->tag_set; | ||
1707 | |||
1708 | if (set->tags[i]) { | ||
1709 | blk_mq_free_rq_map(set, set->tags[i], i); | ||
1710 | set->tags[i] = NULL; | ||
1711 | hctx->tags = NULL; | ||
1712 | } | ||
1713 | continue; | ||
1714 | } | ||
1715 | |||
1716 | /* | ||
1717 | * Initialize batch round-robin counts | ||
1718 | */ | ||
1719 | hctx->next_cpu = cpumask_first(hctx->cpumask); | ||
1720 | hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; | ||
1721 | } | ||
1295 | } | 1722 | } |
1296 | 1723 | ||
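
The next_cpu/next_cpu_batch pair seeded at the end of blk_mq_map_swqueue() feeds the round-robin placement of asynchronous queue runs: every BLK_MQ_CPU_WORK_BATCH runs, the hctx advances to the next CPU in its mask, wrapping at the end. Roughly (a sketch; the actual selection lives in blk_mq_hctx_next_cpu(), outside this hunk):

    if (--hctx->next_cpu_batch <= 0) {
        int next = cpumask_next(hctx->next_cpu, hctx->cpumask);

        if (next >= nr_cpu_ids)
            next = cpumask_first(hctx->cpumask);
        hctx->next_cpu = next;
        hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
    }
    /* queue the run work on hctx->next_cpu */
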
1297 | struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, | 1724 | static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set) |
1298 | void *driver_data) | ||
1299 | { | 1725 | { |
1300 | struct blk_mq_hw_ctx **hctxs; | 1726 | struct blk_mq_hw_ctx *hctx; |
1301 | struct blk_mq_ctx *ctx; | ||
1302 | struct request_queue *q; | 1727 | struct request_queue *q; |
1728 | bool shared; | ||
1303 | int i; | 1729 | int i; |
1304 | 1730 | ||
1305 | if (!reg->nr_hw_queues || | 1731 | if (set->tag_list.next == set->tag_list.prev) |
1306 | !reg->ops->queue_rq || !reg->ops->map_queue || | 1732 | shared = false; |
1307 | !reg->ops->alloc_hctx || !reg->ops->free_hctx) | 1733 | else |
1308 | return ERR_PTR(-EINVAL); | 1734 | shared = true; |
1309 | 1735 | ||
1310 | if (!reg->queue_depth) | 1736 | list_for_each_entry(q, &set->tag_list, tag_set_list) { |
1311 | reg->queue_depth = BLK_MQ_MAX_DEPTH; | 1737 | blk_mq_freeze_queue(q); |
1312 | else if (reg->queue_depth > BLK_MQ_MAX_DEPTH) { | 1738 | |
1313 | pr_err("blk-mq: queuedepth too large (%u)\n", reg->queue_depth); | 1739 | queue_for_each_hw_ctx(q, hctx, i) { |
1314 | reg->queue_depth = BLK_MQ_MAX_DEPTH; | 1740 | if (shared) |
1741 | hctx->flags |= BLK_MQ_F_TAG_SHARED; | ||
1742 | else | ||
1743 | hctx->flags &= ~BLK_MQ_F_TAG_SHARED; | ||
1744 | } | ||
1745 | blk_mq_unfreeze_queue(q); | ||
1315 | } | 1746 | } |
1747 | } | ||
1748 | |||
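
The `set->tag_list.next == set->tag_list.prev` test in blk_mq_update_tag_set_depth() is true for both an empty and a single-entry list, so BLK_MQ_F_TAG_SHARED is set only once a second queue joins the tag set. An equivalent, more explicit form using the list helpers (a sketch):

    bool shared = !list_empty(&set->tag_list) &&
                  !list_is_singular(&set->tag_list);
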
1749 | static void blk_mq_del_queue_tag_set(struct request_queue *q) | ||
1750 | { | ||
1751 | struct blk_mq_tag_set *set = q->tag_set; | ||
1752 | |||
1753 | blk_mq_freeze_queue(q); | ||
1754 | |||
1755 | mutex_lock(&set->tag_list_lock); | ||
1756 | list_del_init(&q->tag_set_list); | ||
1757 | blk_mq_update_tag_set_depth(set); | ||
1758 | mutex_unlock(&set->tag_list_lock); | ||
1759 | |||
1760 | blk_mq_unfreeze_queue(q); | ||
1761 | } | ||
1762 | |||
1763 | static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, | ||
1764 | struct request_queue *q) | ||
1765 | { | ||
1766 | q->tag_set = set; | ||
1767 | |||
1768 | mutex_lock(&set->tag_list_lock); | ||
1769 | list_add_tail(&q->tag_set_list, &set->tag_list); | ||
1770 | blk_mq_update_tag_set_depth(set); | ||
1771 | mutex_unlock(&set->tag_list_lock); | ||
1772 | } | ||
1316 | 1773 | ||
1317 | if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) | 1774 | struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) |
1318 | return ERR_PTR(-EINVAL); | 1775 | { |
1776 | struct blk_mq_hw_ctx **hctxs; | ||
1777 | struct blk_mq_ctx *ctx; | ||
1778 | struct request_queue *q; | ||
1779 | unsigned int *map; | ||
1780 | int i; | ||
1319 | 1781 | ||
1320 | ctx = alloc_percpu(struct blk_mq_ctx); | 1782 | ctx = alloc_percpu(struct blk_mq_ctx); |
1321 | if (!ctx) | 1783 | if (!ctx) |
1322 | return ERR_PTR(-ENOMEM); | 1784 | return ERR_PTR(-ENOMEM); |
1323 | 1785 | ||
1324 | hctxs = kmalloc_node(reg->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, | 1786 | hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, |
1325 | reg->numa_node); | 1787 | set->numa_node); |
1326 | 1788 | ||
1327 | if (!hctxs) | 1789 | if (!hctxs) |
1328 | goto err_percpu; | 1790 | goto err_percpu; |
1329 | 1791 | ||
1330 | for (i = 0; i < reg->nr_hw_queues; i++) { | 1792 | map = blk_mq_make_queue_map(set); |
1331 | hctxs[i] = reg->ops->alloc_hctx(reg, i); | 1793 | if (!map) |
1794 | goto err_map; | ||
1795 | |||
1796 | for (i = 0; i < set->nr_hw_queues; i++) { | ||
1797 | int node = blk_mq_hw_queue_to_node(map, i); | ||
1798 | |||
1799 | hctxs[i] = kzalloc_node(sizeof(struct blk_mq_hw_ctx), | ||
1800 | GFP_KERNEL, node); | ||
1332 | if (!hctxs[i]) | 1801 | if (!hctxs[i]) |
1333 | goto err_hctxs; | 1802 | goto err_hctxs; |
1334 | 1803 | ||
1335 | hctxs[i]->numa_node = NUMA_NO_NODE; | 1804 | if (!zalloc_cpumask_var(&hctxs[i]->cpumask, GFP_KERNEL)) |
1805 | goto err_hctxs; | ||
1806 | |||
1807 | atomic_set(&hctxs[i]->nr_active, 0); | ||
1808 | hctxs[i]->numa_node = node; | ||
1336 | hctxs[i]->queue_num = i; | 1809 | hctxs[i]->queue_num = i; |
1337 | } | 1810 | } |
1338 | 1811 | ||
1339 | q = blk_alloc_queue_node(GFP_KERNEL, reg->numa_node); | 1812 | q = blk_alloc_queue_node(GFP_KERNEL, set->numa_node); |
1340 | if (!q) | 1813 | if (!q) |
1341 | goto err_hctxs; | 1814 | goto err_hctxs; |
1342 | 1815 | ||
1343 | q->mq_map = blk_mq_make_queue_map(reg); | 1816 | if (percpu_counter_init(&q->mq_usage_counter, 0)) |
1344 | if (!q->mq_map) | ||
1345 | goto err_map; | 1817 | goto err_map; |
1346 | 1818 | ||
1347 | setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); | 1819 | setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); |
1348 | blk_queue_rq_timeout(q, 30000); | 1820 | blk_queue_rq_timeout(q, 30000); |
1349 | 1821 | ||
1350 | q->nr_queues = nr_cpu_ids; | 1822 | q->nr_queues = nr_cpu_ids; |
1351 | q->nr_hw_queues = reg->nr_hw_queues; | 1823 | q->nr_hw_queues = set->nr_hw_queues; |
1824 | q->mq_map = map; | ||
1352 | 1825 | ||
1353 | q->queue_ctx = ctx; | 1826 | q->queue_ctx = ctx; |
1354 | q->queue_hw_ctx = hctxs; | 1827 | q->queue_hw_ctx = hctxs; |
1355 | 1828 | ||
1356 | q->mq_ops = reg->ops; | 1829 | q->mq_ops = set->ops; |
1357 | q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; | 1830 | q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; |
1358 | 1831 | ||
1359 | q->sg_reserved_size = INT_MAX; | 1832 | q->sg_reserved_size = INT_MAX; |
1360 | 1833 | ||
1361 | blk_queue_make_request(q, blk_mq_make_request); | 1834 | INIT_WORK(&q->requeue_work, blk_mq_requeue_work); |
1362 | blk_queue_rq_timed_out(q, reg->ops->timeout); | 1835 | INIT_LIST_HEAD(&q->requeue_list); |
1363 | if (reg->timeout) | 1836 | spin_lock_init(&q->requeue_lock); |
1364 | blk_queue_rq_timeout(q, reg->timeout); | 1837 | |
1838 | if (q->nr_hw_queues > 1) | ||
1839 | blk_queue_make_request(q, blk_mq_make_request); | ||
1840 | else | ||
1841 | blk_queue_make_request(q, blk_sq_make_request); | ||
1842 | |||
1843 | blk_queue_rq_timed_out(q, blk_mq_rq_timed_out); | ||
1844 | if (set->timeout) | ||
1845 | blk_queue_rq_timeout(q, set->timeout); | ||
1365 | 1846 | ||
1366 | if (reg->ops->complete) | 1847 | /* |
1367 | blk_queue_softirq_done(q, reg->ops->complete); | 1848 | * Do this after blk_queue_make_request() overrides it... |
1849 | */ | ||
1850 | q->nr_requests = set->queue_depth; | ||
1851 | |||
1852 | if (set->ops->complete) | ||
1853 | blk_queue_softirq_done(q, set->ops->complete); | ||
1368 | 1854 | ||
1369 | blk_mq_init_flush(q); | 1855 | blk_mq_init_flush(q); |
1370 | blk_mq_init_cpu_queues(q, reg->nr_hw_queues); | 1856 | blk_mq_init_cpu_queues(q, set->nr_hw_queues); |
1371 | 1857 | ||
1372 | q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size, | 1858 | q->flush_rq = kzalloc(round_up(sizeof(struct request) + |
1373 | cache_line_size()), GFP_KERNEL); | 1859 | set->cmd_size, cache_line_size()), |
1860 | GFP_KERNEL); | ||
1374 | if (!q->flush_rq) | 1861 | if (!q->flush_rq) |
1375 | goto err_hw; | 1862 | goto err_hw; |
1376 | 1863 | ||
1377 | if (blk_mq_init_hw_queues(q, reg, driver_data)) | 1864 | if (blk_mq_init_hw_queues(q, set)) |
1378 | goto err_flush_rq; | 1865 | goto err_flush_rq; |
1379 | 1866 | ||
1380 | blk_mq_map_swqueue(q); | ||
1381 | |||
1382 | mutex_lock(&all_q_mutex); | 1867 | mutex_lock(&all_q_mutex); |
1383 | list_add_tail(&q->all_q_node, &all_q_list); | 1868 | list_add_tail(&q->all_q_node, &all_q_list); |
1384 | mutex_unlock(&all_q_mutex); | 1869 | mutex_unlock(&all_q_mutex); |
1385 | 1870 | ||
1871 | blk_mq_add_queue_tag_set(set, q); | ||
1872 | |||
1873 | blk_mq_map_swqueue(q); | ||
1874 | |||
1386 | return q; | 1875 | return q; |
1387 | 1876 | ||
1388 | err_flush_rq: | 1877 | err_flush_rq: |
1389 | kfree(q->flush_rq); | 1878 | kfree(q->flush_rq); |
1390 | err_hw: | 1879 | err_hw: |
1391 | kfree(q->mq_map); | ||
1392 | err_map: | ||
1393 | blk_cleanup_queue(q); | 1880 | blk_cleanup_queue(q); |
1394 | err_hctxs: | 1881 | err_hctxs: |
1395 | for (i = 0; i < reg->nr_hw_queues; i++) { | 1882 | kfree(map); |
1883 | for (i = 0; i < set->nr_hw_queues; i++) { | ||
1396 | if (!hctxs[i]) | 1884 | if (!hctxs[i]) |
1397 | break; | 1885 | break; |
1398 | reg->ops->free_hctx(hctxs[i], i); | 1886 | free_cpumask_var(hctxs[i]->cpumask); |
1887 | kfree(hctxs[i]); | ||
1399 | } | 1888 | } |
1889 | err_map: | ||
1400 | kfree(hctxs); | 1890 | kfree(hctxs); |
1401 | err_percpu: | 1891 | err_percpu: |
1402 | free_percpu(ctx); | 1892 | free_percpu(ctx); |
@@ -1406,18 +1896,14 @@ EXPORT_SYMBOL(blk_mq_init_queue); | |||
1406 | 1896 | ||
1407 | void blk_mq_free_queue(struct request_queue *q) | 1897 | void blk_mq_free_queue(struct request_queue *q) |
1408 | { | 1898 | { |
1409 | struct blk_mq_hw_ctx *hctx; | 1899 | struct blk_mq_tag_set *set = q->tag_set; |
1410 | int i; | ||
1411 | 1900 | ||
1412 | queue_for_each_hw_ctx(q, hctx, i) { | 1901 | blk_mq_del_queue_tag_set(q); |
1413 | kfree(hctx->ctx_map); | 1902 | |
1414 | kfree(hctx->ctxs); | 1903 | blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); |
1415 | blk_mq_free_rq_map(hctx); | 1904 | blk_mq_free_hw_queues(q, set); |
1416 | blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); | 1905 | |
1417 | if (q->mq_ops->exit_hctx) | 1906 | percpu_counter_destroy(&q->mq_usage_counter); |
1418 | q->mq_ops->exit_hctx(hctx, i); | ||
1419 | q->mq_ops->free_hctx(hctx, i); | ||
1420 | } | ||
1421 | 1907 | ||
1422 | free_percpu(q->queue_ctx); | 1908 | free_percpu(q->queue_ctx); |
1423 | kfree(q->queue_hw_ctx); | 1909 | kfree(q->queue_hw_ctx); |
@@ -1456,10 +1942,10 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, | |||
1456 | struct request_queue *q; | 1942 | struct request_queue *q; |
1457 | 1943 | ||
1458 | /* | 1944 | /* |
1459 | * Before new mapping is established, hotadded cpu might already start | 1945 | * Before new mappings are established, hotadded cpu might already |
1460 | * handling requests. This doesn't break anything as we map offline | 1946 | * start handling requests. This doesn't break anything as we map |
1461 | * CPUs to first hardware queue. We will re-init queue below to get | 1947 | * offline CPUs to first hardware queue. We will re-init the queue |
1462 | * optimal settings. | 1948 | * below to get optimal settings. |
1463 | */ | 1949 | */ |
1464 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN && | 1950 | if (action != CPU_DEAD && action != CPU_DEAD_FROZEN && |
1465 | action != CPU_ONLINE && action != CPU_ONLINE_FROZEN) | 1951 | action != CPU_ONLINE && action != CPU_ONLINE_FROZEN) |
@@ -1472,6 +1958,81 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, | |||
1472 | return NOTIFY_OK; | 1958 | return NOTIFY_OK; |
1473 | } | 1959 | } |
1474 | 1960 | ||
1961 | int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) | ||
1962 | { | ||
1963 | int i; | ||
1964 | |||
1965 | if (!set->nr_hw_queues) | ||
1966 | return -EINVAL; | ||
1967 | if (!set->queue_depth || set->queue_depth > BLK_MQ_MAX_DEPTH) | ||
1968 | return -EINVAL; | ||
1969 | if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) | ||
1970 | return -EINVAL; | ||
1971 | |||
1972 | if (!set->nr_hw_queues || !set->ops->queue_rq || !set->ops->map_queue) | ||
1973 | return -EINVAL; | ||
1974 | |||
1975 | |||
1976 | set->tags = kmalloc_node(set->nr_hw_queues * | ||
1977 | sizeof(struct blk_mq_tags *), | ||
1978 | GFP_KERNEL, set->numa_node); | ||
1979 | if (!set->tags) | ||
1980 | goto out; | ||
1981 | |||
1982 | for (i = 0; i < set->nr_hw_queues; i++) { | ||
1983 | set->tags[i] = blk_mq_init_rq_map(set, i); | ||
1984 | if (!set->tags[i]) | ||
1985 | goto out_unwind; | ||
1986 | } | ||
1987 | |||
1988 | mutex_init(&set->tag_list_lock); | ||
1989 | INIT_LIST_HEAD(&set->tag_list); | ||
1990 | |||
1991 | return 0; | ||
1992 | |||
1993 | out_unwind: | ||
1994 | while (--i >= 0) | ||
1995 | blk_mq_free_rq_map(set, set->tags[i], i); | ||
1996 | out: | ||
1997 | return -ENOMEM; | ||
1998 | } | ||
1999 | EXPORT_SYMBOL(blk_mq_alloc_tag_set); | ||
2000 | |||
2001 | void blk_mq_free_tag_set(struct blk_mq_tag_set *set) | ||
2002 | { | ||
2003 | int i; | ||
2004 | |||
2005 | for (i = 0; i < set->nr_hw_queues; i++) { | ||
2006 | if (set->tags[i]) | ||
2007 | blk_mq_free_rq_map(set, set->tags[i], i); | ||
2008 | } | ||
2009 | |||
2010 | kfree(set->tags); | ||
2011 | } | ||
2012 | EXPORT_SYMBOL(blk_mq_free_tag_set); | ||
2013 | |||
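
blk_mq_alloc_tag_set() and blk_mq_free_tag_set() replace the old blk_mq_reg registration: the driver now owns a long-lived struct blk_mq_tag_set, allocates the per-hw-queue request/tag maps once, and can hand the same set to blk_mq_init_queue() for one or several queues. A minimal driver-side sketch; my_dev, my_cmd and the my_* callbacks are hypothetical, while the set fields and calls are the API added here:

    static struct blk_mq_ops my_mq_ops = {
        .queue_rq     = my_queue_rq,      /* hypothetical driver callback */
        .map_queue    = blk_mq_map_queue, /* stock CPU -> queue mapping */
        .init_request = my_init_request,  /* hypothetical; may be omitted */
    };

    static int my_probe(struct my_dev *dev)
    {
        int ret;

        memset(&dev->tag_set, 0, sizeof(dev->tag_set));
        dev->tag_set.ops          = &my_mq_ops;
        dev->tag_set.nr_hw_queues = 1;
        dev->tag_set.queue_depth  = 64;
        dev->tag_set.numa_node    = NUMA_NO_NODE;
        dev->tag_set.cmd_size     = sizeof(struct my_cmd);
        dev->tag_set.flags        = BLK_MQ_F_SHOULD_MERGE;
        dev->tag_set.driver_data  = dev;

        ret = blk_mq_alloc_tag_set(&dev->tag_set);
        if (ret)
            return ret;

        dev->queue = blk_mq_init_queue(&dev->tag_set);
        if (IS_ERR(dev->queue)) {
            blk_mq_free_tag_set(&dev->tag_set);
            return PTR_ERR(dev->queue);
        }
        return 0;
    }

Teardown is the mirror image: blk_cleanup_queue() on each queue, then blk_mq_free_tag_set() once no queue uses the set.
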
2014 | int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr) | ||
2015 | { | ||
2016 | struct blk_mq_tag_set *set = q->tag_set; | ||
2017 | struct blk_mq_hw_ctx *hctx; | ||
2018 | int i, ret; | ||
2019 | |||
2020 | if (!set || nr > set->queue_depth) | ||
2021 | return -EINVAL; | ||
2022 | |||
2023 | ret = 0; | ||
2024 | queue_for_each_hw_ctx(q, hctx, i) { | ||
2025 | ret = blk_mq_tag_update_depth(hctx->tags, nr); | ||
2026 | if (ret) | ||
2027 | break; | ||
2028 | } | ||
2029 | |||
2030 | if (!ret) | ||
2031 | q->nr_requests = nr; | ||
2032 | |||
2033 | return ret; | ||
2034 | } | ||
2035 | |||
1475 | void blk_mq_disable_hotplug(void) | 2036 | void blk_mq_disable_hotplug(void) |
1476 | { | 2037 | { |
1477 | mutex_lock(&all_q_mutex); | 2038 | mutex_lock(&all_q_mutex); |
diff --git a/block/blk-mq.h b/block/blk-mq.h index ebbe6bac9d61..ff5e6bf0f691 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef INT_BLK_MQ_H | 1 | #ifndef INT_BLK_MQ_H |
2 | #define INT_BLK_MQ_H | 2 | #define INT_BLK_MQ_H |
3 | 3 | ||
4 | struct blk_mq_tag_set; | ||
5 | |||
4 | struct blk_mq_ctx { | 6 | struct blk_mq_ctx { |
5 | struct { | 7 | struct { |
6 | spinlock_t lock; | 8 | spinlock_t lock; |
@@ -9,7 +11,8 @@ struct blk_mq_ctx { | |||
9 | 11 | ||
10 | unsigned int cpu; | 12 | unsigned int cpu; |
11 | unsigned int index_hw; | 13 | unsigned int index_hw; |
12 | unsigned int ipi_redirect; | 14 | |
15 | unsigned int last_tag ____cacheline_aligned_in_smp; | ||
13 | 16 | ||
14 | /* incremented at dispatch time */ | 17 | /* incremented at dispatch time */ |
15 | unsigned long rq_dispatched[2]; | 18 | unsigned long rq_dispatched[2]; |
@@ -20,21 +23,23 @@ struct blk_mq_ctx { | |||
20 | 23 | ||
21 | struct request_queue *queue; | 24 | struct request_queue *queue; |
22 | struct kobject kobj; | 25 | struct kobject kobj; |
23 | }; | 26 | } ____cacheline_aligned_in_smp; |
24 | 27 | ||
25 | void __blk_mq_complete_request(struct request *rq); | 28 | void __blk_mq_complete_request(struct request *rq); |
26 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); | 29 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); |
27 | void blk_mq_init_flush(struct request_queue *q); | 30 | void blk_mq_init_flush(struct request_queue *q); |
28 | void blk_mq_drain_queue(struct request_queue *q); | 31 | void blk_mq_drain_queue(struct request_queue *q); |
29 | void blk_mq_free_queue(struct request_queue *q); | 32 | void blk_mq_free_queue(struct request_queue *q); |
30 | void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq); | 33 | void blk_mq_clone_flush_request(struct request *flush_rq, |
34 | struct request *orig_rq); | ||
35 | int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); | ||
31 | 36 | ||
32 | /* | 37 | /* |
33 | * CPU hotplug helpers | 38 | * CPU hotplug helpers |
34 | */ | 39 | */ |
35 | struct blk_mq_cpu_notifier; | 40 | struct blk_mq_cpu_notifier; |
36 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, | 41 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, |
37 | void (*fn)(void *, unsigned long, unsigned int), | 42 | int (*fn)(void *, unsigned long, unsigned int), |
38 | void *data); | 43 | void *data); |
39 | void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier); | 44 | void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier); |
40 | void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier); | 45 | void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier); |
@@ -45,10 +50,17 @@ void blk_mq_disable_hotplug(void); | |||
45 | /* | 50 | /* |
46 | * CPU -> queue mappings | 51 | * CPU -> queue mappings |
47 | */ | 52 | */ |
48 | struct blk_mq_reg; | 53 | extern unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set); |
49 | extern unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg); | ||
50 | extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues); | 54 | extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues); |
55 | extern int blk_mq_hw_queue_to_node(unsigned int *map, unsigned int); | ||
51 | 56 | ||
52 | void blk_mq_add_timer(struct request *rq); | 57 | /* |
58 | * Basic implementation of sparser bitmap, allowing the user to spread | ||
59 | * the bits over more cachelines. | ||
60 | */ | ||
61 | struct blk_align_bitmap { | ||
62 | unsigned long word; | ||
63 | unsigned long depth; | ||
64 | } ____cacheline_aligned_in_smp; | ||
53 | 65 | ||
54 | #endif | 66 | #endif |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 7500f876dae4..23321fbab293 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -48,11 +48,10 @@ static ssize_t queue_requests_show(struct request_queue *q, char *page) | |||
48 | static ssize_t | 48 | static ssize_t |
49 | queue_requests_store(struct request_queue *q, const char *page, size_t count) | 49 | queue_requests_store(struct request_queue *q, const char *page, size_t count) |
50 | { | 50 | { |
51 | struct request_list *rl; | ||
52 | unsigned long nr; | 51 | unsigned long nr; |
53 | int ret; | 52 | int ret, err; |
54 | 53 | ||
55 | if (!q->request_fn) | 54 | if (!q->request_fn && !q->mq_ops) |
56 | return -EINVAL; | 55 | return -EINVAL; |
57 | 56 | ||
58 | ret = queue_var_store(&nr, page, count); | 57 | ret = queue_var_store(&nr, page, count); |
@@ -62,40 +61,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) | |||
62 | if (nr < BLKDEV_MIN_RQ) | 61 | if (nr < BLKDEV_MIN_RQ) |
63 | nr = BLKDEV_MIN_RQ; | 62 | nr = BLKDEV_MIN_RQ; |
64 | 63 | ||
65 | spin_lock_irq(q->queue_lock); | 64 | if (q->request_fn) |
66 | q->nr_requests = nr; | 65 | err = blk_update_nr_requests(q, nr); |
67 | blk_queue_congestion_threshold(q); | 66 | else |
68 | 67 | err = blk_mq_update_nr_requests(q, nr); | |
69 | /* congestion isn't cgroup aware and follows root blkcg for now */ | 68 | |
70 | rl = &q->root_rl; | 69 | if (err) |
71 | 70 | return err; | |
72 | if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) | ||
73 | blk_set_queue_congested(q, BLK_RW_SYNC); | ||
74 | else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) | ||
75 | blk_clear_queue_congested(q, BLK_RW_SYNC); | ||
76 | |||
77 | if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) | ||
78 | blk_set_queue_congested(q, BLK_RW_ASYNC); | ||
79 | else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) | ||
80 | blk_clear_queue_congested(q, BLK_RW_ASYNC); | ||
81 | |||
82 | blk_queue_for_each_rl(rl, q) { | ||
83 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { | ||
84 | blk_set_rl_full(rl, BLK_RW_SYNC); | ||
85 | } else { | ||
86 | blk_clear_rl_full(rl, BLK_RW_SYNC); | ||
87 | wake_up(&rl->wait[BLK_RW_SYNC]); | ||
88 | } | ||
89 | |||
90 | if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { | ||
91 | blk_set_rl_full(rl, BLK_RW_ASYNC); | ||
92 | } else { | ||
93 | blk_clear_rl_full(rl, BLK_RW_ASYNC); | ||
94 | wake_up(&rl->wait[BLK_RW_ASYNC]); | ||
95 | } | ||
96 | } | ||
97 | 71 | ||
98 | spin_unlock_irq(q->queue_lock); | ||
99 | return ret; | 72 | return ret; |
100 | } | 73 | } |
101 | 74 | ||
@@ -544,8 +517,6 @@ static void blk_release_queue(struct kobject *kobj) | |||
544 | if (q->queue_tags) | 517 | if (q->queue_tags) |
545 | __blk_queue_free_tags(q); | 518 | __blk_queue_free_tags(q); |
546 | 519 | ||
547 | percpu_counter_destroy(&q->mq_usage_counter); | ||
548 | |||
549 | if (q->mq_ops) | 520 | if (q->mq_ops) |
550 | blk_mq_free_queue(q); | 521 | blk_mq_free_queue(q); |
551 | 522 | ||
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 033745cd7fba..9353b4683359 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
@@ -744,7 +744,7 @@ static inline void throtl_extend_slice(struct throtl_grp *tg, bool rw, | |||
744 | static bool throtl_slice_used(struct throtl_grp *tg, bool rw) | 744 | static bool throtl_slice_used(struct throtl_grp *tg, bool rw) |
745 | { | 745 | { |
746 | if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) | 746 | if (time_in_range(jiffies, tg->slice_start[rw], tg->slice_end[rw])) |
747 | return 0; | 747 | return false; |
748 | 748 | ||
749 | return 1; | 749 | return 1; |
750 | } | 750 | } |
@@ -842,7 +842,7 @@ static bool tg_with_in_iops_limit(struct throtl_grp *tg, struct bio *bio, | |||
842 | if (tg->io_disp[rw] + 1 <= io_allowed) { | 842 | if (tg->io_disp[rw] + 1 <= io_allowed) { |
843 | if (wait) | 843 | if (wait) |
844 | *wait = 0; | 844 | *wait = 0; |
845 | return 1; | 845 | return true; |
846 | } | 846 | } |
847 | 847 | ||
848 | /* Calc approx time to dispatch */ | 848 | /* Calc approx time to dispatch */ |
@@ -880,7 +880,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio, | |||
880 | if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) { | 880 | if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) { |
881 | if (wait) | 881 | if (wait) |
882 | *wait = 0; | 882 | *wait = 0; |
883 | return 1; | 883 | return true; |
884 | } | 884 | } |
885 | 885 | ||
886 | /* Calc approx time to dispatch */ | 886 | /* Calc approx time to dispatch */ |
@@ -923,7 +923,7 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio, | |||
923 | if (tg->bps[rw] == -1 && tg->iops[rw] == -1) { | 923 | if (tg->bps[rw] == -1 && tg->iops[rw] == -1) { |
924 | if (wait) | 924 | if (wait) |
925 | *wait = 0; | 925 | *wait = 0; |
926 | return 1; | 926 | return true; |
927 | } | 927 | } |
928 | 928 | ||
929 | /* | 929 | /* |
@@ -1258,7 +1258,7 @@ out_unlock: | |||
1258 | * of throtl_data->service_queue. Those bio's are ready and issued by this | 1258 | * of throtl_data->service_queue. Those bio's are ready and issued by this |
1259 | * function. | 1259 | * function. |
1260 | */ | 1260 | */ |
1261 | void blk_throtl_dispatch_work_fn(struct work_struct *work) | 1261 | static void blk_throtl_dispatch_work_fn(struct work_struct *work) |
1262 | { | 1262 | { |
1263 | struct throtl_data *td = container_of(work, struct throtl_data, | 1263 | struct throtl_data *td = container_of(work, struct throtl_data, |
1264 | dispatch_work); | 1264 | dispatch_work); |
diff --git a/block/blk-timeout.c b/block/blk-timeout.c index d96f7061c6fd..43e8b515806f 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c | |||
@@ -96,11 +96,7 @@ static void blk_rq_timed_out(struct request *req) | |||
96 | __blk_complete_request(req); | 96 | __blk_complete_request(req); |
97 | break; | 97 | break; |
98 | case BLK_EH_RESET_TIMER: | 98 | case BLK_EH_RESET_TIMER: |
99 | if (q->mq_ops) | 99 | blk_add_timer(req); |
100 | blk_mq_add_timer(req); | ||
101 | else | ||
102 | blk_add_timer(req); | ||
103 | |||
104 | blk_clear_rq_complete(req); | 100 | blk_clear_rq_complete(req); |
105 | break; | 101 | break; |
106 | case BLK_EH_NOT_HANDLED: | 102 | case BLK_EH_NOT_HANDLED: |
@@ -170,7 +166,26 @@ void blk_abort_request(struct request *req) | |||
170 | } | 166 | } |
171 | EXPORT_SYMBOL_GPL(blk_abort_request); | 167 | EXPORT_SYMBOL_GPL(blk_abort_request); |
172 | 168 | ||
173 | void __blk_add_timer(struct request *req, struct list_head *timeout_list) | 169 | unsigned long blk_rq_timeout(unsigned long timeout) |
170 | { | ||
171 | unsigned long maxt; | ||
172 | |||
173 | maxt = round_jiffies_up(jiffies + BLK_MAX_TIMEOUT); | ||
174 | if (time_after(timeout, maxt)) | ||
175 | timeout = maxt; | ||
176 | |||
177 | return timeout; | ||
178 | } | ||
179 | |||
180 | /** | ||
181 | * blk_add_timer - Start timeout timer for a single request | ||
182 | * @req: request that is about to start running. | ||
183 | * | ||
184 | * Notes: | ||
185 | * Each request has its own timer, and as it is added to the queue, we | ||
186 | * set up the timer. When the request completes, we cancel the timer. | ||
187 | */ | ||
188 | void blk_add_timer(struct request *req) | ||
174 | { | 189 | { |
175 | struct request_queue *q = req->q; | 190 | struct request_queue *q = req->q; |
176 | unsigned long expiry; | 191 | unsigned long expiry; |
@@ -188,32 +203,29 @@ void __blk_add_timer(struct request *req, struct list_head *timeout_list) | |||
188 | req->timeout = q->rq_timeout; | 203 | req->timeout = q->rq_timeout; |
189 | 204 | ||
190 | req->deadline = jiffies + req->timeout; | 205 | req->deadline = jiffies + req->timeout; |
191 | if (timeout_list) | 206 | if (!q->mq_ops) |
192 | list_add_tail(&req->timeout_list, timeout_list); | 207 | list_add_tail(&req->timeout_list, &req->q->timeout_list); |
193 | 208 | ||
194 | /* | 209 | /* |
195 | * If the timer isn't already pending or this timeout is earlier | 210 | * If the timer isn't already pending or this timeout is earlier |
196 | * than an existing one, modify the timer. Round up to next nearest | 211 | * than an existing one, modify the timer. Round up to next nearest |
197 | * second. | 212 | * second. |
198 | */ | 213 | */ |
199 | expiry = round_jiffies_up(req->deadline); | 214 | expiry = blk_rq_timeout(round_jiffies_up(req->deadline)); |
200 | 215 | ||
201 | if (!timer_pending(&q->timeout) || | 216 | if (!timer_pending(&q->timeout) || |
202 | time_before(expiry, q->timeout.expires)) | 217 | time_before(expiry, q->timeout.expires)) { |
203 | mod_timer(&q->timeout, expiry); | 218 | unsigned long diff = q->timeout.expires - expiry; |
204 | 219 | ||
205 | } | 220 | /* |
221 | * Due to the timer slack added to group timers, the timer | ||
222 | * will often expire a little ahead of what we asked for. | ||
223 | * So apply some tolerance here too; otherwise we keep | ||
224 | * modifying the timer, because the expires value for X | ||
225 | * will be X + something. | ||
226 | */ | ||
227 | if (diff >= HZ / 2) | ||
228 | mod_timer(&q->timeout, expiry); | ||
229 | } | ||
206 | 230 | ||
207 | /** | ||
208 | * blk_add_timer - Start timeout timer for a single request | ||
209 | * @req: request that is about to start running. | ||
210 | * | ||
211 | * Notes: | ||
212 | * Each request has its own timer, and as it is added to the queue, we | ||
213 | * set up the timer. When the request completes, we cancel the timer. | ||
214 | */ | ||
215 | void blk_add_timer(struct request *req) | ||
216 | { | ||
217 | __blk_add_timer(req, &req->q->timeout_list); | ||
218 | } | 231 | } |
219 | |||
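
The combined effect of blk_rq_timeout() and the HZ/2 tolerance above: the queue timer is never armed more than BLK_MAX_TIMEOUT ahead (5 seconds, per the blk.h hunk below), so long per-request timeouts are handled by periodic re-arming rather than one long sleep, and near-identical expiries no longer trigger a mod_timer() on every request. A worked sketch:

    /* a 30 second request timeout... */
    unsigned long deadline = jiffies + 30 * HZ;

    /* ...still arms the queue timer at most ~5 seconds out */
    unsigned long expiry = blk_rq_timeout(round_jiffies_up(deadline));
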
diff --git a/block/blk.h b/block/blk.h index 1d880f1f957f..45385e9abf6f 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -9,6 +9,9 @@ | |||
9 | /* Number of requests a "batching" process may submit */ | 9 | /* Number of requests a "batching" process may submit */ |
10 | #define BLK_BATCH_REQ 32 | 10 | #define BLK_BATCH_REQ 32 |
11 | 11 | ||
12 | /* Max future timer expiry for timeouts */ | ||
13 | #define BLK_MAX_TIMEOUT (5 * HZ) | ||
14 | |||
12 | extern struct kmem_cache *blk_requestq_cachep; | 15 | extern struct kmem_cache *blk_requestq_cachep; |
13 | extern struct kmem_cache *request_cachep; | 16 | extern struct kmem_cache *request_cachep; |
14 | extern struct kobj_type blk_queue_ktype; | 17 | extern struct kobj_type blk_queue_ktype; |
@@ -37,9 +40,9 @@ bool __blk_end_bidi_request(struct request *rq, int error, | |||
37 | void blk_rq_timed_out_timer(unsigned long data); | 40 | void blk_rq_timed_out_timer(unsigned long data); |
38 | void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, | 41 | void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, |
39 | unsigned int *next_set); | 42 | unsigned int *next_set); |
40 | void __blk_add_timer(struct request *req, struct list_head *timeout_list); | 43 | unsigned long blk_rq_timeout(unsigned long timeout); |
44 | void blk_add_timer(struct request *req); | ||
41 | void blk_delete_timer(struct request *); | 45 | void blk_delete_timer(struct request *); |
42 | void blk_add_timer(struct request *); | ||
43 | 46 | ||
44 | 47 | ||
45 | bool bio_attempt_front_merge(struct request_queue *q, struct request *req, | 48 | bool bio_attempt_front_merge(struct request_queue *q, struct request *req, |
@@ -185,6 +188,8 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) | |||
185 | return q->nr_congestion_off; | 188 | return q->nr_congestion_off; |
186 | } | 189 | } |
187 | 190 | ||
191 | extern int blk_update_nr_requests(struct request_queue *, unsigned int); | ||
192 | |||
188 | /* | 193 | /* |
189 | * Contribute to IO statistics IFF: | 194 | * Contribute to IO statistics IFF: |
190 | * | 195 | * |
diff --git a/mm/bounce.c b/block/bounce.c index 523918b8c6dc..523918b8c6dc 100644 --- a/mm/bounce.c +++ b/block/bounce.c | |||
diff --git a/block/bsg.c b/block/bsg.c index 420a5a9f1b23..e5214c148096 100644 --- a/block/bsg.c +++ b/block/bsg.c | |||
@@ -1008,7 +1008,7 @@ int bsg_register_queue(struct request_queue *q, struct device *parent, | |||
1008 | /* | 1008 | /* |
1009 | * we need a proper transport to send commands, not a stacked device | 1009 | * we need a proper transport to send commands, not a stacked device |
1010 | */ | 1010 | */ |
1011 | if (!q->request_fn) | 1011 | if (!queue_is_rq_based(q)) |
1012 | return 0; | 1012 | return 0; |
1013 | 1013 | ||
1014 | bcd = &q->bsg_dev; | 1014 | bcd = &q->bsg_dev; |
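
queue_is_rq_based() accepts both legacy request_fn queues and blk-mq queues, so bsg can now attach to either. The helper, added in blkdev.h alongside this series, presumably reads:

    static inline bool queue_is_rq_based(struct request_queue *q)
    {
        return q->request_fn || q->mq_ops;
    }
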
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e0985f1955e7..22dffebc7c73 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -908,7 +908,7 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) | |||
908 | { | 908 | { |
909 | if (cfqd->busy_queues) { | 909 | if (cfqd->busy_queues) { |
910 | cfq_log(cfqd, "schedule dispatch"); | 910 | cfq_log(cfqd, "schedule dispatch"); |
911 | kblockd_schedule_work(cfqd->queue, &cfqd->unplug_work); | 911 | kblockd_schedule_work(&cfqd->unplug_work); |
912 | } | 912 | } |
913 | } | 913 | } |
914 | 914 | ||
@@ -4460,7 +4460,7 @@ out_free: | |||
4460 | static ssize_t | 4460 | static ssize_t |
4461 | cfq_var_show(unsigned int var, char *page) | 4461 | cfq_var_show(unsigned int var, char *page) |
4462 | { | 4462 | { |
4463 | return sprintf(page, "%d\n", var); | 4463 | return sprintf(page, "%u\n", var); |
4464 | } | 4464 | } |
4465 | 4465 | ||
4466 | static ssize_t | 4466 | static ssize_t |
diff --git a/fs/ioprio.c b/block/ioprio.c index e50170ca7c33..e50170ca7c33 100644 --- a/fs/ioprio.c +++ b/block/ioprio.c | |||
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c index 748dea4f34dc..758da2287d9a 100644 --- a/drivers/block/amiflop.c +++ b/drivers/block/amiflop.c | |||
@@ -1406,7 +1406,7 @@ next_segment: | |||
1406 | 1406 | ||
1407 | track = block / (floppy->dtype->sects * floppy->type->sect_mult); | 1407 | track = block / (floppy->dtype->sects * floppy->type->sect_mult); |
1408 | sector = block % (floppy->dtype->sects * floppy->type->sect_mult); | 1408 | sector = block % (floppy->dtype->sects * floppy->type->sect_mult); |
1409 | data = rq->buffer + 512 * cnt; | 1409 | data = bio_data(rq->bio) + 512 * cnt; |
1410 | #ifdef DEBUG | 1410 | #ifdef DEBUG |
1411 | printk("access to track %d, sector %d, with buffer at " | 1411 | printk("access to track %d, sector %d, with buffer at " |
1412 | "0x%08lx\n", track, sector, data); | 1412 | "0x%08lx\n", track, sector, data); |
diff --git a/drivers/block/ataflop.c b/drivers/block/ataflop.c index 96b629e1f0c9..7e8a55f8917c 100644 --- a/drivers/block/ataflop.c +++ b/drivers/block/ataflop.c | |||
@@ -1484,7 +1484,7 @@ repeat: | |||
1484 | ReqCnt = 0; | 1484 | ReqCnt = 0; |
1485 | ReqCmd = rq_data_dir(fd_request); | 1485 | ReqCmd = rq_data_dir(fd_request); |
1486 | ReqBlock = blk_rq_pos(fd_request); | 1486 | ReqBlock = blk_rq_pos(fd_request); |
1487 | ReqBuffer = fd_request->buffer; | 1487 | ReqBuffer = bio_data(fd_request->bio); |
1488 | setup_req_params( drive ); | 1488 | setup_req_params( drive ); |
1489 | do_fd_action( drive ); | 1489 | do_fd_action( drive ); |
1490 | 1490 | ||
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c index 73894ca33956..4595c22f33f7 100644 --- a/drivers/block/cciss.c +++ b/drivers/block/cciss.c | |||
@@ -4080,7 +4080,7 @@ static void cciss_interrupt_mode(ctlr_info_t *h) | |||
4080 | goto default_int_mode; | 4080 | goto default_int_mode; |
4081 | 4081 | ||
4082 | if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) { | 4082 | if (pci_find_capability(h->pdev, PCI_CAP_ID_MSIX)) { |
4083 | err = pci_enable_msix(h->pdev, cciss_msix_entries, 4); | 4083 | err = pci_enable_msix_exact(h->pdev, cciss_msix_entries, 4); |
4084 | if (!err) { | 4084 | if (!err) { |
4085 | h->intr[0] = cciss_msix_entries[0].vector; | 4085 | h->intr[0] = cciss_msix_entries[0].vector; |
4086 | h->intr[1] = cciss_msix_entries[1].vector; | 4086 | h->intr[1] = cciss_msix_entries[1].vector; |
@@ -4088,10 +4088,6 @@ static void cciss_interrupt_mode(ctlr_info_t *h) | |||
4088 | h->intr[3] = cciss_msix_entries[3].vector; | 4088 | h->intr[3] = cciss_msix_entries[3].vector; |
4089 | h->msix_vector = 1; | 4089 | h->msix_vector = 1; |
4090 | return; | 4090 | return; |
4091 | } | ||
4092 | if (err > 0) { | ||
4093 | dev_warn(&h->pdev->dev, | ||
4094 | "only %d MSI-X vectors available\n", err); | ||
4095 | } else { | 4091 | } else { |
4096 | dev_warn(&h->pdev->dev, | 4092 | dev_warn(&h->pdev->dev, |
4097 | "MSI-X init failed %d\n", err); | 4093 | "MSI-X init failed %d\n", err); |
diff --git a/drivers/block/drbd/drbd_actlog.c b/drivers/block/drbd/drbd_actlog.c index 90ae4ba8f9ee..05a1780ffa85 100644 --- a/drivers/block/drbd/drbd_actlog.c +++ b/drivers/block/drbd/drbd_actlog.c | |||
@@ -29,7 +29,6 @@ | |||
29 | #include <linux/drbd_limits.h> | 29 | #include <linux/drbd_limits.h> |
30 | #include <linux/dynamic_debug.h> | 30 | #include <linux/dynamic_debug.h> |
31 | #include "drbd_int.h" | 31 | #include "drbd_int.h" |
32 | #include "drbd_wrappers.h" | ||
33 | 32 | ||
34 | 33 | ||
35 | enum al_transaction_types { | 34 | enum al_transaction_types { |
@@ -204,7 +203,7 @@ int drbd_md_sync_page_io(struct drbd_device *device, struct drbd_backing_dev *bd | |||
204 | 203 | ||
205 | BUG_ON(!bdev->md_bdev); | 204 | BUG_ON(!bdev->md_bdev); |
206 | 205 | ||
207 | drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n", | 206 | dynamic_drbd_dbg(device, "meta_data io: %s [%d]:%s(,%llus,%s) %pS\n", |
208 | current->comm, current->pid, __func__, | 207 | current->comm, current->pid, __func__, |
209 | (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", | 208 | (unsigned long long)sector, (rw & WRITE) ? "WRITE" : "READ", |
210 | (void*)_RET_IP_ ); | 209 | (void*)_RET_IP_ ); |
@@ -276,7 +275,6 @@ bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval | |||
276 | return _al_get(device, first, true); | 275 | return _al_get(device, first, true); |
277 | } | 276 | } |
278 | 277 | ||
279 | static | ||
280 | bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i) | 278 | bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i) |
281 | { | 279 | { |
282 | /* for bios crossing activity log extent boundaries, | 280 | /* for bios crossing activity log extent boundaries, |
@@ -846,7 +844,7 @@ void __drbd_set_in_sync(struct drbd_device *device, sector_t sector, int size, | |||
846 | int wake_up = 0; | 844 | int wake_up = 0; |
847 | unsigned long flags; | 845 | unsigned long flags; |
848 | 846 | ||
849 | if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { | 847 | if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { |
850 | drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", | 848 | drbd_err(device, "drbd_set_in_sync: sector=%llus size=%d nonsense!\n", |
851 | (unsigned long long)sector, size); | 849 | (unsigned long long)sector, size); |
852 | return; | 850 | return; |
@@ -920,7 +918,7 @@ int __drbd_set_out_of_sync(struct drbd_device *device, sector_t sector, int size | |||
920 | if (size == 0) | 918 | if (size == 0) |
921 | return 0; | 919 | return 0; |
922 | 920 | ||
923 | if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { | 921 | if (size < 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { |
924 | drbd_err(device, "sector: %llus, size: %d\n", | 922 | drbd_err(device, "sector: %llus, size: %d\n", |
925 | (unsigned long long)sector, size); | 923 | (unsigned long long)sector, size); |
926 | return 0; | 924 | return 0; |
@@ -1023,8 +1021,7 @@ int drbd_rs_begin_io(struct drbd_device *device, sector_t sector) | |||
1023 | unsigned int enr = BM_SECT_TO_EXT(sector); | 1021 | unsigned int enr = BM_SECT_TO_EXT(sector); |
1024 | struct bm_extent *bm_ext; | 1022 | struct bm_extent *bm_ext; |
1025 | int i, sig; | 1023 | int i, sig; |
1026 | int sa = 200; /* Step aside 200 times, then grab the extent and let app-IO wait. | 1024 | bool sa; |
1027 | 200 times -> 20 seconds. */ | ||
1028 | 1025 | ||
1029 | retry: | 1026 | retry: |
1030 | sig = wait_event_interruptible(device->al_wait, | 1027 | sig = wait_event_interruptible(device->al_wait, |
@@ -1035,12 +1032,15 @@ retry: | |||
1035 | if (test_bit(BME_LOCKED, &bm_ext->flags)) | 1032 | if (test_bit(BME_LOCKED, &bm_ext->flags)) |
1036 | return 0; | 1033 | return 0; |
1037 | 1034 | ||
1035 | /* step aside only while we are above c-min-rate; unless disabled. */ | ||
1036 | sa = drbd_rs_c_min_rate_throttle(device); | ||
1037 | |||
1038 | for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { | 1038 | for (i = 0; i < AL_EXT_PER_BM_SECT; i++) { |
1039 | sig = wait_event_interruptible(device->al_wait, | 1039 | sig = wait_event_interruptible(device->al_wait, |
1040 | !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) || | 1040 | !_is_in_al(device, enr * AL_EXT_PER_BM_SECT + i) || |
1041 | test_bit(BME_PRIORITY, &bm_ext->flags)); | 1041 | (sa && test_bit(BME_PRIORITY, &bm_ext->flags))); |
1042 | 1042 | ||
1043 | if (sig || (test_bit(BME_PRIORITY, &bm_ext->flags) && sa)) { | 1043 | if (sig || (sa && test_bit(BME_PRIORITY, &bm_ext->flags))) { |
1044 | spin_lock_irq(&device->al_lock); | 1044 | spin_lock_irq(&device->al_lock); |
1045 | if (lc_put(device->resync, &bm_ext->lce) == 0) { | 1045 | if (lc_put(device->resync, &bm_ext->lce) == 0) { |
1046 | bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */ | 1046 | bm_ext->flags = 0; /* clears BME_NO_WRITES and eventually BME_PRIORITY */ |
@@ -1052,9 +1052,6 @@ retry: | |||
1052 | return -EINTR; | 1052 | return -EINTR; |
1053 | if (schedule_timeout_interruptible(HZ/10)) | 1053 | if (schedule_timeout_interruptible(HZ/10)) |
1054 | return -EINTR; | 1054 | return -EINTR; |
1055 | if (sa && --sa == 0) | ||
1056 | drbd_warn(device, "drbd_rs_begin_io() stepped aside for 20sec." | ||
1057 | "Resync stalled?\n"); | ||
1058 | goto retry; | 1055 | goto retry; |
1059 | } | 1056 | } |
1060 | } | 1057 | } |
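
The fixed step-aside counter (200 iterations, about 20 seconds) is replaced by a rate policy: resync yields to application I/O only while it is running above the configured c-min-rate, as reported by drbd_rs_c_min_rate_throttle(). A sketch of the intent, with the rate accounting elided and names hypothetical:

    /* yield to app I/O only while resync exceeds its floor rate */
    static bool rs_should_step_aside(unsigned int cur_rate_kb,
                                     unsigned int c_min_rate_kb)
    {
        if (c_min_rate_kb == 0)     /* 0 disables the throttle */
            return false;
        return cur_rate_kb > c_min_rate_kb;
    }
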
@@ -1288,7 +1285,7 @@ void drbd_rs_failed_io(struct drbd_device *device, sector_t sector, int size) | |||
1288 | sector_t esector, nr_sectors; | 1285 | sector_t esector, nr_sectors; |
1289 | int wake_up = 0; | 1286 | int wake_up = 0; |
1290 | 1287 | ||
1291 | if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) { | 1288 | if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_DISCARD_SIZE) { |
1292 | drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", | 1289 | drbd_err(device, "drbd_rs_failed_io: sector=%llus size=%d nonsense!\n", |
1293 | (unsigned long long)sector, size); | 1290 | (unsigned long long)sector, size); |
1294 | return; | 1291 | return; |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index e7093d4291f1..a76ceb344d64 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -382,6 +382,12 @@ enum { | |||
382 | __EE_CALL_AL_COMPLETE_IO, | 382 | __EE_CALL_AL_COMPLETE_IO, |
383 | __EE_MAY_SET_IN_SYNC, | 383 | __EE_MAY_SET_IN_SYNC, |
384 | 384 | ||
385 | /* is this a TRIM aka REQ_DISCARD? */ | ||
386 | __EE_IS_TRIM, | ||
387 | /* our lower level cannot handle trim, | ||
388 | * and we want to fall back to zeroout instead */ | ||
389 | __EE_IS_TRIM_USE_ZEROOUT, | ||
390 | |||
385 | /* In case a barrier failed, | 391 | /* In case a barrier failed, |
386 | * we need to resubmit without the barrier flag. */ | 392 | * we need to resubmit without the barrier flag. */ |
387 | __EE_RESUBMITTED, | 393 | __EE_RESUBMITTED, |
@@ -405,7 +411,9 @@ enum { | |||
405 | }; | 411 | }; |
406 | #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) | 412 | #define EE_CALL_AL_COMPLETE_IO (1<<__EE_CALL_AL_COMPLETE_IO) |
407 | #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) | 413 | #define EE_MAY_SET_IN_SYNC (1<<__EE_MAY_SET_IN_SYNC) |
408 | #define EE_RESUBMITTED (1<<__EE_RESUBMITTED) | 414 | #define EE_IS_TRIM (1<<__EE_IS_TRIM) |
415 | #define EE_IS_TRIM_USE_ZEROOUT (1<<__EE_IS_TRIM_USE_ZEROOUT) | ||
416 | #define EE_RESUBMITTED (1<<__EE_RESUBMITTED) | ||
409 | #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) | 417 | #define EE_WAS_ERROR (1<<__EE_WAS_ERROR) |
410 | #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) | 418 | #define EE_HAS_DIGEST (1<<__EE_HAS_DIGEST) |
411 | #define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) | 419 | #define EE_RESTART_REQUESTS (1<<__EE_RESTART_REQUESTS) |
@@ -579,6 +587,7 @@ struct drbd_resource { | |||
579 | struct list_head resources; | 587 | struct list_head resources; |
580 | struct res_opts res_opts; | 588 | struct res_opts res_opts; |
581 | struct mutex conf_update; /* mutex for ready-copy-update of net_conf and disk_conf */ | 589 | struct mutex conf_update; /* mutex for ready-copy-update of net_conf and disk_conf */ |
590 | struct mutex adm_mutex; /* mutex to serialize administrative requests */ | ||
582 | spinlock_t req_lock; | 591 | spinlock_t req_lock; |
583 | 592 | ||
584 | unsigned susp:1; /* IO suspended by user */ | 593 | unsigned susp:1; /* IO suspended by user */ |
@@ -609,6 +618,7 @@ struct drbd_connection { | |||
609 | struct drbd_socket data; /* data/barrier/cstate/parameter packets */ | 618 | struct drbd_socket data; /* data/barrier/cstate/parameter packets */ |
610 | struct drbd_socket meta; /* ping/ack (metadata) packets */ | 619 | struct drbd_socket meta; /* ping/ack (metadata) packets */ |
611 | int agreed_pro_version; /* actually used protocol version */ | 620 | int agreed_pro_version; /* actually used protocol version */ |
621 | u32 agreed_features; | ||
612 | unsigned long last_received; /* in jiffies, either socket */ | 622 | unsigned long last_received; /* in jiffies, either socket */ |
613 | unsigned int ko_count; | 623 | unsigned int ko_count; |
614 | 624 | ||
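
The new `agreed_features` field records which optional protocol features both peers support; later hunks gate discard support on it. A sketch of the idea, with the FF_TRIM bit value assumed since this hunk does not show it:

#include <stdint.h>
#include <stdio.h>

#define FF_TRIM 1u  /* assumed feature bit, not taken from this hunk */

int main(void)
{
        uint32_t ours = FF_TRIM, theirs = FF_TRIM;
        /* a feature is usable only if both sides advertised it */
        uint32_t agreed_features = ours & theirs;
        printf("trim %s\n", (agreed_features & FF_TRIM) ? "usable" : "unavailable");
        return 0;
}
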
@@ -814,6 +824,28 @@ struct drbd_device { | |||
814 | struct submit_worker submit; | 824 | struct submit_worker submit; |
815 | }; | 825 | }; |
816 | 826 | ||
827 | struct drbd_config_context { | ||
828 | /* assigned from drbd_genlmsghdr */ | ||
829 | unsigned int minor; | ||
830 | /* assigned from request attributes, if present */ | ||
831 | unsigned int volume; | ||
832 | #define VOLUME_UNSPECIFIED (-1U) | ||
833 | /* pointer into the request skb, | ||
834 | * limited lifetime! */ | ||
835 | char *resource_name; | ||
836 | struct nlattr *my_addr; | ||
837 | struct nlattr *peer_addr; | ||
838 | |||
839 | /* reply buffer */ | ||
840 | struct sk_buff *reply_skb; | ||
841 | /* pointer into reply buffer */ | ||
842 | struct drbd_genlmsghdr *reply_dh; | ||
843 | /* resolved from attributes, if possible */ | ||
844 | struct drbd_device *device; | ||
845 | struct drbd_resource *resource; | ||
846 | struct drbd_connection *connection; | ||
847 | }; | ||
848 | |||
817 | static inline struct drbd_device *minor_to_device(unsigned int minor) | 849 | static inline struct drbd_device *minor_to_device(unsigned int minor) |
818 | { | 850 | { |
819 | return (struct drbd_device *)idr_find(&drbd_devices, minor); | 851 | return (struct drbd_device *)idr_find(&drbd_devices, minor); |
@@ -821,7 +853,7 @@ static inline struct drbd_device *minor_to_device(unsigned int minor) | |||
821 | 853 | ||
822 | static inline struct drbd_peer_device *first_peer_device(struct drbd_device *device) | 854 | static inline struct drbd_peer_device *first_peer_device(struct drbd_device *device) |
823 | { | 855 | { |
824 | return list_first_entry(&device->peer_devices, struct drbd_peer_device, peer_devices); | 856 | return list_first_entry_or_null(&device->peer_devices, struct drbd_peer_device, peer_devices); |
825 | } | 857 | } |
826 | 858 | ||
827 | #define for_each_resource(resource, _resources) \ | 859 | #define for_each_resource(resource, _resources) \ |
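
Switching `first_peer_device()` to `list_first_entry_or_null()` matters because on an empty list `list_first_entry()` hands back the list head itself reinterpreted as an entry, which is garbage. A minimal userspace model of the difference:

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

#define LIST_HEAD_INIT(name) { &(name), &(name) }
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))
#define list_first_entry_or_null(head, type, member) \
        ((head)->next != (head) ? container_of((head)->next, type, member) : NULL)

struct peer_device { int vnr; struct list_head peer_devices; };

int main(void)
{
        struct list_head empty = LIST_HEAD_INIT(empty);
        struct peer_device *pd =
                list_first_entry_or_null(&empty, struct peer_device, peer_devices);
        printf("%s\n", pd ? "entry" : "NULL");  /* prints "NULL" */
        return 0;
}
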
@@ -1139,6 +1171,12 @@ struct bm_extent { | |||
1139 | #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */ | 1171 | #define DRBD_MAX_SIZE_H80_PACKET (1U << 15) /* Header 80 only allows packets up to 32KiB data */ |
1140 | #define DRBD_MAX_BIO_SIZE_P95 (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */ | 1172 | #define DRBD_MAX_BIO_SIZE_P95 (1U << 17) /* Protocol 95 to 99 allows bios up to 128KiB */ |
1141 | 1173 | ||
1174 | /* For now, don't allow more than one activity log extent worth of data | ||
1175 | * to be discarded in one go. We may need to rework drbd_al_begin_io() | ||
1176 | * to allow for even larger discard ranges */ | ||
1177 | #define DRBD_MAX_DISCARD_SIZE AL_EXTENT_SIZE | ||
1178 | #define DRBD_MAX_DISCARD_SECTORS (DRBD_MAX_DISCARD_SIZE >> 9) | ||
1179 | |||
1142 | extern int drbd_bm_init(struct drbd_device *device); | 1180 | extern int drbd_bm_init(struct drbd_device *device); |
1143 | extern int drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits); | 1181 | extern int drbd_bm_resize(struct drbd_device *device, sector_t sectors, int set_new_bits); |
1144 | extern void drbd_bm_cleanup(struct drbd_device *device); | 1182 | extern void drbd_bm_cleanup(struct drbd_device *device); |
@@ -1229,9 +1267,9 @@ extern struct bio *bio_alloc_drbd(gfp_t gfp_mask); | |||
1229 | extern rwlock_t global_state_lock; | 1267 | extern rwlock_t global_state_lock; |
1230 | 1268 | ||
1231 | extern int conn_lowest_minor(struct drbd_connection *connection); | 1269 | extern int conn_lowest_minor(struct drbd_connection *connection); |
1232 | enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr); | 1270 | extern enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor); |
1233 | extern void drbd_destroy_device(struct kref *kref); | 1271 | extern void drbd_destroy_device(struct kref *kref); |
1234 | extern void drbd_delete_device(struct drbd_device *mdev); | 1272 | extern void drbd_delete_device(struct drbd_device *device); |
1235 | 1273 | ||
1236 | extern struct drbd_resource *drbd_create_resource(const char *name); | 1274 | extern struct drbd_resource *drbd_create_resource(const char *name); |
1237 | extern void drbd_free_resource(struct drbd_resource *resource); | 1275 | extern void drbd_free_resource(struct drbd_resource *resource); |
@@ -1257,7 +1295,7 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); | |||
1257 | 1295 | ||
1258 | 1296 | ||
1259 | /* drbd_nl.c */ | 1297 | /* drbd_nl.c */ |
1260 | extern int drbd_msg_put_info(const char *info); | 1298 | extern int drbd_msg_put_info(struct sk_buff *skb, const char *info); |
1261 | extern void drbd_suspend_io(struct drbd_device *device); | 1299 | extern void drbd_suspend_io(struct drbd_device *device); |
1262 | extern void drbd_resume_io(struct drbd_device *device); | 1300 | extern void drbd_resume_io(struct drbd_device *device); |
1263 | extern char *ppsize(char *buf, unsigned long long size); | 1301 | extern char *ppsize(char *buf, unsigned long long size); |
@@ -1283,6 +1321,10 @@ extern void conn_try_outdate_peer_async(struct drbd_connection *connection); | |||
1283 | extern int drbd_khelper(struct drbd_device *device, char *cmd); | 1321 | extern int drbd_khelper(struct drbd_device *device, char *cmd); |
1284 | 1322 | ||
1285 | /* drbd_worker.c */ | 1323 | /* drbd_worker.c */ |
1324 | /* bi_end_io handlers */ | ||
1325 | extern void drbd_md_io_complete(struct bio *bio, int error); | ||
1326 | extern void drbd_peer_request_endio(struct bio *bio, int error); | ||
1327 | extern void drbd_request_endio(struct bio *bio, int error); | ||
1286 | extern int drbd_worker(struct drbd_thread *thi); | 1328 | extern int drbd_worker(struct drbd_thread *thi); |
1287 | enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor); | 1329 | enum drbd_ret_code drbd_resync_after_valid(struct drbd_device *device, int o_minor); |
1288 | void drbd_resync_after_changed(struct drbd_device *device); | 1330 | void drbd_resync_after_changed(struct drbd_device *device); |
@@ -1332,16 +1374,20 @@ extern int w_start_resync(struct drbd_work *, int); | |||
1332 | extern void resync_timer_fn(unsigned long data); | 1374 | extern void resync_timer_fn(unsigned long data); |
1333 | extern void start_resync_timer_fn(unsigned long data); | 1375 | extern void start_resync_timer_fn(unsigned long data); |
1334 | 1376 | ||
1377 | extern void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req); | ||
1378 | |||
1335 | /* drbd_receiver.c */ | 1379 | /* drbd_receiver.c */ |
1336 | extern int drbd_receiver(struct drbd_thread *thi); | 1380 | extern int drbd_receiver(struct drbd_thread *thi); |
1337 | extern int drbd_asender(struct drbd_thread *thi); | 1381 | extern int drbd_asender(struct drbd_thread *thi); |
1338 | extern int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector); | 1382 | extern bool drbd_rs_c_min_rate_throttle(struct drbd_device *device); |
1383 | extern bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector); | ||
1339 | extern int drbd_submit_peer_request(struct drbd_device *, | 1384 | extern int drbd_submit_peer_request(struct drbd_device *, |
1340 | struct drbd_peer_request *, const unsigned, | 1385 | struct drbd_peer_request *, const unsigned, |
1341 | const int); | 1386 | const int); |
1342 | extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *); | 1387 | extern int drbd_free_peer_reqs(struct drbd_device *, struct list_head *); |
1343 | extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64, | 1388 | extern struct drbd_peer_request *drbd_alloc_peer_req(struct drbd_peer_device *, u64, |
1344 | sector_t, unsigned int, | 1389 | sector_t, unsigned int, |
1390 | bool, | ||
1345 | gfp_t) __must_hold(local); | 1391 | gfp_t) __must_hold(local); |
1346 | extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *, | 1392 | extern void __drbd_free_peer_req(struct drbd_device *, struct drbd_peer_request *, |
1347 | int); | 1393 | int); |
@@ -1401,6 +1447,37 @@ static inline void drbd_tcp_quickack(struct socket *sock) | |||
1401 | (char*)&val, sizeof(val)); | 1447 | (char*)&val, sizeof(val)); |
1402 | } | 1448 | } |
1403 | 1449 | ||
1450 | /* sets the number of 512 byte sectors of our virtual device */ | ||
1451 | static inline void drbd_set_my_capacity(struct drbd_device *device, | ||
1452 | sector_t size) | ||
1453 | { | ||
1454 | /* set_capacity(device->this_bdev->bd_disk, size); */ | ||
1455 | set_capacity(device->vdisk, size); | ||
1456 | device->this_bdev->bd_inode->i_size = (loff_t)size << 9; | ||
1457 | } | ||
1458 | |||
1459 | /* | ||
1460 | * used to submit our private bio | ||
1461 | */ | ||
1462 | static inline void drbd_generic_make_request(struct drbd_device *device, | ||
1463 | int fault_type, struct bio *bio) | ||
1464 | { | ||
1465 | __release(local); | ||
1466 | if (!bio->bi_bdev) { | ||
1467 | printk(KERN_ERR "drbd%d: drbd_generic_make_request: " | ||
1468 | "bio->bi_bdev == NULL\n", | ||
1469 | device_to_minor(device)); | ||
1470 | dump_stack(); | ||
1471 | bio_endio(bio, -ENODEV); | ||
1472 | return; | ||
1473 | } | ||
1474 | |||
1475 | if (drbd_insert_fault(device, fault_type)) | ||
1476 | bio_endio(bio, -EIO); | ||
1477 | else | ||
1478 | generic_make_request(bio); | ||
1479 | } | ||
1480 | |||
1404 | void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo); | 1481 | void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo); |
1405 | 1482 | ||
1406 | /* drbd_proc.c */ | 1483 | /* drbd_proc.c */ |
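
The `drbd_generic_make_request()` inline moved into this header wraps submission with sanity checking and optional fault injection: complete the bio with an injected error, or forward it. An illustrative userspace sketch of that pattern; all names here are invented, not DRBD's:

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

static bool insert_fault(int rate_percent)
{
        return (rand() % 100) < rate_percent;  /* fault with given probability */
}

static void submit(int id, int rate_percent)
{
        if (insert_fault(rate_percent))
                printf("bio %d: completed with -EIO (injected)\n", id);
        else
                printf("bio %d: forwarded to lower device\n", id);
}

int main(void)
{
        for (int i = 0; i < 5; i++)
                submit(i, 20);  /* roughly one in five fails */
        return 0;
}
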
@@ -1410,6 +1487,7 @@ extern const char *drbd_conn_str(enum drbd_conns s); | |||
1410 | extern const char *drbd_role_str(enum drbd_role s); | 1487 | extern const char *drbd_role_str(enum drbd_role s); |
1411 | 1488 | ||
1412 | /* drbd_actlog.c */ | 1489 | /* drbd_actlog.c */ |
1490 | extern bool drbd_al_begin_io_prepare(struct drbd_device *device, struct drbd_interval *i); | ||
1413 | extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i); | 1491 | extern int drbd_al_begin_io_nonblock(struct drbd_device *device, struct drbd_interval *i); |
1414 | extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate); | 1492 | extern void drbd_al_begin_io_commit(struct drbd_device *device, bool delegate); |
1415 | extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i); | 1493 | extern bool drbd_al_begin_io_fastpath(struct drbd_device *device, struct drbd_interval *i); |
@@ -2144,7 +2222,7 @@ static inline void drbd_md_flush(struct drbd_device *device) | |||
2144 | 2222 | ||
2145 | static inline struct drbd_connection *first_connection(struct drbd_resource *resource) | 2223 | static inline struct drbd_connection *first_connection(struct drbd_resource *resource) |
2146 | { | 2224 | { |
2147 | return list_first_entry(&resource->connections, | 2225 | return list_first_entry_or_null(&resource->connections, |
2148 | struct drbd_connection, connections); | 2226 | struct drbd_connection, connections); |
2149 | } | 2227 | } |
2150 | 2228 | ||
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 331e5cc1227d..960645c26e6f 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -1607,8 +1607,8 @@ static u32 bio_flags_to_wire(struct drbd_connection *connection, unsigned long b | |||
1607 | return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0; | 1607 | return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0; |
1608 | } | 1608 | } |
1609 | 1609 | ||
1610 | /* Used to send write requests | 1610 | /* Used to send write or TRIM aka REQ_DISCARD requests |
1611 | * R_PRIMARY -> Peer (P_DATA) | 1611 | * R_PRIMARY -> Peer (P_DATA, P_TRIM) |
1612 | */ | 1612 | */ |
1613 | int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req) | 1613 | int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request *req) |
1614 | { | 1614 | { |
@@ -1640,6 +1640,16 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * | |||
1640 | dp_flags |= DP_SEND_WRITE_ACK; | 1640 | dp_flags |= DP_SEND_WRITE_ACK; |
1641 | } | 1641 | } |
1642 | p->dp_flags = cpu_to_be32(dp_flags); | 1642 | p->dp_flags = cpu_to_be32(dp_flags); |
1643 | |||
1644 | if (dp_flags & DP_DISCARD) { | ||
1645 | struct p_trim *t = (struct p_trim*)p; | ||
1646 | t->size = cpu_to_be32(req->i.size); | ||
1647 | err = __send_command(peer_device->connection, device->vnr, sock, P_TRIM, sizeof(*t), NULL, 0); | ||
1648 | goto out; | ||
1649 | } | ||
1650 | |||
1651 | /* our digest is still only over the payload. | ||
1652 | * TRIM does not carry any payload. */ | ||
1643 | if (dgs) | 1653 | if (dgs) |
1644 | drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1); | 1654 | drbd_csum_bio(peer_device->connection->integrity_tfm, req->master_bio, p + 1); |
1645 | err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size); | 1655 | err = __send_command(peer_device->connection, device->vnr, sock, P_DATA, sizeof(*p) + dgs, NULL, req->i.size); |
@@ -1675,6 +1685,7 @@ int drbd_send_dblock(struct drbd_peer_device *peer_device, struct drbd_request * | |||
1675 | ... Be noisy about digest too large ... | 1685 | ... Be noisy about digest too large ... |
1676 | } */ | 1686 | } */ |
1677 | } | 1687 | } |
1688 | out: | ||
1678 | mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */ | 1689 | mutex_unlock(&sock->mutex); /* locked by drbd_prepare_command() */ |
1679 | 1690 | ||
1680 | return err; | 1691 | return err; |
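
The P_TRIM branch above sends the data-packet header plus only a big-endian size, then jumps past the digest and payload transfer, since a trim carries no data. A sketch of that shape; the struct layout is illustrative, not the actual DRBD wire format:

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>

struct p_data { uint64_t sector; uint64_t block_id; uint32_t seq_num; uint32_t dp_flags; };
struct p_trim { struct p_data p_data; uint32_t size; };  /* assumed layout */

int main(void)
{
        struct p_trim t = { .size = htonl(4096) };  /* cpu_to_be32 equivalent */
        printf("trim of %u bytes; no payload, so no digest\n", ntohl(t.size));
        return 0;
}
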
@@ -2570,6 +2581,7 @@ struct drbd_resource *drbd_create_resource(const char *name) | |||
2570 | INIT_LIST_HEAD(&resource->connections); | 2581 | INIT_LIST_HEAD(&resource->connections); |
2571 | list_add_tail_rcu(&resource->resources, &drbd_resources); | 2582 | list_add_tail_rcu(&resource->resources, &drbd_resources); |
2572 | mutex_init(&resource->conf_update); | 2583 | mutex_init(&resource->conf_update); |
2584 | mutex_init(&resource->adm_mutex); | ||
2573 | spin_lock_init(&resource->req_lock); | 2585 | spin_lock_init(&resource->req_lock); |
2574 | return resource; | 2586 | return resource; |
2575 | 2587 | ||
@@ -2687,14 +2699,16 @@ static int init_submitter(struct drbd_device *device) | |||
2687 | return 0; | 2699 | return 0; |
2688 | } | 2700 | } |
2689 | 2701 | ||
2690 | enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned int minor, int vnr) | 2702 | enum drbd_ret_code drbd_create_device(struct drbd_config_context *adm_ctx, unsigned int minor) |
2691 | { | 2703 | { |
2704 | struct drbd_resource *resource = adm_ctx->resource; | ||
2692 | struct drbd_connection *connection; | 2705 | struct drbd_connection *connection; |
2693 | struct drbd_device *device; | 2706 | struct drbd_device *device; |
2694 | struct drbd_peer_device *peer_device, *tmp_peer_device; | 2707 | struct drbd_peer_device *peer_device, *tmp_peer_device; |
2695 | struct gendisk *disk; | 2708 | struct gendisk *disk; |
2696 | struct request_queue *q; | 2709 | struct request_queue *q; |
2697 | int id; | 2710 | int id; |
2711 | int vnr = adm_ctx->volume; | ||
2698 | enum drbd_ret_code err = ERR_NOMEM; | 2712 | enum drbd_ret_code err = ERR_NOMEM; |
2699 | 2713 | ||
2700 | device = minor_to_device(minor); | 2714 | device = minor_to_device(minor); |
@@ -2763,7 +2777,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i | |||
2763 | if (id < 0) { | 2777 | if (id < 0) { |
2764 | if (id == -ENOSPC) { | 2778 | if (id == -ENOSPC) { |
2765 | err = ERR_MINOR_EXISTS; | 2779 | err = ERR_MINOR_EXISTS; |
2766 | drbd_msg_put_info("requested minor exists already"); | 2780 | drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already"); |
2767 | } | 2781 | } |
2768 | goto out_no_minor_idr; | 2782 | goto out_no_minor_idr; |
2769 | } | 2783 | } |
@@ -2773,7 +2787,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i | |||
2773 | if (id < 0) { | 2787 | if (id < 0) { |
2774 | if (id == -ENOSPC) { | 2788 | if (id == -ENOSPC) { |
2775 | err = ERR_MINOR_EXISTS; | 2789 | err = ERR_MINOR_EXISTS; |
2776 | drbd_msg_put_info("requested minor exists already"); | 2790 | drbd_msg_put_info(adm_ctx->reply_skb, "requested minor exists already"); |
2777 | } | 2791 | } |
2778 | goto out_idr_remove_minor; | 2792 | goto out_idr_remove_minor; |
2779 | } | 2793 | } |
@@ -2794,7 +2808,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i | |||
2794 | if (id < 0) { | 2808 | if (id < 0) { |
2795 | if (id == -ENOSPC) { | 2809 | if (id == -ENOSPC) { |
2796 | err = ERR_INVALID_REQUEST; | 2810 | err = ERR_INVALID_REQUEST; |
2797 | drbd_msg_put_info("requested volume exists already"); | 2811 | drbd_msg_put_info(adm_ctx->reply_skb, "requested volume exists already"); |
2798 | } | 2812 | } |
2799 | goto out_idr_remove_from_resource; | 2813 | goto out_idr_remove_from_resource; |
2800 | } | 2814 | } |
@@ -2803,7 +2817,7 @@ enum drbd_ret_code drbd_create_device(struct drbd_resource *resource, unsigned i | |||
2803 | 2817 | ||
2804 | if (init_submitter(device)) { | 2818 | if (init_submitter(device)) { |
2805 | err = ERR_NOMEM; | 2819 | err = ERR_NOMEM; |
2806 | drbd_msg_put_info("unable to create submit workqueue"); | 2820 | drbd_msg_put_info(adm_ctx->reply_skb, "unable to create submit workqueue"); |
2807 | goto out_idr_remove_vol; | 2821 | goto out_idr_remove_vol; |
2808 | } | 2822 | } |
2809 | 2823 | ||
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 526414bc2cab..1b35c45c92b7 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -34,7 +34,6 @@ | |||
34 | #include "drbd_int.h" | 34 | #include "drbd_int.h" |
35 | #include "drbd_protocol.h" | 35 | #include "drbd_protocol.h" |
36 | #include "drbd_req.h" | 36 | #include "drbd_req.h" |
37 | #include "drbd_wrappers.h" | ||
38 | #include <asm/unaligned.h> | 37 | #include <asm/unaligned.h> |
39 | #include <linux/drbd_limits.h> | 38 | #include <linux/drbd_limits.h> |
40 | #include <linux/kthread.h> | 39 | #include <linux/kthread.h> |
@@ -82,32 +81,6 @@ int drbd_adm_get_status_all(struct sk_buff *skb, struct netlink_callback *cb); | |||
82 | /* used blkdev_get_by_path, to claim our meta data device(s) */ | 81 | /* used blkdev_get_by_path, to claim our meta data device(s) */ |
83 | static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; | 82 | static char *drbd_m_holder = "Hands off! this is DRBD's meta data device."; |
84 | 83 | ||
85 | /* Configuration is strictly serialized, because generic netlink message | ||
86 | * processing is strictly serialized by the genl_lock(). | ||
87 | * Which means we can use one static global drbd_config_context struct. | ||
88 | */ | ||
89 | static struct drbd_config_context { | ||
90 | /* assigned from drbd_genlmsghdr */ | ||
91 | unsigned int minor; | ||
92 | /* assigned from request attributes, if present */ | ||
93 | unsigned int volume; | ||
94 | #define VOLUME_UNSPECIFIED (-1U) | ||
95 | /* pointer into the request skb, | ||
96 | * limited lifetime! */ | ||
97 | char *resource_name; | ||
98 | struct nlattr *my_addr; | ||
99 | struct nlattr *peer_addr; | ||
100 | |||
101 | /* reply buffer */ | ||
102 | struct sk_buff *reply_skb; | ||
103 | /* pointer into reply buffer */ | ||
104 | struct drbd_genlmsghdr *reply_dh; | ||
105 | /* resolved from attributes, if possible */ | ||
106 | struct drbd_device *device; | ||
107 | struct drbd_resource *resource; | ||
108 | struct drbd_connection *connection; | ||
109 | } adm_ctx; | ||
110 | |||
111 | static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) | 84 | static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) |
112 | { | 85 | { |
113 | genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb)))); | 86 | genlmsg_end(skb, genlmsg_data(nlmsg_data(nlmsg_hdr(skb)))); |
@@ -117,9 +90,8 @@ static void drbd_adm_send_reply(struct sk_buff *skb, struct genl_info *info) | |||
117 | 90 | ||
118 | /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only | 91 | /* Used on a fresh "drbd_adm_prepare"d reply_skb, this cannot fail: The only |
119 | * reason it could fail was no space in skb, and there are 4k available. */ | 92 | * reason it could fail was no space in skb, and there are 4k available. */ |
120 | int drbd_msg_put_info(const char *info) | 93 | int drbd_msg_put_info(struct sk_buff *skb, const char *info) |
121 | { | 94 | { |
122 | struct sk_buff *skb = adm_ctx.reply_skb; | ||
123 | struct nlattr *nla; | 95 | struct nlattr *nla; |
124 | int err = -EMSGSIZE; | 96 | int err = -EMSGSIZE; |
125 | 97 | ||
@@ -143,42 +115,46 @@ int drbd_msg_put_info(const char *info) | |||
143 | * and per-family private info->pointers. | 115 | * and per-family private info->pointers. |
144 | * But we need to stay compatible with older kernels. | 116 | * But we need to stay compatible with older kernels. |
145 | * If it returns successfully, adm_ctx members are valid. | 117 | * If it returns successfully, adm_ctx members are valid. |
118 | * | ||
119 | * At this point, we still rely on the global genl_lock(). | ||
120 | * If we want to avoid that, and allow "genl_family.parallel_ops", we may need | ||
121 | * to add additional synchronization against object destruction/modification. | ||
146 | */ | 122 | */ |
147 | #define DRBD_ADM_NEED_MINOR 1 | 123 | #define DRBD_ADM_NEED_MINOR 1 |
148 | #define DRBD_ADM_NEED_RESOURCE 2 | 124 | #define DRBD_ADM_NEED_RESOURCE 2 |
149 | #define DRBD_ADM_NEED_CONNECTION 4 | 125 | #define DRBD_ADM_NEED_CONNECTION 4 |
150 | static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, | 126 | static int drbd_adm_prepare(struct drbd_config_context *adm_ctx, |
151 | unsigned flags) | 127 | struct sk_buff *skb, struct genl_info *info, unsigned flags) |
152 | { | 128 | { |
153 | struct drbd_genlmsghdr *d_in = info->userhdr; | 129 | struct drbd_genlmsghdr *d_in = info->userhdr; |
154 | const u8 cmd = info->genlhdr->cmd; | 130 | const u8 cmd = info->genlhdr->cmd; |
155 | int err; | 131 | int err; |
156 | 132 | ||
157 | memset(&adm_ctx, 0, sizeof(adm_ctx)); | 133 | memset(adm_ctx, 0, sizeof(*adm_ctx)); |
158 | 134 | ||
159 | /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */ | 135 | /* genl_rcv_msg only checks for CAP_NET_ADMIN on "GENL_ADMIN_PERM" :( */ |
160 | if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN)) | 136 | if (cmd != DRBD_ADM_GET_STATUS && !capable(CAP_NET_ADMIN)) |
161 | return -EPERM; | 137 | return -EPERM; |
162 | 138 | ||
163 | adm_ctx.reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); | 139 | adm_ctx->reply_skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); |
164 | if (!adm_ctx.reply_skb) { | 140 | if (!adm_ctx->reply_skb) { |
165 | err = -ENOMEM; | 141 | err = -ENOMEM; |
166 | goto fail; | 142 | goto fail; |
167 | } | 143 | } |
168 | 144 | ||
169 | adm_ctx.reply_dh = genlmsg_put_reply(adm_ctx.reply_skb, | 145 | adm_ctx->reply_dh = genlmsg_put_reply(adm_ctx->reply_skb, |
170 | info, &drbd_genl_family, 0, cmd); | 146 | info, &drbd_genl_family, 0, cmd); |
171 | /* put of a few bytes into a fresh skb of >= 4k will always succeed. | 147 | /* put of a few bytes into a fresh skb of >= 4k will always succeed. |
172 | * but anyways */ | 148 | * but anyways */ |
173 | if (!adm_ctx.reply_dh) { | 149 | if (!adm_ctx->reply_dh) { |
174 | err = -ENOMEM; | 150 | err = -ENOMEM; |
175 | goto fail; | 151 | goto fail; |
176 | } | 152 | } |
177 | 153 | ||
178 | adm_ctx.reply_dh->minor = d_in->minor; | 154 | adm_ctx->reply_dh->minor = d_in->minor; |
179 | adm_ctx.reply_dh->ret_code = NO_ERROR; | 155 | adm_ctx->reply_dh->ret_code = NO_ERROR; |
180 | 156 | ||
181 | adm_ctx.volume = VOLUME_UNSPECIFIED; | 157 | adm_ctx->volume = VOLUME_UNSPECIFIED; |
182 | if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { | 158 | if (info->attrs[DRBD_NLA_CFG_CONTEXT]) { |
183 | struct nlattr *nla; | 159 | struct nlattr *nla; |
184 | /* parse and validate only */ | 160 | /* parse and validate only */ |
@@ -188,111 +164,131 @@ static int drbd_adm_prepare(struct sk_buff *skb, struct genl_info *info, | |||
188 | 164 | ||
189 | /* It was present, and valid, | 165 | /* It was present, and valid, |
190 | * copy it over to the reply skb. */ | 166 | * copy it over to the reply skb. */ |
191 | err = nla_put_nohdr(adm_ctx.reply_skb, | 167 | err = nla_put_nohdr(adm_ctx->reply_skb, |
192 | info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len, | 168 | info->attrs[DRBD_NLA_CFG_CONTEXT]->nla_len, |
193 | info->attrs[DRBD_NLA_CFG_CONTEXT]); | 169 | info->attrs[DRBD_NLA_CFG_CONTEXT]); |
194 | if (err) | 170 | if (err) |
195 | goto fail; | 171 | goto fail; |
196 | 172 | ||
197 | /* and assign stuff to the global adm_ctx */ | 173 | /* and assign stuff to the adm_ctx */ |
198 | nla = nested_attr_tb[__nla_type(T_ctx_volume)]; | 174 | nla = nested_attr_tb[__nla_type(T_ctx_volume)]; |
199 | if (nla) | 175 | if (nla) |
200 | adm_ctx.volume = nla_get_u32(nla); | 176 | adm_ctx->volume = nla_get_u32(nla); |
201 | nla = nested_attr_tb[__nla_type(T_ctx_resource_name)]; | 177 | nla = nested_attr_tb[__nla_type(T_ctx_resource_name)]; |
202 | if (nla) | 178 | if (nla) |
203 | adm_ctx.resource_name = nla_data(nla); | 179 | adm_ctx->resource_name = nla_data(nla); |
204 | adm_ctx.my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)]; | 180 | adm_ctx->my_addr = nested_attr_tb[__nla_type(T_ctx_my_addr)]; |
205 | adm_ctx.peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)]; | 181 | adm_ctx->peer_addr = nested_attr_tb[__nla_type(T_ctx_peer_addr)]; |
206 | if ((adm_ctx.my_addr && | 182 | if ((adm_ctx->my_addr && |
207 | nla_len(adm_ctx.my_addr) > sizeof(adm_ctx.connection->my_addr)) || | 183 | nla_len(adm_ctx->my_addr) > sizeof(adm_ctx->connection->my_addr)) || |
208 | (adm_ctx.peer_addr && | 184 | (adm_ctx->peer_addr && |
209 | nla_len(adm_ctx.peer_addr) > sizeof(adm_ctx.connection->peer_addr))) { | 185 | nla_len(adm_ctx->peer_addr) > sizeof(adm_ctx->connection->peer_addr))) { |
210 | err = -EINVAL; | 186 | err = -EINVAL; |
211 | goto fail; | 187 | goto fail; |
212 | } | 188 | } |
213 | } | 189 | } |
214 | 190 | ||
215 | adm_ctx.minor = d_in->minor; | 191 | adm_ctx->minor = d_in->minor; |
216 | adm_ctx.device = minor_to_device(d_in->minor); | 192 | adm_ctx->device = minor_to_device(d_in->minor); |
217 | if (adm_ctx.resource_name) { | 193 | |
218 | adm_ctx.resource = drbd_find_resource(adm_ctx.resource_name); | 194 | /* We are protected by the global genl_lock(). |
195 | * But we may explicitly drop it/retake it in drbd_adm_set_role(), | ||
196 | * so make sure this object stays around. */ | ||
197 | if (adm_ctx->device) | ||
198 | kref_get(&adm_ctx->device->kref); | ||
199 | |||
200 | if (adm_ctx->resource_name) { | ||
201 | adm_ctx->resource = drbd_find_resource(adm_ctx->resource_name); | ||
219 | } | 202 | } |
220 | 203 | ||
221 | if (!adm_ctx.device && (flags & DRBD_ADM_NEED_MINOR)) { | 204 | if (!adm_ctx->device && (flags & DRBD_ADM_NEED_MINOR)) { |
222 | drbd_msg_put_info("unknown minor"); | 205 | drbd_msg_put_info(adm_ctx->reply_skb, "unknown minor"); |
223 | return ERR_MINOR_INVALID; | 206 | return ERR_MINOR_INVALID; |
224 | } | 207 | } |
225 | if (!adm_ctx.resource && (flags & DRBD_ADM_NEED_RESOURCE)) { | 208 | if (!adm_ctx->resource && (flags & DRBD_ADM_NEED_RESOURCE)) { |
226 | drbd_msg_put_info("unknown resource"); | 209 | drbd_msg_put_info(adm_ctx->reply_skb, "unknown resource"); |
227 | if (adm_ctx.resource_name) | 210 | if (adm_ctx->resource_name) |
228 | return ERR_RES_NOT_KNOWN; | 211 | return ERR_RES_NOT_KNOWN; |
229 | return ERR_INVALID_REQUEST; | 212 | return ERR_INVALID_REQUEST; |
230 | } | 213 | } |
231 | 214 | ||
232 | if (flags & DRBD_ADM_NEED_CONNECTION) { | 215 | if (flags & DRBD_ADM_NEED_CONNECTION) { |
233 | if (adm_ctx.resource) { | 216 | if (adm_ctx->resource) { |
234 | drbd_msg_put_info("no resource name expected"); | 217 | drbd_msg_put_info(adm_ctx->reply_skb, "no resource name expected"); |
235 | return ERR_INVALID_REQUEST; | 218 | return ERR_INVALID_REQUEST; |
236 | } | 219 | } |
237 | if (adm_ctx.device) { | 220 | if (adm_ctx->device) { |
238 | drbd_msg_put_info("no minor number expected"); | 221 | drbd_msg_put_info(adm_ctx->reply_skb, "no minor number expected"); |
239 | return ERR_INVALID_REQUEST; | 222 | return ERR_INVALID_REQUEST; |
240 | } | 223 | } |
241 | if (adm_ctx.my_addr && adm_ctx.peer_addr) | 224 | if (adm_ctx->my_addr && adm_ctx->peer_addr) |
242 | adm_ctx.connection = conn_get_by_addrs(nla_data(adm_ctx.my_addr), | 225 | adm_ctx->connection = conn_get_by_addrs(nla_data(adm_ctx->my_addr), |
243 | nla_len(adm_ctx.my_addr), | 226 | nla_len(adm_ctx->my_addr), |
244 | nla_data(adm_ctx.peer_addr), | 227 | nla_data(adm_ctx->peer_addr), |
245 | nla_len(adm_ctx.peer_addr)); | 228 | nla_len(adm_ctx->peer_addr)); |
246 | if (!adm_ctx.connection) { | 229 | if (!adm_ctx->connection) { |
247 | drbd_msg_put_info("unknown connection"); | 230 | drbd_msg_put_info(adm_ctx->reply_skb, "unknown connection"); |
248 | return ERR_INVALID_REQUEST; | 231 | return ERR_INVALID_REQUEST; |
249 | } | 232 | } |
250 | } | 233 | } |
251 | 234 | ||
252 | /* some more paranoia, if the request was over-determined */ | 235 | /* some more paranoia, if the request was over-determined */ |
253 | if (adm_ctx.device && adm_ctx.resource && | 236 | if (adm_ctx->device && adm_ctx->resource && |
254 | adm_ctx.device->resource != adm_ctx.resource) { | 237 | adm_ctx->device->resource != adm_ctx->resource) { |
255 | pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n", | 238 | pr_warning("request: minor=%u, resource=%s; but that minor belongs to resource %s\n", |
256 | adm_ctx.minor, adm_ctx.resource->name, | 239 | adm_ctx->minor, adm_ctx->resource->name, |
257 | adm_ctx.device->resource->name); | 240 | adm_ctx->device->resource->name); |
258 | drbd_msg_put_info("minor exists in different resource"); | 241 | drbd_msg_put_info(adm_ctx->reply_skb, "minor exists in different resource"); |
259 | return ERR_INVALID_REQUEST; | 242 | return ERR_INVALID_REQUEST; |
260 | } | 243 | } |
261 | if (adm_ctx.device && | 244 | if (adm_ctx->device && |
262 | adm_ctx.volume != VOLUME_UNSPECIFIED && | 245 | adm_ctx->volume != VOLUME_UNSPECIFIED && |
263 | adm_ctx.volume != adm_ctx.device->vnr) { | 246 | adm_ctx->volume != adm_ctx->device->vnr) { |
264 | pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n", | 247 | pr_warning("request: minor=%u, volume=%u; but that minor is volume %u in %s\n", |
265 | adm_ctx.minor, adm_ctx.volume, | 248 | adm_ctx->minor, adm_ctx->volume, |
266 | adm_ctx.device->vnr, | 249 | adm_ctx->device->vnr, |
267 | adm_ctx.device->resource->name); | 250 | adm_ctx->device->resource->name); |
268 | drbd_msg_put_info("minor exists as different volume"); | 251 | drbd_msg_put_info(adm_ctx->reply_skb, "minor exists as different volume"); |
269 | return ERR_INVALID_REQUEST; | 252 | return ERR_INVALID_REQUEST; |
270 | } | 253 | } |
271 | 254 | ||
255 | /* still, provide adm_ctx->resource always, if possible. */ | ||
256 | if (!adm_ctx->resource) { | ||
257 | adm_ctx->resource = adm_ctx->device ? adm_ctx->device->resource | ||
258 | : adm_ctx->connection ? adm_ctx->connection->resource : NULL; | ||
259 | if (adm_ctx->resource) | ||
260 | kref_get(&adm_ctx->resource->kref); | ||
261 | } | ||
262 | |||
272 | return NO_ERROR; | 263 | return NO_ERROR; |
273 | 264 | ||
274 | fail: | 265 | fail: |
275 | nlmsg_free(adm_ctx.reply_skb); | 266 | nlmsg_free(adm_ctx->reply_skb); |
276 | adm_ctx.reply_skb = NULL; | 267 | adm_ctx->reply_skb = NULL; |
277 | return err; | 268 | return err; |
278 | } | 269 | } |
279 | 270 | ||
280 | static int drbd_adm_finish(struct genl_info *info, int retcode) | 271 | static int drbd_adm_finish(struct drbd_config_context *adm_ctx, |
272 | struct genl_info *info, int retcode) | ||
281 | { | 273 | { |
282 | if (adm_ctx.connection) { | 274 | if (adm_ctx->device) { |
283 | kref_put(&adm_ctx.connection->kref, drbd_destroy_connection); | 275 | kref_put(&adm_ctx->device->kref, drbd_destroy_device); |
284 | adm_ctx.connection = NULL; | 276 | adm_ctx->device = NULL; |
285 | } | 277 | } |
286 | if (adm_ctx.resource) { | 278 | if (adm_ctx->connection) { |
287 | kref_put(&adm_ctx.resource->kref, drbd_destroy_resource); | 279 | kref_put(&adm_ctx->connection->kref, &drbd_destroy_connection); |
288 | adm_ctx.resource = NULL; | 280 | adm_ctx->connection = NULL; |
281 | } | ||
282 | if (adm_ctx->resource) { | ||
283 | kref_put(&adm_ctx->resource->kref, drbd_destroy_resource); | ||
284 | adm_ctx->resource = NULL; | ||
289 | } | 285 | } |
290 | 286 | ||
291 | if (!adm_ctx.reply_skb) | 287 | if (!adm_ctx->reply_skb) |
292 | return -ENOMEM; | 288 | return -ENOMEM; |
293 | 289 | ||
294 | adm_ctx.reply_dh->ret_code = retcode; | 290 | adm_ctx->reply_dh->ret_code = retcode; |
295 | drbd_adm_send_reply(adm_ctx.reply_skb, info); | 291 | drbd_adm_send_reply(adm_ctx->reply_skb, info); |
296 | return 0; | 292 | return 0; |
297 | } | 293 | } |
298 | 294 | ||
@@ -426,6 +422,14 @@ static enum drbd_fencing_p highest_fencing_policy(struct drbd_connection *connec | |||
426 | } | 422 | } |
427 | rcu_read_unlock(); | 423 | rcu_read_unlock(); |
428 | 424 | ||
425 | if (fp == FP_NOT_AVAIL) { | ||
426 | /* IO Suspending works on the whole resource. | ||
427 | Do it only for one device. */ | ||
428 | vnr = 0; | ||
429 | peer_device = idr_get_next(&connection->peer_devices, &vnr); | ||
430 | drbd_change_state(peer_device->device, CS_VERBOSE | CS_HARD, NS(susp_fen, 0)); | ||
431 | } | ||
432 | |||
429 | return fp; | 433 | return fp; |
430 | } | 434 | } |
431 | 435 | ||
@@ -438,12 +442,13 @@ bool conn_try_outdate_peer(struct drbd_connection *connection) | |||
438 | char *ex_to_string; | 442 | char *ex_to_string; |
439 | int r; | 443 | int r; |
440 | 444 | ||
445 | spin_lock_irq(&connection->resource->req_lock); | ||
441 | if (connection->cstate >= C_WF_REPORT_PARAMS) { | 446 | if (connection->cstate >= C_WF_REPORT_PARAMS) { |
442 | drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n"); | 447 | drbd_err(connection, "Expected cstate < C_WF_REPORT_PARAMS\n"); |
448 | spin_unlock_irq(&connection->resource->req_lock); | ||
443 | return false; | 449 | return false; |
444 | } | 450 | } |
445 | 451 | ||
446 | spin_lock_irq(&connection->resource->req_lock); | ||
447 | connect_cnt = connection->connect_cnt; | 452 | connect_cnt = connection->connect_cnt; |
448 | spin_unlock_irq(&connection->resource->req_lock); | 453 | spin_unlock_irq(&connection->resource->req_lock); |
449 | 454 | ||
@@ -654,11 +659,11 @@ drbd_set_role(struct drbd_device *device, enum drbd_role new_role, int force) | |||
654 | put_ldev(device); | 659 | put_ldev(device); |
655 | } | 660 | } |
656 | } else { | 661 | } else { |
657 | mutex_lock(&device->resource->conf_update); | 662 | /* Called from drbd_adm_set_role only. |
663 | * We are still holding the conf_update mutex. */ | ||
658 | nc = first_peer_device(device)->connection->net_conf; | 664 | nc = first_peer_device(device)->connection->net_conf; |
659 | if (nc) | 665 | if (nc) |
660 | nc->discard_my_data = 0; /* without copy; single bit op is atomic */ | 666 | nc->discard_my_data = 0; /* without copy; single bit op is atomic */ |
661 | mutex_unlock(&device->resource->conf_update); | ||
662 | 667 | ||
663 | set_disk_ro(device->vdisk, false); | 668 | set_disk_ro(device->vdisk, false); |
664 | if (get_ldev(device)) { | 669 | if (get_ldev(device)) { |
@@ -700,11 +705,12 @@ static const char *from_attrs_err_to_txt(int err) | |||
700 | 705 | ||
701 | int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) | 706 | int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) |
702 | { | 707 | { |
708 | struct drbd_config_context adm_ctx; | ||
703 | struct set_role_parms parms; | 709 | struct set_role_parms parms; |
704 | int err; | 710 | int err; |
705 | enum drbd_ret_code retcode; | 711 | enum drbd_ret_code retcode; |
706 | 712 | ||
707 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 713 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
708 | if (!adm_ctx.reply_skb) | 714 | if (!adm_ctx.reply_skb) |
709 | return retcode; | 715 | return retcode; |
710 | if (retcode != NO_ERROR) | 716 | if (retcode != NO_ERROR) |
@@ -715,17 +721,22 @@ int drbd_adm_set_role(struct sk_buff *skb, struct genl_info *info) | |||
715 | err = set_role_parms_from_attrs(&parms, info); | 721 | err = set_role_parms_from_attrs(&parms, info); |
716 | if (err) { | 722 | if (err) { |
717 | retcode = ERR_MANDATORY_TAG; | 723 | retcode = ERR_MANDATORY_TAG; |
718 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 724 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
719 | goto out; | 725 | goto out; |
720 | } | 726 | } |
721 | } | 727 | } |
728 | genl_unlock(); | ||
729 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
722 | 730 | ||
723 | if (info->genlhdr->cmd == DRBD_ADM_PRIMARY) | 731 | if (info->genlhdr->cmd == DRBD_ADM_PRIMARY) |
724 | retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate); | 732 | retcode = drbd_set_role(adm_ctx.device, R_PRIMARY, parms.assume_uptodate); |
725 | else | 733 | else |
726 | retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0); | 734 | retcode = drbd_set_role(adm_ctx.device, R_SECONDARY, 0); |
735 | |||
736 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
737 | genl_lock(); | ||
727 | out: | 738 | out: |
728 | drbd_adm_finish(info, retcode); | 739 | drbd_adm_finish(&adm_ctx, info, retcode); |
729 | return 0; | 740 | return 0; |
730 | } | 741 | } |
731 | 742 | ||
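
Note the lock dance in drbd_adm_set_role: the handler drops the global genl lock, then serializes on the new per-resource `adm_mutex`, so a long-running role change on one resource no longer blocks administration of every other resource. A minimal model of that shift from one global lock to per-object locks:

#include <pthread.h>
#include <stdio.h>

struct resource {
        const char *name;
        pthread_mutex_t adm_mutex;  /* serializes admin requests per resource */
};

static void adm_set_role(struct resource *res)
{
        /* in the kernel: genl_unlock(); mutex_lock(&resource->adm_mutex); */
        pthread_mutex_lock(&res->adm_mutex);
        printf("changing role on %s (other resources not blocked)\n", res->name);
        pthread_mutex_unlock(&res->adm_mutex);
        /* in the kernel: genl_lock(); */
}

int main(void)
{
        struct resource r = { "r0", PTHREAD_MUTEX_INITIALIZER };
        adm_set_role(&r);
        return 0;
}
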
@@ -1104,15 +1115,18 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_ | |||
1104 | struct request_queue * const q = device->rq_queue; | 1115 | struct request_queue * const q = device->rq_queue; |
1105 | unsigned int max_hw_sectors = max_bio_size >> 9; | 1116 | unsigned int max_hw_sectors = max_bio_size >> 9; |
1106 | unsigned int max_segments = 0; | 1117 | unsigned int max_segments = 0; |
1118 | struct request_queue *b = NULL; | ||
1107 | 1119 | ||
1108 | if (get_ldev_if_state(device, D_ATTACHING)) { | 1120 | if (get_ldev_if_state(device, D_ATTACHING)) { |
1109 | struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue; | 1121 | b = device->ldev->backing_bdev->bd_disk->queue; |
1110 | 1122 | ||
1111 | max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); | 1123 | max_hw_sectors = min(queue_max_hw_sectors(b), max_bio_size >> 9); |
1112 | rcu_read_lock(); | 1124 | rcu_read_lock(); |
1113 | max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; | 1125 | max_segments = rcu_dereference(device->ldev->disk_conf)->max_bio_bvecs; |
1114 | rcu_read_unlock(); | 1126 | rcu_read_unlock(); |
1115 | put_ldev(device); | 1127 | |
1128 | blk_set_stacking_limits(&q->limits); | ||
1129 | blk_queue_max_write_same_sectors(q, 0); | ||
1116 | } | 1130 | } |
1117 | 1131 | ||
1118 | blk_queue_logical_block_size(q, 512); | 1132 | blk_queue_logical_block_size(q, 512); |
@@ -1121,8 +1135,25 @@ static void drbd_setup_queue_param(struct drbd_device *device, unsigned int max_ | |||
1121 | blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); | 1135 | blk_queue_max_segments(q, max_segments ? max_segments : BLK_MAX_SEGMENTS); |
1122 | blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); | 1136 | blk_queue_segment_boundary(q, PAGE_CACHE_SIZE-1); |
1123 | 1137 | ||
1124 | if (get_ldev_if_state(device, D_ATTACHING)) { | 1138 | if (b) { |
1125 | struct request_queue * const b = device->ldev->backing_bdev->bd_disk->queue; | 1139 | struct drbd_connection *connection = first_peer_device(device)->connection; |
1140 | |||
1141 | if (blk_queue_discard(b) && | ||
1142 | (connection->cstate < C_CONNECTED || connection->agreed_features & FF_TRIM)) { | ||
1143 | /* For now, don't allow more than one activity log extent worth of data | ||
1144 | * to be discarded in one go. We may need to rework drbd_al_begin_io() | ||
1145 | * to allow for even larger discard ranges */ | ||
1146 | q->limits.max_discard_sectors = DRBD_MAX_DISCARD_SECTORS; | ||
1147 | |||
1148 | queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); | ||
1149 | /* REALLY? Is stacking secdiscard "legal"? */ | ||
1150 | if (blk_queue_secdiscard(b)) | ||
1151 | queue_flag_set_unlocked(QUEUE_FLAG_SECDISCARD, q); | ||
1152 | } else { | ||
1153 | q->limits.max_discard_sectors = 0; | ||
1154 | queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); | ||
1155 | queue_flag_clear_unlocked(QUEUE_FLAG_SECDISCARD, q); | ||
1156 | } | ||
1126 | 1157 | ||
1127 | blk_queue_stack_limits(q, b); | 1158 | blk_queue_stack_limits(q, b); |
1128 | 1159 | ||
@@ -1164,8 +1195,14 @@ void drbd_reconsider_max_bio_size(struct drbd_device *device) | |||
1164 | peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */ | 1195 | peer = DRBD_MAX_BIO_SIZE_P95; /* drbd 8.3.8 onwards, before 8.4.0 */ |
1165 | else | 1196 | else |
1166 | peer = DRBD_MAX_BIO_SIZE; | 1197 | peer = DRBD_MAX_BIO_SIZE; |
1167 | } | ||
1168 | 1198 | ||
1199 | /* We may later detach and re-attach on a disconnected Primary. | ||
1200 | * Avoid this setting to jump back in that case. | ||
1201 | * We want to store what we know the peer DRBD can handle, | ||
1202 | * not what the peer IO backend can handle. */ | ||
1203 | if (peer > device->peer_max_bio_size) | ||
1204 | device->peer_max_bio_size = peer; | ||
1205 | } | ||
1169 | new = min(local, peer); | 1206 | new = min(local, peer); |
1170 | 1207 | ||
1171 | if (device->state.role == R_PRIMARY && new < now) | 1208 | if (device->state.role == R_PRIMARY && new < now) |
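
The hunk above makes `peer_max_bio_size` sticky: it remembers the largest bio size the peer's DRBD ever advertised, so a detach/re-attach on a disconnected Primary cannot make the limit jump back down. A one-function illustration of the monotonic update:

#include <stdio.h>

static unsigned int peer_max_bio_size;  /* sticky: only ever grows */

static void note_peer_limit(unsigned int peer)
{
        if (peer > peer_max_bio_size)
                peer_max_bio_size = peer;
}

int main(void)
{
        note_peer_limit(1u << 20);          /* peer advertises 1 MiB */
        note_peer_limit(4096);              /* later, smaller value is ignored */
        printf("%u\n", peer_max_bio_size);  /* 1048576 */
        return 0;
}
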
@@ -1258,19 +1295,21 @@ static unsigned int drbd_al_extents_max(struct drbd_backing_dev *bdev) | |||
1258 | 1295 | ||
1259 | int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) | 1296 | int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) |
1260 | { | 1297 | { |
1298 | struct drbd_config_context adm_ctx; | ||
1261 | enum drbd_ret_code retcode; | 1299 | enum drbd_ret_code retcode; |
1262 | struct drbd_device *device; | 1300 | struct drbd_device *device; |
1263 | struct disk_conf *new_disk_conf, *old_disk_conf; | 1301 | struct disk_conf *new_disk_conf, *old_disk_conf; |
1264 | struct fifo_buffer *old_plan = NULL, *new_plan = NULL; | 1302 | struct fifo_buffer *old_plan = NULL, *new_plan = NULL; |
1265 | int err, fifo_size; | 1303 | int err, fifo_size; |
1266 | 1304 | ||
1267 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 1305 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
1268 | if (!adm_ctx.reply_skb) | 1306 | if (!adm_ctx.reply_skb) |
1269 | return retcode; | 1307 | return retcode; |
1270 | if (retcode != NO_ERROR) | 1308 | if (retcode != NO_ERROR) |
1271 | goto out; | 1309 | goto finish; |
1272 | 1310 | ||
1273 | device = adm_ctx.device; | 1311 | device = adm_ctx.device; |
1312 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
1274 | 1313 | ||
1275 | /* we also need a disk | 1314 | /* we also need a disk |
1276 | * to change the options on */ | 1315 | * to change the options on */ |
@@ -1294,7 +1333,7 @@ int drbd_adm_disk_opts(struct sk_buff *skb, struct genl_info *info) | |||
1294 | err = disk_conf_from_attrs_for_change(new_disk_conf, info); | 1333 | err = disk_conf_from_attrs_for_change(new_disk_conf, info); |
1295 | if (err && err != -ENOMSG) { | 1334 | if (err && err != -ENOMSG) { |
1296 | retcode = ERR_MANDATORY_TAG; | 1335 | retcode = ERR_MANDATORY_TAG; |
1297 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 1336 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
1298 | goto fail_unlock; | 1337 | goto fail_unlock; |
1299 | } | 1338 | } |
1300 | 1339 | ||
@@ -1385,12 +1424,15 @@ fail_unlock: | |||
1385 | success: | 1424 | success: |
1386 | put_ldev(device); | 1425 | put_ldev(device); |
1387 | out: | 1426 | out: |
1388 | drbd_adm_finish(info, retcode); | 1427 | mutex_unlock(&adm_ctx.resource->adm_mutex); |
1428 | finish: | ||
1429 | drbd_adm_finish(&adm_ctx, info, retcode); | ||
1389 | return 0; | 1430 | return 0; |
1390 | } | 1431 | } |
1391 | 1432 | ||
1392 | int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | 1433 | int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) |
1393 | { | 1434 | { |
1435 | struct drbd_config_context adm_ctx; | ||
1394 | struct drbd_device *device; | 1436 | struct drbd_device *device; |
1395 | int err; | 1437 | int err; |
1396 | enum drbd_ret_code retcode; | 1438 | enum drbd_ret_code retcode; |
@@ -1406,13 +1448,14 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1406 | enum drbd_state_rv rv; | 1448 | enum drbd_state_rv rv; |
1407 | struct net_conf *nc; | 1449 | struct net_conf *nc; |
1408 | 1450 | ||
1409 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 1451 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
1410 | if (!adm_ctx.reply_skb) | 1452 | if (!adm_ctx.reply_skb) |
1411 | return retcode; | 1453 | return retcode; |
1412 | if (retcode != NO_ERROR) | 1454 | if (retcode != NO_ERROR) |
1413 | goto finish; | 1455 | goto finish; |
1414 | 1456 | ||
1415 | device = adm_ctx.device; | 1457 | device = adm_ctx.device; |
1458 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
1416 | conn_reconfig_start(first_peer_device(device)->connection); | 1459 | conn_reconfig_start(first_peer_device(device)->connection); |
1417 | 1460 | ||
1418 | /* if you want to reconfigure, please tear down first */ | 1461 | /* if you want to reconfigure, please tear down first */ |
@@ -1455,7 +1498,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1455 | err = disk_conf_from_attrs(new_disk_conf, info); | 1498 | err = disk_conf_from_attrs(new_disk_conf, info); |
1456 | if (err) { | 1499 | if (err) { |
1457 | retcode = ERR_MANDATORY_TAG; | 1500 | retcode = ERR_MANDATORY_TAG; |
1458 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 1501 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
1459 | goto fail; | 1502 | goto fail; |
1460 | } | 1503 | } |
1461 | 1504 | ||
@@ -1619,7 +1662,7 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1619 | } | 1662 | } |
1620 | 1663 | ||
1621 | if (device->state.conn < C_CONNECTED && | 1664 | if (device->state.conn < C_CONNECTED && |
1622 | device->state.role == R_PRIMARY && | 1665 | device->state.role == R_PRIMARY && device->ed_uuid && |
1623 | (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { | 1666 | (device->ed_uuid & ~((u64)1)) != (nbc->md.uuid[UI_CURRENT] & ~((u64)1))) { |
1624 | drbd_err(device, "Can only attach to data with current UUID=%016llX\n", | 1667 | drbd_err(device, "Can only attach to data with current UUID=%016llX\n", |
1625 | (unsigned long long)device->ed_uuid); | 1668 | (unsigned long long)device->ed_uuid); |
@@ -1797,7 +1840,8 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1797 | kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); | 1840 | kobject_uevent(&disk_to_dev(device->vdisk)->kobj, KOBJ_CHANGE); |
1798 | put_ldev(device); | 1841 | put_ldev(device); |
1799 | conn_reconfig_done(first_peer_device(device)->connection); | 1842 | conn_reconfig_done(first_peer_device(device)->connection); |
1800 | drbd_adm_finish(info, retcode); | 1843 | mutex_unlock(&adm_ctx.resource->adm_mutex); |
1844 | drbd_adm_finish(&adm_ctx, info, retcode); | ||
1801 | return 0; | 1845 | return 0; |
1802 | 1846 | ||
1803 | force_diskless_dec: | 1847 | force_diskless_dec: |
@@ -1819,9 +1863,9 @@ int drbd_adm_attach(struct sk_buff *skb, struct genl_info *info) | |||
1819 | kfree(new_disk_conf); | 1863 | kfree(new_disk_conf); |
1820 | lc_destroy(resync_lru); | 1864 | lc_destroy(resync_lru); |
1821 | kfree(new_plan); | 1865 | kfree(new_plan); |
1822 | 1866 | mutex_unlock(&adm_ctx.resource->adm_mutex); | |
1823 | finish: | 1867 | finish: |
1824 | drbd_adm_finish(info, retcode); | 1868 | drbd_adm_finish(&adm_ctx, info, retcode); |
1825 | return 0; | 1869 | return 0; |
1826 | } | 1870 | } |
1827 | 1871 | ||
@@ -1860,11 +1904,12 @@ out: | |||
1860 | * Only then we have finally detached. */ | 1904 | * Only then we have finally detached. */ |
1861 | int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) | 1905 | int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) |
1862 | { | 1906 | { |
1907 | struct drbd_config_context adm_ctx; | ||
1863 | enum drbd_ret_code retcode; | 1908 | enum drbd_ret_code retcode; |
1864 | struct detach_parms parms = { }; | 1909 | struct detach_parms parms = { }; |
1865 | int err; | 1910 | int err; |
1866 | 1911 | ||
1867 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 1912 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
1868 | if (!adm_ctx.reply_skb) | 1913 | if (!adm_ctx.reply_skb) |
1869 | return retcode; | 1914 | return retcode; |
1870 | if (retcode != NO_ERROR) | 1915 | if (retcode != NO_ERROR) |
@@ -1874,14 +1919,16 @@ int drbd_adm_detach(struct sk_buff *skb, struct genl_info *info) | |||
1874 | err = detach_parms_from_attrs(&parms, info); | 1919 | err = detach_parms_from_attrs(&parms, info); |
1875 | if (err) { | 1920 | if (err) { |
1876 | retcode = ERR_MANDATORY_TAG; | 1921 | retcode = ERR_MANDATORY_TAG; |
1877 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 1922 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
1878 | goto out; | 1923 | goto out; |
1879 | } | 1924 | } |
1880 | } | 1925 | } |
1881 | 1926 | ||
1927 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
1882 | retcode = adm_detach(adm_ctx.device, parms.force_detach); | 1928 | retcode = adm_detach(adm_ctx.device, parms.force_detach); |
1929 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
1883 | out: | 1930 | out: |
1884 | drbd_adm_finish(info, retcode); | 1931 | drbd_adm_finish(&adm_ctx, info, retcode); |
1885 | return 0; | 1932 | return 0; |
1886 | } | 1933 | } |
1887 | 1934 | ||
@@ -2055,6 +2102,7 @@ static void free_crypto(struct crypto *crypto) | |||
2055 | 2102 | ||
2056 | int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) | 2103 | int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) |
2057 | { | 2104 | { |
2105 | struct drbd_config_context adm_ctx; | ||
2058 | enum drbd_ret_code retcode; | 2106 | enum drbd_ret_code retcode; |
2059 | struct drbd_connection *connection; | 2107 | struct drbd_connection *connection; |
2060 | struct net_conf *old_net_conf, *new_net_conf = NULL; | 2108 | struct net_conf *old_net_conf, *new_net_conf = NULL; |
@@ -2063,13 +2111,14 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) | |||
2063 | int rsr; /* re-sync running */ | 2111 | int rsr; /* re-sync running */ |
2064 | struct crypto crypto = { }; | 2112 | struct crypto crypto = { }; |
2065 | 2113 | ||
2066 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); | 2114 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION); |
2067 | if (!adm_ctx.reply_skb) | 2115 | if (!adm_ctx.reply_skb) |
2068 | return retcode; | 2116 | return retcode; |
2069 | if (retcode != NO_ERROR) | 2117 | if (retcode != NO_ERROR) |
2070 | goto out; | 2118 | goto finish; |
2071 | 2119 | ||
2072 | connection = adm_ctx.connection; | 2120 | connection = adm_ctx.connection; |
2121 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2073 | 2122 | ||
2074 | new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); | 2123 | new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL); |
2075 | if (!new_net_conf) { | 2124 | if (!new_net_conf) { |
@@ -2084,7 +2133,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) | |||
2084 | old_net_conf = connection->net_conf; | 2133 | old_net_conf = connection->net_conf; |
2085 | 2134 | ||
2086 | if (!old_net_conf) { | 2135 | if (!old_net_conf) { |
2087 | drbd_msg_put_info("net conf missing, try connect"); | 2136 | drbd_msg_put_info(adm_ctx.reply_skb, "net conf missing, try connect"); |
2088 | retcode = ERR_INVALID_REQUEST; | 2137 | retcode = ERR_INVALID_REQUEST; |
2089 | goto fail; | 2138 | goto fail; |
2090 | } | 2139 | } |
@@ -2096,7 +2145,7 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) | |||
2096 | err = net_conf_from_attrs_for_change(new_net_conf, info); | 2145 | err = net_conf_from_attrs_for_change(new_net_conf, info); |
2097 | if (err && err != -ENOMSG) { | 2146 | if (err && err != -ENOMSG) { |
2098 | retcode = ERR_MANDATORY_TAG; | 2147 | retcode = ERR_MANDATORY_TAG; |
2099 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 2148 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
2100 | goto fail; | 2149 | goto fail; |
2101 | } | 2150 | } |
2102 | 2151 | ||
@@ -2167,12 +2216,15 @@ int drbd_adm_net_opts(struct sk_buff *skb, struct genl_info *info) | |||
2167 | done: | 2216 | done: |
2168 | conn_reconfig_done(connection); | 2217 | conn_reconfig_done(connection); |
2169 | out: | 2218 | out: |
2170 | drbd_adm_finish(info, retcode); | 2219 | mutex_unlock(&adm_ctx.resource->adm_mutex); |
2220 | finish: | ||
2221 | drbd_adm_finish(&adm_ctx, info, retcode); | ||
2171 | return 0; | 2222 | return 0; |
2172 | } | 2223 | } |
2173 | 2224 | ||
2174 | int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) | 2225 | int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) |
2175 | { | 2226 | { |
2227 | struct drbd_config_context adm_ctx; | ||
2176 | struct drbd_peer_device *peer_device; | 2228 | struct drbd_peer_device *peer_device; |
2177 | struct net_conf *old_net_conf, *new_net_conf = NULL; | 2229 | struct net_conf *old_net_conf, *new_net_conf = NULL; |
2178 | struct crypto crypto = { }; | 2230 | struct crypto crypto = { }; |
@@ -2182,14 +2234,14 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) | |||
2182 | int i; | 2234 | int i; |
2183 | int err; | 2235 | int err; |
2184 | 2236 | ||
2185 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); | 2237 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE); |
2186 | 2238 | ||
2187 | if (!adm_ctx.reply_skb) | 2239 | if (!adm_ctx.reply_skb) |
2188 | return retcode; | 2240 | return retcode; |
2189 | if (retcode != NO_ERROR) | 2241 | if (retcode != NO_ERROR) |
2190 | goto out; | 2242 | goto out; |
2191 | if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) { | 2243 | if (!(adm_ctx.my_addr && adm_ctx.peer_addr)) { |
2192 | drbd_msg_put_info("connection endpoint(s) missing"); | 2244 | drbd_msg_put_info(adm_ctx.reply_skb, "connection endpoint(s) missing"); |
2193 | retcode = ERR_INVALID_REQUEST; | 2245 | retcode = ERR_INVALID_REQUEST; |
2194 | goto out; | 2246 | goto out; |
2195 | } | 2247 | } |
@@ -2215,6 +2267,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) | |||
2215 | } | 2267 | } |
2216 | } | 2268 | } |
2217 | 2269 | ||
2270 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2218 | connection = first_connection(adm_ctx.resource); | 2271 | connection = first_connection(adm_ctx.resource); |
2219 | conn_reconfig_start(connection); | 2272 | conn_reconfig_start(connection); |
2220 | 2273 | ||
@@ -2235,7 +2288,7 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) | |||
2235 | err = net_conf_from_attrs(new_net_conf, info); | 2288 | err = net_conf_from_attrs(new_net_conf, info); |
2236 | if (err && err != -ENOMSG) { | 2289 | if (err && err != -ENOMSG) { |
2237 | retcode = ERR_MANDATORY_TAG; | 2290 | retcode = ERR_MANDATORY_TAG; |
2238 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 2291 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
2239 | goto fail; | 2292 | goto fail; |
2240 | } | 2293 | } |
2241 | 2294 | ||
@@ -2284,7 +2337,8 @@ int drbd_adm_connect(struct sk_buff *skb, struct genl_info *info) | |||
2284 | retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); | 2337 | retcode = conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE); |
2285 | 2338 | ||
2286 | conn_reconfig_done(connection); | 2339 | conn_reconfig_done(connection); |
2287 | drbd_adm_finish(info, retcode); | 2340 | mutex_unlock(&adm_ctx.resource->adm_mutex); |
2341 | drbd_adm_finish(&adm_ctx, info, retcode); | ||
2288 | return 0; | 2342 | return 0; |
2289 | 2343 | ||
2290 | fail: | 2344 | fail: |
@@ -2292,8 +2346,9 @@ fail: | |||
2292 | kfree(new_net_conf); | 2346 | kfree(new_net_conf); |
2293 | 2347 | ||
2294 | conn_reconfig_done(connection); | 2348 | conn_reconfig_done(connection); |
2349 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
2295 | out: | 2350 | out: |
2296 | drbd_adm_finish(info, retcode); | 2351 | drbd_adm_finish(&adm_ctx, info, retcode); |
2297 | return 0; | 2352 | return 0; |
2298 | } | 2353 | } |
2299 | 2354 | ||
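The hunks above establish the pattern this whole commit applies across drbd_nl.c: the admin context moves from an implicit global into an on-stack struct drbd_config_context, every handler passes it explicitly to drbd_adm_prepare()/drbd_adm_finish(), and the actual reconfiguration is serialized per resource by the new adm_mutex, with a separate finish: label so the unlock only runs once the lock was actually taken. A minimal sketch of the resulting handler shape (drbd_adm_example is a placeholder name; the body in the middle is elided):

    int drbd_adm_example(struct sk_buff *skb, struct genl_info *info)
    {
            struct drbd_config_context adm_ctx;     /* on stack, not global */
            enum drbd_ret_code retcode;

            retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE);
            if (!adm_ctx.reply_skb)
                    return retcode;
            if (retcode != NO_ERROR)
                    goto finish;             /* not locked yet: skip the unlock */

            mutex_lock(&adm_ctx.resource->adm_mutex);
            /* ... per-resource reconfiguration work ... */
            mutex_unlock(&adm_ctx.resource->adm_mutex);
    finish:
            drbd_adm_finish(&adm_ctx, info, retcode);
            return 0;
    }

Keeping the context on the stack is what later permits genl_family.parallel_ops: concurrent handlers no longer share mutable state, and the per-resource mutex replaces the serialization the single global context used to provide implicitly.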
@@ -2356,13 +2411,14 @@ static enum drbd_state_rv conn_try_disconnect(struct drbd_connection *connection | |||
2356 | 2411 | ||
2357 | int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) | 2412 | int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) |
2358 | { | 2413 | { |
2414 | struct drbd_config_context adm_ctx; | ||
2359 | struct disconnect_parms parms; | 2415 | struct disconnect_parms parms; |
2360 | struct drbd_connection *connection; | 2416 | struct drbd_connection *connection; |
2361 | enum drbd_state_rv rv; | 2417 | enum drbd_state_rv rv; |
2362 | enum drbd_ret_code retcode; | 2418 | enum drbd_ret_code retcode; |
2363 | int err; | 2419 | int err; |
2364 | 2420 | ||
2365 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_CONNECTION); | 2421 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_CONNECTION); |
2366 | if (!adm_ctx.reply_skb) | 2422 | if (!adm_ctx.reply_skb) |
2367 | return retcode; | 2423 | return retcode; |
2368 | if (retcode != NO_ERROR) | 2424 | if (retcode != NO_ERROR) |
@@ -2374,18 +2430,20 @@ int drbd_adm_disconnect(struct sk_buff *skb, struct genl_info *info) | |||
2374 | err = disconnect_parms_from_attrs(&parms, info); | 2430 | err = disconnect_parms_from_attrs(&parms, info); |
2375 | if (err) { | 2431 | if (err) { |
2376 | retcode = ERR_MANDATORY_TAG; | 2432 | retcode = ERR_MANDATORY_TAG; |
2377 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 2433 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
2378 | goto fail; | 2434 | goto fail; |
2379 | } | 2435 | } |
2380 | } | 2436 | } |
2381 | 2437 | ||
2438 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2382 | rv = conn_try_disconnect(connection, parms.force_disconnect); | 2439 | rv = conn_try_disconnect(connection, parms.force_disconnect); |
2383 | if (rv < SS_SUCCESS) | 2440 | if (rv < SS_SUCCESS) |
2384 | retcode = rv; /* FIXME: Type mismatch. */ | 2441 | retcode = rv; /* FIXME: Type mismatch. */ |
2385 | else | 2442 | else |
2386 | retcode = NO_ERROR; | 2443 | retcode = NO_ERROR; |
2444 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
2387 | fail: | 2445 | fail: |
2388 | drbd_adm_finish(info, retcode); | 2446 | drbd_adm_finish(&adm_ctx, info, retcode); |
2389 | return 0; | 2447 | return 0; |
2390 | } | 2448 | } |
2391 | 2449 | ||
@@ -2407,6 +2465,7 @@ void resync_after_online_grow(struct drbd_device *device) | |||
2407 | 2465 | ||
2408 | int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) | 2466 | int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) |
2409 | { | 2467 | { |
2468 | struct drbd_config_context adm_ctx; | ||
2410 | struct disk_conf *old_disk_conf, *new_disk_conf = NULL; | 2469 | struct disk_conf *old_disk_conf, *new_disk_conf = NULL; |
2411 | struct resize_parms rs; | 2470 | struct resize_parms rs; |
2412 | struct drbd_device *device; | 2471 | struct drbd_device *device; |
@@ -2417,12 +2476,13 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) | |||
2417 | sector_t u_size; | 2476 | sector_t u_size; |
2418 | int err; | 2477 | int err; |
2419 | 2478 | ||
2420 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 2479 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
2421 | if (!adm_ctx.reply_skb) | 2480 | if (!adm_ctx.reply_skb) |
2422 | return retcode; | 2481 | return retcode; |
2423 | if (retcode != NO_ERROR) | 2482 | if (retcode != NO_ERROR) |
2424 | goto fail; | 2483 | goto finish; |
2425 | 2484 | ||
2485 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2426 | device = adm_ctx.device; | 2486 | device = adm_ctx.device; |
2427 | if (!get_ldev(device)) { | 2487 | if (!get_ldev(device)) { |
2428 | retcode = ERR_NO_DISK; | 2488 | retcode = ERR_NO_DISK; |
@@ -2436,7 +2496,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) | |||
2436 | err = resize_parms_from_attrs(&rs, info); | 2496 | err = resize_parms_from_attrs(&rs, info); |
2437 | if (err) { | 2497 | if (err) { |
2438 | retcode = ERR_MANDATORY_TAG; | 2498 | retcode = ERR_MANDATORY_TAG; |
2439 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 2499 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
2440 | goto fail_ldev; | 2500 | goto fail_ldev; |
2441 | } | 2501 | } |
2442 | } | 2502 | } |
@@ -2482,7 +2542,7 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) | |||
2482 | goto fail_ldev; | 2542 | goto fail_ldev; |
2483 | } | 2543 | } |
2484 | 2544 | ||
2485 | if (device->state.conn != C_CONNECTED) { | 2545 | if (device->state.conn != C_CONNECTED && !rs.resize_force) { |
2486 | retcode = ERR_MD_LAYOUT_CONNECTED; | 2546 | retcode = ERR_MD_LAYOUT_CONNECTED; |
2487 | goto fail_ldev; | 2547 | goto fail_ldev; |
2488 | } | 2548 | } |
@@ -2528,7 +2588,9 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) | |||
2528 | } | 2588 | } |
2529 | 2589 | ||
2530 | fail: | 2590 | fail: |
2531 | drbd_adm_finish(info, retcode); | 2591 | mutex_unlock(&adm_ctx.resource->adm_mutex); |
2592 | finish: | ||
2593 | drbd_adm_finish(&adm_ctx, info, retcode); | ||
2532 | return 0; | 2594 | return 0; |
2533 | 2595 | ||
2534 | fail_ldev: | 2596 | fail_ldev: |
@@ -2538,11 +2600,12 @@ int drbd_adm_resize(struct sk_buff *skb, struct genl_info *info) | |||
2538 | 2600 | ||
2539 | int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) | 2601 | int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) |
2540 | { | 2602 | { |
2603 | struct drbd_config_context adm_ctx; | ||
2541 | enum drbd_ret_code retcode; | 2604 | enum drbd_ret_code retcode; |
2542 | struct res_opts res_opts; | 2605 | struct res_opts res_opts; |
2543 | int err; | 2606 | int err; |
2544 | 2607 | ||
2545 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); | 2608 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE); |
2546 | if (!adm_ctx.reply_skb) | 2609 | if (!adm_ctx.reply_skb) |
2547 | return retcode; | 2610 | return retcode; |
2548 | if (retcode != NO_ERROR) | 2611 | if (retcode != NO_ERROR) |
@@ -2555,33 +2618,37 @@ int drbd_adm_resource_opts(struct sk_buff *skb, struct genl_info *info) | |||
2555 | err = res_opts_from_attrs(&res_opts, info); | 2618 | err = res_opts_from_attrs(&res_opts, info); |
2556 | if (err && err != -ENOMSG) { | 2619 | if (err && err != -ENOMSG) { |
2557 | retcode = ERR_MANDATORY_TAG; | 2620 | retcode = ERR_MANDATORY_TAG; |
2558 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 2621 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
2559 | goto fail; | 2622 | goto fail; |
2560 | } | 2623 | } |
2561 | 2624 | ||
2625 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2562 | err = set_resource_options(adm_ctx.resource, &res_opts); | 2626 | err = set_resource_options(adm_ctx.resource, &res_opts); |
2563 | if (err) { | 2627 | if (err) { |
2564 | retcode = ERR_INVALID_REQUEST; | 2628 | retcode = ERR_INVALID_REQUEST; |
2565 | if (err == -ENOMEM) | 2629 | if (err == -ENOMEM) |
2566 | retcode = ERR_NOMEM; | 2630 | retcode = ERR_NOMEM; |
2567 | } | 2631 | } |
2632 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
2568 | 2633 | ||
2569 | fail: | 2634 | fail: |
2570 | drbd_adm_finish(info, retcode); | 2635 | drbd_adm_finish(&adm_ctx, info, retcode); |
2571 | return 0; | 2636 | return 0; |
2572 | } | 2637 | } |
2573 | 2638 | ||
2574 | int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) | 2639 | int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) |
2575 | { | 2640 | { |
2641 | struct drbd_config_context adm_ctx; | ||
2576 | struct drbd_device *device; | 2642 | struct drbd_device *device; |
2577 | int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ | 2643 | int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ |
2578 | 2644 | ||
2579 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 2645 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
2580 | if (!adm_ctx.reply_skb) | 2646 | if (!adm_ctx.reply_skb) |
2581 | return retcode; | 2647 | return retcode; |
2582 | if (retcode != NO_ERROR) | 2648 | if (retcode != NO_ERROR) |
2583 | goto out; | 2649 | goto out; |
2584 | 2650 | ||
2651 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2585 | device = adm_ctx.device; | 2652 | device = adm_ctx.device; |
2586 | 2653 | ||
2587 | /* If there is still bitmap IO pending, probably because of a previous | 2654 | /* If there is still bitmap IO pending, probably because of a previous |
@@ -2605,26 +2672,29 @@ int drbd_adm_invalidate(struct sk_buff *skb, struct genl_info *info) | |||
2605 | } else | 2672 | } else |
2606 | retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T)); | 2673 | retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_T)); |
2607 | drbd_resume_io(device); | 2674 | drbd_resume_io(device); |
2608 | 2675 | mutex_unlock(&adm_ctx.resource->adm_mutex); | |
2609 | out: | 2676 | out: |
2610 | drbd_adm_finish(info, retcode); | 2677 | drbd_adm_finish(&adm_ctx, info, retcode); |
2611 | return 0; | 2678 | return 0; |
2612 | } | 2679 | } |
2613 | 2680 | ||
2614 | static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info, | 2681 | static int drbd_adm_simple_request_state(struct sk_buff *skb, struct genl_info *info, |
2615 | union drbd_state mask, union drbd_state val) | 2682 | union drbd_state mask, union drbd_state val) |
2616 | { | 2683 | { |
2684 | struct drbd_config_context adm_ctx; | ||
2617 | enum drbd_ret_code retcode; | 2685 | enum drbd_ret_code retcode; |
2618 | 2686 | ||
2619 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 2687 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
2620 | if (!adm_ctx.reply_skb) | 2688 | if (!adm_ctx.reply_skb) |
2621 | return retcode; | 2689 | return retcode; |
2622 | if (retcode != NO_ERROR) | 2690 | if (retcode != NO_ERROR) |
2623 | goto out; | 2691 | goto out; |
2624 | 2692 | ||
2693 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2625 | retcode = drbd_request_state(adm_ctx.device, mask, val); | 2694 | retcode = drbd_request_state(adm_ctx.device, mask, val); |
2695 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
2626 | out: | 2696 | out: |
2627 | drbd_adm_finish(info, retcode); | 2697 | drbd_adm_finish(&adm_ctx, info, retcode); |
2628 | return 0; | 2698 | return 0; |
2629 | } | 2699 | } |
2630 | 2700 | ||
@@ -2639,15 +2709,17 @@ static int drbd_bmio_set_susp_al(struct drbd_device *device) | |||
2639 | 2709 | ||
2640 | int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) | 2710 | int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) |
2641 | { | 2711 | { |
2712 | struct drbd_config_context adm_ctx; | ||
2642 | int retcode; /* drbd_ret_code, drbd_state_rv */ | 2713 | int retcode; /* drbd_ret_code, drbd_state_rv */ |
2643 | struct drbd_device *device; | 2714 | struct drbd_device *device; |
2644 | 2715 | ||
2645 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 2716 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
2646 | if (!adm_ctx.reply_skb) | 2717 | if (!adm_ctx.reply_skb) |
2647 | return retcode; | 2718 | return retcode; |
2648 | if (retcode != NO_ERROR) | 2719 | if (retcode != NO_ERROR) |
2649 | goto out; | 2720 | goto out; |
2650 | 2721 | ||
2722 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2651 | device = adm_ctx.device; | 2723 | device = adm_ctx.device; |
2652 | 2724 | ||
2653 | /* If there is still bitmap IO pending, probably because of a previous | 2725 | /* If there is still bitmap IO pending, probably because of a previous |
@@ -2674,40 +2746,45 @@ int drbd_adm_invalidate_peer(struct sk_buff *skb, struct genl_info *info) | |||
2674 | } else | 2746 | } else |
2675 | retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); | 2747 | retcode = drbd_request_state(device, NS(conn, C_STARTING_SYNC_S)); |
2676 | drbd_resume_io(device); | 2748 | drbd_resume_io(device); |
2677 | 2749 | mutex_unlock(&adm_ctx.resource->adm_mutex); | |
2678 | out: | 2750 | out: |
2679 | drbd_adm_finish(info, retcode); | 2751 | drbd_adm_finish(&adm_ctx, info, retcode); |
2680 | return 0; | 2752 | return 0; |
2681 | } | 2753 | } |
2682 | 2754 | ||
2683 | int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info) | 2755 | int drbd_adm_pause_sync(struct sk_buff *skb, struct genl_info *info) |
2684 | { | 2756 | { |
2757 | struct drbd_config_context adm_ctx; | ||
2685 | enum drbd_ret_code retcode; | 2758 | enum drbd_ret_code retcode; |
2686 | 2759 | ||
2687 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 2760 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
2688 | if (!adm_ctx.reply_skb) | 2761 | if (!adm_ctx.reply_skb) |
2689 | return retcode; | 2762 | return retcode; |
2690 | if (retcode != NO_ERROR) | 2763 | if (retcode != NO_ERROR) |
2691 | goto out; | 2764 | goto out; |
2692 | 2765 | ||
2766 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2693 | if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO) | 2767 | if (drbd_request_state(adm_ctx.device, NS(user_isp, 1)) == SS_NOTHING_TO_DO) |
2694 | retcode = ERR_PAUSE_IS_SET; | 2768 | retcode = ERR_PAUSE_IS_SET; |
2769 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
2695 | out: | 2770 | out: |
2696 | drbd_adm_finish(info, retcode); | 2771 | drbd_adm_finish(&adm_ctx, info, retcode); |
2697 | return 0; | 2772 | return 0; |
2698 | } | 2773 | } |
2699 | 2774 | ||
2700 | int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) | 2775 | int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) |
2701 | { | 2776 | { |
2777 | struct drbd_config_context adm_ctx; | ||
2702 | union drbd_dev_state s; | 2778 | union drbd_dev_state s; |
2703 | enum drbd_ret_code retcode; | 2779 | enum drbd_ret_code retcode; |
2704 | 2780 | ||
2705 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 2781 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
2706 | if (!adm_ctx.reply_skb) | 2782 | if (!adm_ctx.reply_skb) |
2707 | return retcode; | 2783 | return retcode; |
2708 | if (retcode != NO_ERROR) | 2784 | if (retcode != NO_ERROR) |
2709 | goto out; | 2785 | goto out; |
2710 | 2786 | ||
2787 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2711 | if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { | 2788 | if (drbd_request_state(adm_ctx.device, NS(user_isp, 0)) == SS_NOTHING_TO_DO) { |
2712 | s = adm_ctx.device->state; | 2789 | s = adm_ctx.device->state; |
2713 | if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { | 2790 | if (s.conn == C_PAUSED_SYNC_S || s.conn == C_PAUSED_SYNC_T) { |
@@ -2717,9 +2794,9 @@ int drbd_adm_resume_sync(struct sk_buff *skb, struct genl_info *info) | |||
2717 | retcode = ERR_PAUSE_IS_CLEAR; | 2794 | retcode = ERR_PAUSE_IS_CLEAR; |
2718 | } | 2795 | } |
2719 | } | 2796 | } |
2720 | 2797 | mutex_unlock(&adm_ctx.resource->adm_mutex); | |
2721 | out: | 2798 | out: |
2722 | drbd_adm_finish(info, retcode); | 2799 | drbd_adm_finish(&adm_ctx, info, retcode); |
2723 | return 0; | 2800 | return 0; |
2724 | } | 2801 | } |
2725 | 2802 | ||
@@ -2730,15 +2807,17 @@ int drbd_adm_suspend_io(struct sk_buff *skb, struct genl_info *info) | |||
2730 | 2807 | ||
2731 | int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) | 2808 | int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) |
2732 | { | 2809 | { |
2810 | struct drbd_config_context adm_ctx; | ||
2733 | struct drbd_device *device; | 2811 | struct drbd_device *device; |
2734 | int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ | 2812 | int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ |
2735 | 2813 | ||
2736 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 2814 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
2737 | if (!adm_ctx.reply_skb) | 2815 | if (!adm_ctx.reply_skb) |
2738 | return retcode; | 2816 | return retcode; |
2739 | if (retcode != NO_ERROR) | 2817 | if (retcode != NO_ERROR) |
2740 | goto out; | 2818 | goto out; |
2741 | 2819 | ||
2820 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
2742 | device = adm_ctx.device; | 2821 | device = adm_ctx.device; |
2743 | if (test_bit(NEW_CUR_UUID, &device->flags)) { | 2822 | if (test_bit(NEW_CUR_UUID, &device->flags)) { |
2744 | drbd_uuid_new_current(device); | 2823 | drbd_uuid_new_current(device); |
@@ -2753,9 +2832,9 @@ int drbd_adm_resume_io(struct sk_buff *skb, struct genl_info *info) | |||
2753 | tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO); | 2832 | tl_restart(first_peer_device(device)->connection, FAIL_FROZEN_DISK_IO); |
2754 | } | 2833 | } |
2755 | drbd_resume_io(device); | 2834 | drbd_resume_io(device); |
2756 | 2835 | mutex_unlock(&adm_ctx.resource->adm_mutex); | |
2757 | out: | 2836 | out: |
2758 | drbd_adm_finish(info, retcode); | 2837 | drbd_adm_finish(&adm_ctx, info, retcode); |
2759 | return 0; | 2838 | return 0; |
2760 | } | 2839 | } |
2761 | 2840 | ||
@@ -2931,10 +3010,11 @@ nla_put_failure: | |||
2931 | 3010 | ||
2932 | int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info) | 3011 | int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info) |
2933 | { | 3012 | { |
3013 | struct drbd_config_context adm_ctx; | ||
2934 | enum drbd_ret_code retcode; | 3014 | enum drbd_ret_code retcode; |
2935 | int err; | 3015 | int err; |
2936 | 3016 | ||
2937 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 3017 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
2938 | if (!adm_ctx.reply_skb) | 3018 | if (!adm_ctx.reply_skb) |
2939 | return retcode; | 3019 | return retcode; |
2940 | if (retcode != NO_ERROR) | 3020 | if (retcode != NO_ERROR) |
@@ -2946,7 +3026,7 @@ int drbd_adm_get_status(struct sk_buff *skb, struct genl_info *info) | |||
2946 | return err; | 3026 | return err; |
2947 | } | 3027 | } |
2948 | out: | 3028 | out: |
2949 | drbd_adm_finish(info, retcode); | 3029 | drbd_adm_finish(&adm_ctx, info, retcode); |
2950 | return 0; | 3030 | return 0; |
2951 | } | 3031 | } |
2952 | 3032 | ||
@@ -3133,11 +3213,12 @@ dump: | |||
3133 | 3213 | ||
3134 | int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) | 3214 | int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) |
3135 | { | 3215 | { |
3216 | struct drbd_config_context adm_ctx; | ||
3136 | enum drbd_ret_code retcode; | 3217 | enum drbd_ret_code retcode; |
3137 | struct timeout_parms tp; | 3218 | struct timeout_parms tp; |
3138 | int err; | 3219 | int err; |
3139 | 3220 | ||
3140 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 3221 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
3141 | if (!adm_ctx.reply_skb) | 3222 | if (!adm_ctx.reply_skb) |
3142 | return retcode; | 3223 | return retcode; |
3143 | if (retcode != NO_ERROR) | 3224 | if (retcode != NO_ERROR) |
@@ -3154,17 +3235,18 @@ int drbd_adm_get_timeout_type(struct sk_buff *skb, struct genl_info *info) | |||
3154 | return err; | 3235 | return err; |
3155 | } | 3236 | } |
3156 | out: | 3237 | out: |
3157 | drbd_adm_finish(info, retcode); | 3238 | drbd_adm_finish(&adm_ctx, info, retcode); |
3158 | return 0; | 3239 | return 0; |
3159 | } | 3240 | } |
3160 | 3241 | ||
3161 | int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) | 3242 | int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) |
3162 | { | 3243 | { |
3244 | struct drbd_config_context adm_ctx; | ||
3163 | struct drbd_device *device; | 3245 | struct drbd_device *device; |
3164 | enum drbd_ret_code retcode; | 3246 | enum drbd_ret_code retcode; |
3165 | struct start_ov_parms parms; | 3247 | struct start_ov_parms parms; |
3166 | 3248 | ||
3167 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 3249 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
3168 | if (!adm_ctx.reply_skb) | 3250 | if (!adm_ctx.reply_skb) |
3169 | return retcode; | 3251 | return retcode; |
3170 | if (retcode != NO_ERROR) | 3252 | if (retcode != NO_ERROR) |
@@ -3179,10 +3261,12 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) | |||
3179 | int err = start_ov_parms_from_attrs(&parms, info); | 3261 | int err = start_ov_parms_from_attrs(&parms, info); |
3180 | if (err) { | 3262 | if (err) { |
3181 | retcode = ERR_MANDATORY_TAG; | 3263 | retcode = ERR_MANDATORY_TAG; |
3182 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 3264 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
3183 | goto out; | 3265 | goto out; |
3184 | } | 3266 | } |
3185 | } | 3267 | } |
3268 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
3269 | |||
3186 | /* w_make_ov_request expects position to be aligned */ | 3270 | /* w_make_ov_request expects position to be aligned */ |
3187 | device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1); | 3271 | device->ov_start_sector = parms.ov_start_sector & ~(BM_SECT_PER_BIT-1); |
3188 | device->ov_stop_sector = parms.ov_stop_sector; | 3272 | device->ov_stop_sector = parms.ov_stop_sector; |
@@ -3193,21 +3277,24 @@ int drbd_adm_start_ov(struct sk_buff *skb, struct genl_info *info) | |||
3193 | wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); | 3277 | wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags)); |
3194 | retcode = drbd_request_state(device, NS(conn, C_VERIFY_S)); | 3278 | retcode = drbd_request_state(device, NS(conn, C_VERIFY_S)); |
3195 | drbd_resume_io(device); | 3279 | drbd_resume_io(device); |
3280 | |||
3281 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
3196 | out: | 3282 | out: |
3197 | drbd_adm_finish(info, retcode); | 3283 | drbd_adm_finish(&adm_ctx, info, retcode); |
3198 | return 0; | 3284 | return 0; |
3199 | } | 3285 | } |
3200 | 3286 | ||
3201 | 3287 | ||
3202 | int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) | 3288 | int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) |
3203 | { | 3289 | { |
3290 | struct drbd_config_context adm_ctx; | ||
3204 | struct drbd_device *device; | 3291 | struct drbd_device *device; |
3205 | enum drbd_ret_code retcode; | 3292 | enum drbd_ret_code retcode; |
3206 | int skip_initial_sync = 0; | 3293 | int skip_initial_sync = 0; |
3207 | int err; | 3294 | int err; |
3208 | struct new_c_uuid_parms args; | 3295 | struct new_c_uuid_parms args; |
3209 | 3296 | ||
3210 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 3297 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
3211 | if (!adm_ctx.reply_skb) | 3298 | if (!adm_ctx.reply_skb) |
3212 | return retcode; | 3299 | return retcode; |
3213 | if (retcode != NO_ERROR) | 3300 | if (retcode != NO_ERROR) |
@@ -3219,11 +3306,12 @@ int drbd_adm_new_c_uuid(struct sk_buff *skb, struct genl_info *info) | |||
3219 | err = new_c_uuid_parms_from_attrs(&args, info); | 3306 | err = new_c_uuid_parms_from_attrs(&args, info); |
3220 | if (err) { | 3307 | if (err) { |
3221 | retcode = ERR_MANDATORY_TAG; | 3308 | retcode = ERR_MANDATORY_TAG; |
3222 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 3309 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
3223 | goto out_nolock; | 3310 | goto out_nolock; |
3224 | } | 3311 | } |
3225 | } | 3312 | } |
3226 | 3313 | ||
3314 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
3227 | mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */ | 3315 | mutex_lock(device->state_mutex); /* Protects us against serialized state changes. */ |
3228 | 3316 | ||
3229 | if (!get_ldev(device)) { | 3317 | if (!get_ldev(device)) { |
@@ -3268,22 +3356,24 @@ out_dec: | |||
3268 | put_ldev(device); | 3356 | put_ldev(device); |
3269 | out: | 3357 | out: |
3270 | mutex_unlock(device->state_mutex); | 3358 | mutex_unlock(device->state_mutex); |
3359 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
3271 | out_nolock: | 3360 | out_nolock: |
3272 | drbd_adm_finish(info, retcode); | 3361 | drbd_adm_finish(&adm_ctx, info, retcode); |
3273 | return 0; | 3362 | return 0; |
3274 | } | 3363 | } |
3275 | 3364 | ||
3276 | static enum drbd_ret_code | 3365 | static enum drbd_ret_code |
3277 | drbd_check_resource_name(const char *name) | 3366 | drbd_check_resource_name(struct drbd_config_context *adm_ctx) |
3278 | { | 3367 | { |
3368 | const char *name = adm_ctx->resource_name; | ||
3279 | if (!name || !name[0]) { | 3369 | if (!name || !name[0]) { |
3280 | drbd_msg_put_info("resource name missing"); | 3370 | drbd_msg_put_info(adm_ctx->reply_skb, "resource name missing"); |
3281 | return ERR_MANDATORY_TAG; | 3371 | return ERR_MANDATORY_TAG; |
3282 | } | 3372 | } |
3283 | /* if we want to use these in sysfs/configfs/debugfs some day, | 3373 | /* if we want to use these in sysfs/configfs/debugfs some day, |
3284 | * we must not allow slashes */ | 3374 | * we must not allow slashes */ |
3285 | if (strchr(name, '/')) { | 3375 | if (strchr(name, '/')) { |
3286 | drbd_msg_put_info("invalid resource name"); | 3376 | drbd_msg_put_info(adm_ctx->reply_skb, "invalid resource name"); |
3287 | return ERR_INVALID_REQUEST; | 3377 | return ERR_INVALID_REQUEST; |
3288 | } | 3378 | } |
3289 | return NO_ERROR; | 3379 | return NO_ERROR; |
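drbd_check_resource_name() now takes the whole context so it can report through adm_ctx->reply_skb, but the validation itself is just two rules. Stated standalone (plain C, netlink reporting omitted):

    #include <stdbool.h>
    #include <string.h>

    /* Non-empty, and no '/', so the name remains usable as a
     * sysfs/configfs/debugfs directory entry some day. */
    static bool resource_name_ok(const char *name)
    {
            if (!name || !name[0])
                    return false;   /* ERR_MANDATORY_TAG: "resource name missing" */
            if (strchr(name, '/'))
                    return false;   /* ERR_INVALID_REQUEST: "invalid resource name" */
            return true;
    }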
@@ -3291,11 +3381,12 @@ drbd_check_resource_name(const char *name) | |||
3291 | 3381 | ||
3292 | int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) | 3382 | int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) |
3293 | { | 3383 | { |
3384 | struct drbd_config_context adm_ctx; | ||
3294 | enum drbd_ret_code retcode; | 3385 | enum drbd_ret_code retcode; |
3295 | struct res_opts res_opts; | 3386 | struct res_opts res_opts; |
3296 | int err; | 3387 | int err; |
3297 | 3388 | ||
3298 | retcode = drbd_adm_prepare(skb, info, 0); | 3389 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, 0); |
3299 | if (!adm_ctx.reply_skb) | 3390 | if (!adm_ctx.reply_skb) |
3300 | return retcode; | 3391 | return retcode; |
3301 | if (retcode != NO_ERROR) | 3392 | if (retcode != NO_ERROR) |
@@ -3305,48 +3396,50 @@ int drbd_adm_new_resource(struct sk_buff *skb, struct genl_info *info) | |||
3305 | err = res_opts_from_attrs(&res_opts, info); | 3396 | err = res_opts_from_attrs(&res_opts, info); |
3306 | if (err && err != -ENOMSG) { | 3397 | if (err && err != -ENOMSG) { |
3307 | retcode = ERR_MANDATORY_TAG; | 3398 | retcode = ERR_MANDATORY_TAG; |
3308 | drbd_msg_put_info(from_attrs_err_to_txt(err)); | 3399 | drbd_msg_put_info(adm_ctx.reply_skb, from_attrs_err_to_txt(err)); |
3309 | goto out; | 3400 | goto out; |
3310 | } | 3401 | } |
3311 | 3402 | ||
3312 | retcode = drbd_check_resource_name(adm_ctx.resource_name); | 3403 | retcode = drbd_check_resource_name(&adm_ctx); |
3313 | if (retcode != NO_ERROR) | 3404 | if (retcode != NO_ERROR) |
3314 | goto out; | 3405 | goto out; |
3315 | 3406 | ||
3316 | if (adm_ctx.resource) { | 3407 | if (adm_ctx.resource) { |
3317 | if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) { | 3408 | if (info->nlhdr->nlmsg_flags & NLM_F_EXCL) { |
3318 | retcode = ERR_INVALID_REQUEST; | 3409 | retcode = ERR_INVALID_REQUEST; |
3319 | drbd_msg_put_info("resource exists"); | 3410 | drbd_msg_put_info(adm_ctx.reply_skb, "resource exists"); |
3320 | } | 3411 | } |
3321 | /* else: still NO_ERROR */ | 3412 | /* else: still NO_ERROR */ |
3322 | goto out; | 3413 | goto out; |
3323 | } | 3414 | } |
3324 | 3415 | ||
3416 | /* not yet safe for genl_family.parallel_ops */ | ||
3325 | if (!conn_create(adm_ctx.resource_name, &res_opts)) | 3417 | if (!conn_create(adm_ctx.resource_name, &res_opts)) |
3326 | retcode = ERR_NOMEM; | 3418 | retcode = ERR_NOMEM; |
3327 | out: | 3419 | out: |
3328 | drbd_adm_finish(info, retcode); | 3420 | drbd_adm_finish(&adm_ctx, info, retcode); |
3329 | return 0; | 3421 | return 0; |
3330 | } | 3422 | } |
3331 | 3423 | ||
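Note the NLM_F_EXCL handling in drbd_adm_new_resource(): creating a resource that already exists still returns NO_ERROR unless the caller demanded exclusivity, mirroring open(2) with O_CREAT versus O_CREAT|O_EXCL. A hypothetical condensation of that branch:

    #include <stdbool.h>

    enum new_resource_action { NR_CREATE, NR_FAIL_EXCL, NR_NOOP_OK };

    static enum new_resource_action
    new_resource_action(bool exists, bool excl_requested)
    {
            if (!exists)
                    return NR_CREATE;        /* falls through to conn_create() */
            /* idempotent create succeeds; NLM_F_EXCL turns it into an error */
            return excl_requested ? NR_FAIL_EXCL : NR_NOOP_OK;
    }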
3332 | int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) | 3424 | int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) |
3333 | { | 3425 | { |
3426 | struct drbd_config_context adm_ctx; | ||
3334 | struct drbd_genlmsghdr *dh = info->userhdr; | 3427 | struct drbd_genlmsghdr *dh = info->userhdr; |
3335 | enum drbd_ret_code retcode; | 3428 | enum drbd_ret_code retcode; |
3336 | 3429 | ||
3337 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); | 3430 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE); |
3338 | if (!adm_ctx.reply_skb) | 3431 | if (!adm_ctx.reply_skb) |
3339 | return retcode; | 3432 | return retcode; |
3340 | if (retcode != NO_ERROR) | 3433 | if (retcode != NO_ERROR) |
3341 | goto out; | 3434 | goto out; |
3342 | 3435 | ||
3343 | if (dh->minor > MINORMASK) { | 3436 | if (dh->minor > MINORMASK) { |
3344 | drbd_msg_put_info("requested minor out of range"); | 3437 | drbd_msg_put_info(adm_ctx.reply_skb, "requested minor out of range"); |
3345 | retcode = ERR_INVALID_REQUEST; | 3438 | retcode = ERR_INVALID_REQUEST; |
3346 | goto out; | 3439 | goto out; |
3347 | } | 3440 | } |
3348 | if (adm_ctx.volume > DRBD_VOLUME_MAX) { | 3441 | if (adm_ctx.volume > DRBD_VOLUME_MAX) { |
3349 | drbd_msg_put_info("requested volume id out of range"); | 3442 | drbd_msg_put_info(adm_ctx.reply_skb, "requested volume id out of range"); |
3350 | retcode = ERR_INVALID_REQUEST; | 3443 | retcode = ERR_INVALID_REQUEST; |
3351 | goto out; | 3444 | goto out; |
3352 | } | 3445 | } |
@@ -3360,9 +3453,11 @@ int drbd_adm_new_minor(struct sk_buff *skb, struct genl_info *info) | |||
3360 | goto out; | 3453 | goto out; |
3361 | } | 3454 | } |
3362 | 3455 | ||
3363 | retcode = drbd_create_device(adm_ctx.resource, dh->minor, adm_ctx.volume); | 3456 | mutex_lock(&adm_ctx.resource->adm_mutex); |
3457 | retcode = drbd_create_device(&adm_ctx, dh->minor); | ||
3458 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
3364 | out: | 3459 | out: |
3365 | drbd_adm_finish(info, retcode); | 3460 | drbd_adm_finish(&adm_ctx, info, retcode); |
3366 | return 0; | 3461 | return 0; |
3367 | } | 3462 | } |
3368 | 3463 | ||
@@ -3383,35 +3478,40 @@ static enum drbd_ret_code adm_del_minor(struct drbd_device *device) | |||
3383 | 3478 | ||
3384 | int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info) | 3479 | int drbd_adm_del_minor(struct sk_buff *skb, struct genl_info *info) |
3385 | { | 3480 | { |
3481 | struct drbd_config_context adm_ctx; | ||
3386 | enum drbd_ret_code retcode; | 3482 | enum drbd_ret_code retcode; |
3387 | 3483 | ||
3388 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_MINOR); | 3484 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_MINOR); |
3389 | if (!adm_ctx.reply_skb) | 3485 | if (!adm_ctx.reply_skb) |
3390 | return retcode; | 3486 | return retcode; |
3391 | if (retcode != NO_ERROR) | 3487 | if (retcode != NO_ERROR) |
3392 | goto out; | 3488 | goto out; |
3393 | 3489 | ||
3490 | mutex_lock(&adm_ctx.resource->adm_mutex); | ||
3394 | retcode = adm_del_minor(adm_ctx.device); | 3491 | retcode = adm_del_minor(adm_ctx.device); |
3492 | mutex_unlock(&adm_ctx.resource->adm_mutex); | ||
3395 | out: | 3493 | out: |
3396 | drbd_adm_finish(info, retcode); | 3494 | drbd_adm_finish(&adm_ctx, info, retcode); |
3397 | return 0; | 3495 | return 0; |
3398 | } | 3496 | } |
3399 | 3497 | ||
3400 | int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) | 3498 | int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) |
3401 | { | 3499 | { |
3500 | struct drbd_config_context adm_ctx; | ||
3402 | struct drbd_resource *resource; | 3501 | struct drbd_resource *resource; |
3403 | struct drbd_connection *connection; | 3502 | struct drbd_connection *connection; |
3404 | struct drbd_device *device; | 3503 | struct drbd_device *device; |
3405 | int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ | 3504 | int retcode; /* enum drbd_ret_code rsp. enum drbd_state_rv */ |
3406 | unsigned i; | 3505 | unsigned i; |
3407 | 3506 | ||
3408 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); | 3507 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE); |
3409 | if (!adm_ctx.reply_skb) | 3508 | if (!adm_ctx.reply_skb) |
3410 | return retcode; | 3509 | return retcode; |
3411 | if (retcode != NO_ERROR) | 3510 | if (retcode != NO_ERROR) |
3412 | goto out; | 3511 | goto finish; |
3413 | 3512 | ||
3414 | resource = adm_ctx.resource; | 3513 | resource = adm_ctx.resource; |
3514 | mutex_lock(&resource->adm_mutex); | ||
3415 | /* demote */ | 3515 | /* demote */ |
3416 | for_each_connection(connection, resource) { | 3516 | for_each_connection(connection, resource) { |
3417 | struct drbd_peer_device *peer_device; | 3517 | struct drbd_peer_device *peer_device; |
@@ -3419,14 +3519,14 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) | |||
3419 | idr_for_each_entry(&connection->peer_devices, peer_device, i) { | 3519 | idr_for_each_entry(&connection->peer_devices, peer_device, i) { |
3420 | retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0); | 3520 | retcode = drbd_set_role(peer_device->device, R_SECONDARY, 0); |
3421 | if (retcode < SS_SUCCESS) { | 3521 | if (retcode < SS_SUCCESS) { |
3422 | drbd_msg_put_info("failed to demote"); | 3522 | drbd_msg_put_info(adm_ctx.reply_skb, "failed to demote"); |
3423 | goto out; | 3523 | goto out; |
3424 | } | 3524 | } |
3425 | } | 3525 | } |
3426 | 3526 | ||
3427 | retcode = conn_try_disconnect(connection, 0); | 3527 | retcode = conn_try_disconnect(connection, 0); |
3428 | if (retcode < SS_SUCCESS) { | 3528 | if (retcode < SS_SUCCESS) { |
3429 | drbd_msg_put_info("failed to disconnect"); | 3529 | drbd_msg_put_info(adm_ctx.reply_skb, "failed to disconnect"); |
3430 | goto out; | 3530 | goto out; |
3431 | } | 3531 | } |
3432 | } | 3532 | } |
@@ -3435,7 +3535,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) | |||
3435 | idr_for_each_entry(&resource->devices, device, i) { | 3535 | idr_for_each_entry(&resource->devices, device, i) { |
3436 | retcode = adm_detach(device, 0); | 3536 | retcode = adm_detach(device, 0); |
3437 | if (retcode < SS_SUCCESS || retcode > NO_ERROR) { | 3537 | if (retcode < SS_SUCCESS || retcode > NO_ERROR) { |
3438 | drbd_msg_put_info("failed to detach"); | 3538 | drbd_msg_put_info(adm_ctx.reply_skb, "failed to detach"); |
3439 | goto out; | 3539 | goto out; |
3440 | } | 3540 | } |
3441 | } | 3541 | } |
@@ -3453,7 +3553,7 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) | |||
3453 | retcode = adm_del_minor(device); | 3553 | retcode = adm_del_minor(device); |
3454 | if (retcode != NO_ERROR) { | 3554 | if (retcode != NO_ERROR) { |
3455 | /* "can not happen" */ | 3555 | /* "can not happen" */ |
3456 | drbd_msg_put_info("failed to delete volume"); | 3556 | drbd_msg_put_info(adm_ctx.reply_skb, "failed to delete volume"); |
3457 | goto out; | 3557 | goto out; |
3458 | } | 3558 | } |
3459 | } | 3559 | } |
@@ -3462,25 +3562,28 @@ int drbd_adm_down(struct sk_buff *skb, struct genl_info *info) | |||
3462 | synchronize_rcu(); | 3562 | synchronize_rcu(); |
3463 | drbd_free_resource(resource); | 3563 | drbd_free_resource(resource); |
3464 | retcode = NO_ERROR; | 3564 | retcode = NO_ERROR; |
3465 | |||
3466 | out: | 3565 | out: |
3467 | drbd_adm_finish(info, retcode); | 3566 | mutex_unlock(&resource->adm_mutex); |
3567 | finish: | ||
3568 | drbd_adm_finish(&adm_ctx, info, retcode); | ||
3468 | return 0; | 3569 | return 0; |
3469 | } | 3570 | } |
3470 | 3571 | ||
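drbd_adm_down() is the one handler that walks the whole resource, and the teardown order is load-bearing: volumes are demoted before connections are torn down, disks are detached before minors are deleted, and the resource is freed only after everything else succeeded. A compressed sketch of that sequencing; the step helpers are stand-ins for the loops above:

    static int demote_all(void)     { return 0; }  /* drbd_set_role(R_SECONDARY) loop */
    static int disconnect_all(void) { return 0; }  /* conn_try_disconnect() loop */
    static int detach_all(void)     { return 0; }  /* adm_detach() loop */
    static int delete_minors(void)  { return 0; }  /* adm_del_minor() loop */

    static int adm_down_order(void)
    {
            /* first failure wins and leaves the resource in place */
            if (demote_all() || disconnect_all() ||
                detach_all() || delete_minors())
                    return -1;
            /* only now: list_del_rcu, synchronize_rcu, drbd_free_resource */
            return 0;
    }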
3471 | int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) | 3572 | int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) |
3472 | { | 3573 | { |
3574 | struct drbd_config_context adm_ctx; | ||
3473 | struct drbd_resource *resource; | 3575 | struct drbd_resource *resource; |
3474 | struct drbd_connection *connection; | 3576 | struct drbd_connection *connection; |
3475 | enum drbd_ret_code retcode; | 3577 | enum drbd_ret_code retcode; |
3476 | 3578 | ||
3477 | retcode = drbd_adm_prepare(skb, info, DRBD_ADM_NEED_RESOURCE); | 3579 | retcode = drbd_adm_prepare(&adm_ctx, skb, info, DRBD_ADM_NEED_RESOURCE); |
3478 | if (!adm_ctx.reply_skb) | 3580 | if (!adm_ctx.reply_skb) |
3479 | return retcode; | 3581 | return retcode; |
3480 | if (retcode != NO_ERROR) | 3582 | if (retcode != NO_ERROR) |
3481 | goto out; | 3583 | goto finish; |
3482 | 3584 | ||
3483 | resource = adm_ctx.resource; | 3585 | resource = adm_ctx.resource; |
3586 | mutex_lock(&resource->adm_mutex); | ||
3484 | for_each_connection(connection, resource) { | 3587 | for_each_connection(connection, resource) { |
3485 | if (connection->cstate > C_STANDALONE) { | 3588 | if (connection->cstate > C_STANDALONE) { |
3486 | retcode = ERR_NET_CONFIGURED; | 3589 | retcode = ERR_NET_CONFIGURED; |
@@ -3499,7 +3602,9 @@ int drbd_adm_del_resource(struct sk_buff *skb, struct genl_info *info) | |||
3499 | drbd_free_resource(resource); | 3602 | drbd_free_resource(resource); |
3500 | retcode = NO_ERROR; | 3603 | retcode = NO_ERROR; |
3501 | out: | 3604 | out: |
3502 | drbd_adm_finish(info, retcode); | 3605 | mutex_unlock(&resource->adm_mutex); |
3606 | finish: | ||
3607 | drbd_adm_finish(&adm_ctx, info, retcode); | ||
3503 | return 0; | 3608 | return 0; |
3504 | } | 3609 | } |
3505 | 3610 | ||
diff --git a/drivers/block/drbd/drbd_nla.c b/drivers/block/drbd/drbd_nla.c index fa672b6df8d6..b2d4791498a6 100644 --- a/drivers/block/drbd/drbd_nla.c +++ b/drivers/block/drbd/drbd_nla.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #include "drbd_wrappers.h" | ||
2 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
3 | #include <net/netlink.h> | 2 | #include <net/netlink.h> |
4 | #include <linux/drbd_genl_api.h> | 3 | #include <linux/drbd_genl_api.h> |
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index 2f26e8ffa45b..89736bdbbc70 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c | |||
@@ -116,7 +116,7 @@ static void drbd_syncer_progress(struct drbd_device *device, struct seq_file *se | |||
116 | /* ------------------------ ~18s average ------------------------ */ | 116 | /* ------------------------ ~18s average ------------------------ */ |
117 | i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS; | 117 | i = (device->rs_last_mark + 2) % DRBD_SYNC_MARKS; |
118 | dt = (jiffies - device->rs_mark_time[i]) / HZ; | 118 | dt = (jiffies - device->rs_mark_time[i]) / HZ; |
119 | if (dt > (DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS)) | 119 | if (dt > 180) |
120 | stalled = 1; | 120 | stalled = 1; |
121 | 121 | ||
122 | if (!dt) | 122 | if (!dt) |
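The drbd_proc.c hunk looks like a magic-number substitution but is really a unit fix. dt is measured in seconds (jiffies divided by HZ), while DRBD_SYNC_MARK_STEP is, assuming the usual definitions in drbd_int.h (3*HZ jiffies per step, 8 marks; neither appears in this diff), a jiffies quantity, so the old threshold silently scaled with CONFIG_HZ:

    #include <stdio.h>

    #define HZ                  250          /* example CONFIG_HZ */
    #define DRBD_SYNC_MARKS     8            /* assumed, from drbd_int.h */
    #define DRBD_SYNC_MARK_STEP (3 * HZ)     /* assumed: 3 s worth of jiffies */

    int main(void)
    {
            /* dt in drbd_syncer_progress() is in seconds, not jiffies */
            printf("old: stalled after %d \"seconds\"\n",
                   DRBD_SYNC_MARK_STEP * DRBD_SYNC_MARKS);  /* 6000 with HZ=250 */
            printf("new: stalled after %d seconds\n", 180);
            return 0;
    }

The new flat 180 seconds is ten times the ~18 s averaging window mentioned in the comment above, which reads like the margin that was intended all along.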
diff --git a/drivers/block/drbd/drbd_protocol.h b/drivers/block/drbd/drbd_protocol.h index 3c04ec0ea333..2da9104a3851 100644 --- a/drivers/block/drbd/drbd_protocol.h +++ b/drivers/block/drbd/drbd_protocol.h | |||
@@ -54,6 +54,11 @@ enum drbd_packet { | |||
54 | P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ | 54 | P_CONN_ST_CHG_REPLY = 0x2b, /* meta sock: Connection side state req reply */ |
55 | P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */ | 55 | P_RETRY_WRITE = 0x2c, /* Protocol C: retry conflicting write request */ |
56 | P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */ | 56 | P_PROTOCOL_UPDATE = 0x2d, /* data sock: is used in established connections */ |
57 | /* 0x2e to 0x30 reserved, used in drbd 9 */ | ||
58 | |||
59 | /* REQ_DISCARD. We used "discard" in different contexts before, | ||
60 | * which is why I chose TRIM here, to disambiguate. */ | ||
61 | P_TRIM = 0x31, | ||
57 | 62 | ||
58 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ | 63 | P_MAY_IGNORE = 0x100, /* Flag to test if (cmd > P_MAY_IGNORE) ... */ |
59 | P_MAX_OPT_CMD = 0x101, | 64 | P_MAX_OPT_CMD = 0x101, |
@@ -119,6 +124,11 @@ struct p_data { | |||
119 | u32 dp_flags; | 124 | u32 dp_flags; |
120 | } __packed; | 125 | } __packed; |
121 | 126 | ||
127 | struct p_trim { | ||
128 | struct p_data p_data; | ||
129 | u32 size; /* == bio->bi_size */ | ||
130 | } __packed; | ||
131 | |||
122 | /* | 132 | /* |
123 | * commands which share a struct: | 133 | * commands which share a struct: |
124 | * p_block_ack: | 134 | * p_block_ack: |
@@ -150,6 +160,8 @@ struct p_block_req { | |||
150 | * ReportParams | 160 | * ReportParams |
151 | */ | 161 | */ |
152 | 162 | ||
163 | #define FF_TRIM 1 | ||
164 | |||
153 | struct p_connection_features { | 165 | struct p_connection_features { |
154 | u32 protocol_min; | 166 | u32 protocol_min; |
155 | u32 feature_flags; | 167 | u32 feature_flags; |
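FF_TRIM turns feature_flags in p_connection_features into an actual negotiation bitmask: each side advertises what it supports (PRO_FEATURES in drbd_receiver.c below) and the usable set is presumably the intersection. A minimal sketch of that handshake logic; the helper names are illustrative only:

    #include <stdbool.h>
    #include <stdint.h>

    #define FF_TRIM 1   /* from drbd_protocol.h above */

    static inline uint32_t agreed_features(uint32_t mine, uint32_t theirs)
    {
            return mine & theirs;            /* both ends must support a feature */
    }

    static inline bool may_send_trim(uint32_t agreed)
    {
            return (agreed & FF_TRIM) != 0;  /* only then is P_TRIM valid on the wire */
    }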
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 68e3992e8838..b6c8aaf4931b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c | |||
@@ -46,9 +46,10 @@ | |||
46 | #include "drbd_int.h" | 46 | #include "drbd_int.h" |
47 | #include "drbd_protocol.h" | 47 | #include "drbd_protocol.h" |
48 | #include "drbd_req.h" | 48 | #include "drbd_req.h" |
49 | |||
50 | #include "drbd_vli.h" | 49 | #include "drbd_vli.h" |
51 | 50 | ||
51 | #define PRO_FEATURES (FF_TRIM) | ||
52 | |||
52 | struct packet_info { | 53 | struct packet_info { |
53 | enum drbd_packet cmd; | 54 | enum drbd_packet cmd; |
54 | unsigned int size; | 55 | unsigned int size; |
@@ -65,7 +66,7 @@ enum finish_epoch { | |||
65 | static int drbd_do_features(struct drbd_connection *connection); | 66 | static int drbd_do_features(struct drbd_connection *connection); |
66 | static int drbd_do_auth(struct drbd_connection *connection); | 67 | static int drbd_do_auth(struct drbd_connection *connection); |
67 | static int drbd_disconnected(struct drbd_peer_device *); | 68 | static int drbd_disconnected(struct drbd_peer_device *); |
68 | 69 | static void conn_wait_active_ee_empty(struct drbd_connection *connection); | |
69 | static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event); | 70 | static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event); |
70 | static int e_end_block(struct drbd_work *, int); | 71 | static int e_end_block(struct drbd_work *, int); |
71 | 72 | ||
@@ -234,9 +235,17 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device) | |||
234 | * @retry: whether to retry, if not enough pages are available right now | 235 | * @retry: whether to retry, if not enough pages are available right now |
235 | * | 236 | * |
236 | * Tries to allocate @number pages, first from our own page pool, then from | 237 |
237 | * the kernel, unless this allocation would exceed the max_buffers setting. | 238 | * the kernel. |
238 | * Possibly retry until DRBD frees sufficient pages somewhere else. | 239 | * Possibly retry until DRBD frees sufficient pages somewhere else. |
239 | * | 240 | * |
241 | * If this allocation would exceed the max_buffers setting, we throttle | ||
242 | * allocation (schedule_timeout) to give the system some room to breathe. | ||
243 | * | ||
244 | * We do not use max-buffers as a hard limit, because it could lead to | ||
245 | * congestion and, in turn, to a distributed deadlock during online-verify or | ||
246 | * (checksum based) resync, if the max-buffers, socket buffer sizes and | ||
247 | * resync-rate settings are mis-configured. | ||
248 | * | ||
240 | * Returns a page chain linked via page->private. | 249 | * Returns a page chain linked via page->private. |
241 | */ | 250 | */ |
242 | struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number, | 251 | struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number, |
@@ -246,10 +255,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int | |||
246 | struct page *page = NULL; | 255 | struct page *page = NULL; |
247 | struct net_conf *nc; | 256 | struct net_conf *nc; |
248 | DEFINE_WAIT(wait); | 257 | DEFINE_WAIT(wait); |
249 | int mxb; | 258 | unsigned int mxb; |
250 | 259 | ||
251 | /* Yes, we may run up to @number over max_buffers. If we | ||
252 | * follow it strictly, the admin will get it wrong anyways. */ | ||
253 | rcu_read_lock(); | 260 | rcu_read_lock(); |
254 | nc = rcu_dereference(peer_device->connection->net_conf); | 261 | nc = rcu_dereference(peer_device->connection->net_conf); |
255 | mxb = nc ? nc->max_buffers : 1000000; | 262 | mxb = nc ? nc->max_buffers : 1000000; |
@@ -277,7 +284,8 @@ struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int | |||
277 | break; | 284 | break; |
278 | } | 285 | } |
279 | 286 | ||
280 | schedule(); | 287 | if (schedule_timeout(HZ/10) == 0) |
288 | mxb = UINT_MAX; | ||
281 | } | 289 | } |
282 | finish_wait(&drbd_pp_wait, &wait); | 290 | finish_wait(&drbd_pp_wait, &wait); |
283 | 291 | ||
@@ -331,7 +339,7 @@ You must not have the req_lock: | |||
331 | 339 | ||
332 | struct drbd_peer_request * | 340 | struct drbd_peer_request * |
333 | drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | 341 | drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector, |
334 | unsigned int data_size, gfp_t gfp_mask) __must_hold(local) | 342 | unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local) |
335 | { | 343 | { |
336 | struct drbd_device *device = peer_device->device; | 344 | struct drbd_device *device = peer_device->device; |
337 | struct drbd_peer_request *peer_req; | 345 | struct drbd_peer_request *peer_req; |
@@ -348,7 +356,7 @@ drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t secto | |||
348 | return NULL; | 356 | return NULL; |
349 | } | 357 | } |
350 | 358 | ||
351 | if (data_size) { | 359 | if (has_payload && data_size) { |
352 | page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT)); | 360 | page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT)); |
353 | if (!page) | 361 | if (!page) |
354 | goto fail; | 362 | goto fail; |
@@ -1026,24 +1034,27 @@ randomize: | |||
1026 | if (drbd_send_protocol(connection) == -EOPNOTSUPP) | 1034 | if (drbd_send_protocol(connection) == -EOPNOTSUPP) |
1027 | return -1; | 1035 | return -1; |
1028 | 1036 | ||
1037 | /* Prevent a race between resync-handshake and | ||
1038 | * being promoted to Primary. | ||
1039 | * | ||
1040 | * Grab and release the state mutex, so we know that any current | ||
1041 | * drbd_set_role() is finished, and any incoming drbd_set_role | ||
1042 | * will see the STATE_SENT flag, and wait for it to be cleared. | ||
1043 | */ | ||
1044 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) | ||
1045 | mutex_lock(peer_device->device->state_mutex); | ||
1046 | |||
1029 | set_bit(STATE_SENT, &connection->flags); | 1047 | set_bit(STATE_SENT, &connection->flags); |
1030 | 1048 | ||
1049 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) | ||
1050 | mutex_unlock(peer_device->device->state_mutex); | ||
1051 | |||
1031 | rcu_read_lock(); | 1052 | rcu_read_lock(); |
1032 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | 1053 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { |
1033 | struct drbd_device *device = peer_device->device; | 1054 | struct drbd_device *device = peer_device->device; |
1034 | kref_get(&device->kref); | 1055 | kref_get(&device->kref); |
1035 | rcu_read_unlock(); | 1056 | rcu_read_unlock(); |
1036 | 1057 | ||
1037 | /* Prevent a race between resync-handshake and | ||
1038 | * being promoted to Primary. | ||
1039 | * | ||
1040 | * Grab and release the state mutex, so we know that any current | ||
1041 | * drbd_set_role() is finished, and any incoming drbd_set_role | ||
1042 | * will see the STATE_SENT flag, and wait for it to be cleared. | ||
1043 | */ | ||
1044 | mutex_lock(device->state_mutex); | ||
1045 | mutex_unlock(device->state_mutex); | ||
1046 | |||
1047 | if (discard_my_data) | 1058 | if (discard_my_data) |
1048 | set_bit(DISCARD_MY_DATA, &device->flags); | 1059 | set_bit(DISCARD_MY_DATA, &device->flags); |
1049 | else | 1060 | else |
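The handshake hunk moves the per-device state_mutex dance from after set_bit(STATE_SENT) to around it. Before, a device could be promoted in the window between the flag being set and that device's mutex being bounced; now every device's mutex is held while the flag becomes visible, so a concurrent drbd_set_role() either finished beforehand or will observe STATE_SENT and wait. A userspace model of the ordering, shrunk to two devices:

    #include <pthread.h>
    #include <stdatomic.h>

    static pthread_mutex_t state_mutex[2] = {   /* one per device */
            PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER
    };
    static atomic_int state_sent;

    static void publish_state_sent(void)
    {
            for (int i = 0; i < 2; i++)          /* the idr_for_each_entry lock loop */
                    pthread_mutex_lock(&state_mutex[i]);
            atomic_store(&state_sent, 1);        /* set_bit(STATE_SENT, ...) */
            for (int i = 0; i < 2; i++)
                    pthread_mutex_unlock(&state_mutex[i]);
    }

    /* A promoter (drbd_set_role) takes its device's state_mutex first, so it
     * either completes before publish_state_sent() acquires the locks, or it
     * runs afterwards and sees state_sent already set. */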
@@ -1315,6 +1326,20 @@ int drbd_submit_peer_request(struct drbd_device *device, | |||
1315 | unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; | 1326 | unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT; |
1316 | int err = -ENOMEM; | 1327 | int err = -ENOMEM; |
1317 | 1328 | ||
1329 | if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) { | ||
1330 | /* wait for all pending IO completions before we start | ||
1331 | * zeroing things out. */ | ||
1332 | conn_wait_active_ee_empty(first_peer_device(device)->connection); | ||
1333 | if (blkdev_issue_zeroout(device->ldev->backing_bdev, | ||
1334 | sector, ds >> 9, GFP_NOIO)) | ||
1335 | peer_req->flags |= EE_WAS_ERROR; | ||
1336 | drbd_endio_write_sec_final(peer_req); | ||
1337 | return 0; | ||
1338 | } | ||
1339 | |||
1340 | if (peer_req->flags & EE_IS_TRIM) | ||
1341 | nr_pages = 0; /* discards don't have any payload. */ | ||
1342 | |||
1318 | /* In most cases, we will only need one bio. But in case the lower | 1343 | /* In most cases, we will only need one bio. But in case the lower |
1319 | * level restrictions happen to be different at this offset on this | 1344 | * level restrictions happen to be different at this offset on this |
1320 | * side than those of the sending peer, we may need to submit the | 1345 | * side than those of the sending peer, we may need to submit the |
@@ -1326,7 +1351,7 @@ int drbd_submit_peer_request(struct drbd_device *device, | |||
1326 | next_bio: | 1351 | next_bio: |
1327 | bio = bio_alloc(GFP_NOIO, nr_pages); | 1352 | bio = bio_alloc(GFP_NOIO, nr_pages); |
1328 | if (!bio) { | 1353 | if (!bio) { |
1329 | drbd_err(device, "submit_ee: Allocation of a bio failed\n"); | 1354 | drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages); |
1330 | goto fail; | 1355 | goto fail; |
1331 | } | 1356 | } |
1332 | /* > peer_req->i.sector, unless this is the first bio */ | 1357 | /* > peer_req->i.sector, unless this is the first bio */ |
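Two details of the zero-out fallback above are easy to miss: it completes the request synchronously (no bios are ever submitted for it), and blkdev_issue_zeroout() takes its length in 512-byte sectors, hence the ds >> 9. The receiver has already guaranteed the division is exact via IS_ALIGNED(data_size, 512); a tiny helper making the conversion explicit:

    #include <assert.h>
    #include <stdint.h>

    static uint64_t bytes_to_sectors_512(uint64_t bytes)
    {
            assert((bytes & 511) == 0);  /* enforced by read_in_block() */
            return bytes >> 9;           /* the ds >> 9 in the hunk above */
    }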
@@ -1340,6 +1365,11 @@ next_bio: | |||
1340 | bios = bio; | 1365 | bios = bio; |
1341 | ++n_bios; | 1366 | ++n_bios; |
1342 | 1367 | ||
1368 | if (rw & REQ_DISCARD) { | ||
1369 | bio->bi_iter.bi_size = ds; | ||
1370 | goto submit; | ||
1371 | } | ||
1372 | |||
1343 | page_chain_for_each(page) { | 1373 | page_chain_for_each(page) { |
1344 | unsigned len = min_t(unsigned, ds, PAGE_SIZE); | 1374 | unsigned len = min_t(unsigned, ds, PAGE_SIZE); |
1345 | if (!bio_add_page(bio, page, len, 0)) { | 1375 | if (!bio_add_page(bio, page, len, 0)) { |
@@ -1360,8 +1390,9 @@ next_bio: | |||
1360 | sector += len >> 9; | 1390 | sector += len >> 9; |
1361 | --nr_pages; | 1391 | --nr_pages; |
1362 | } | 1392 | } |
1363 | D_ASSERT(device, page == NULL); | ||
1364 | D_ASSERT(device, ds == 0); | 1393 | D_ASSERT(device, ds == 0); |
1394 | submit: | ||
1395 | D_ASSERT(device, page == NULL); | ||
1365 | 1396 | ||
1366 | atomic_set(&peer_req->pending_bios, n_bios); | 1397 | atomic_set(&peer_req->pending_bios, n_bios); |
1367 | do { | 1398 | do { |
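A discard bio carries no payload pages, which is why nr_pages is forced to 0 for EE_IS_TRIM and why bi_iter.bi_size is assigned directly before jumping past the page-chain loop to submit. The page math for both cases, pulled out of the hunk (page size hard-coded to 4096 for the example):

    #include <stdbool.h>

    #define EX_PAGE_SIZE  4096u
    #define EX_PAGE_SHIFT 12

    static unsigned int peer_req_nr_pages(unsigned int ds, bool is_trim)
    {
            if (is_trim)
                    return 0;  /* size travels in bi_iter.bi_size, no pages */
            /* round the byte count up to whole pages, as in the hunk above */
            return (ds + EX_PAGE_SIZE - 1) >> EX_PAGE_SHIFT;
    }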
@@ -1490,19 +1521,21 @@ static int receive_Barrier(struct drbd_connection *connection, struct packet_inf | |||
1490 | * and from receive_Data */ | 1521 | * and from receive_Data */ |
1491 | static struct drbd_peer_request * | 1522 | static struct drbd_peer_request * |
1492 | read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | 1523 | read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, |
1493 | int data_size) __must_hold(local) | 1524 | struct packet_info *pi) __must_hold(local) |
1494 | { | 1525 | { |
1495 | struct drbd_device *device = peer_device->device; | 1526 | struct drbd_device *device = peer_device->device; |
1496 | const sector_t capacity = drbd_get_capacity(device->this_bdev); | 1527 | const sector_t capacity = drbd_get_capacity(device->this_bdev); |
1497 | struct drbd_peer_request *peer_req; | 1528 | struct drbd_peer_request *peer_req; |
1498 | struct page *page; | 1529 | struct page *page; |
1499 | int dgs, ds, err; | 1530 | int dgs, ds, err; |
1531 | int data_size = pi->size; | ||
1500 | void *dig_in = peer_device->connection->int_dig_in; | 1532 | void *dig_in = peer_device->connection->int_dig_in; |
1501 | void *dig_vv = peer_device->connection->int_dig_vv; | 1533 | void *dig_vv = peer_device->connection->int_dig_vv; |
1502 | unsigned long *data; | 1534 | unsigned long *data; |
1535 | struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL; | ||
1503 | 1536 | ||
1504 | dgs = 0; | 1537 | dgs = 0; |
1505 | if (peer_device->connection->peer_integrity_tfm) { | 1538 | if (!trim && peer_device->connection->peer_integrity_tfm) { |
1506 | dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm); | 1539 | dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm); |
1507 | /* | 1540 | /* |
1508 | * FIXME: Receive the incoming digest into the receive buffer | 1541 | * FIXME: Receive the incoming digest into the receive buffer |
@@ -1514,9 +1547,15 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | |||
1514 | data_size -= dgs; | 1547 | data_size -= dgs; |
1515 | } | 1548 | } |
1516 | 1549 | ||
1550 | if (trim) { | ||
1551 | D_ASSERT(peer_device, data_size == 0); | ||
1552 | data_size = be32_to_cpu(trim->size); | ||
1553 | } | ||
1554 | |||
1517 | if (!expect(IS_ALIGNED(data_size, 512))) | 1555 | if (!expect(IS_ALIGNED(data_size, 512))) |
1518 | return NULL; | 1556 | return NULL; |
1519 | if (!expect(data_size <= DRBD_MAX_BIO_SIZE)) | 1557 | /* prepare for larger trim requests. */ |
1558 | if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE)) | ||
1520 | return NULL; | 1559 | return NULL; |
1521 | 1560 | ||
1522 | /* even though we trust our peer, | 1561 |
@@ -1532,11 +1571,11 @@ read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector, | |||
1532 | /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD | 1571 | /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD |
1533 | * "criss-cross" setup, that might cause write-out on some other DRBD, | 1572 | * "criss-cross" setup, that might cause write-out on some other DRBD, |
1534 | * which in turn might block on the other node at this very place. */ | 1573 | * which in turn might block on the other node at this very place. */ |
1535 | peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO); | 1574 | peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO); |
1536 | if (!peer_req) | 1575 | if (!peer_req) |
1537 | return NULL; | 1576 | return NULL; |
1538 | 1577 | ||
1539 | if (!data_size) | 1578 | if (trim) |
1540 | return peer_req; | 1579 | return peer_req; |
1541 | 1580 | ||
1542 | ds = data_size; | 1581 | ds = data_size; |
@@ -1676,12 +1715,12 @@ static int e_end_resync_block(struct drbd_work *w, int unused) | |||
1676 | } | 1715 | } |
1677 | 1716 | ||
1678 | static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector, | 1717 | static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector, |
1679 | int data_size) __releases(local) | 1718 | struct packet_info *pi) __releases(local) |
1680 | { | 1719 | { |
1681 | struct drbd_device *device = peer_device->device; | 1720 | struct drbd_device *device = peer_device->device; |
1682 | struct drbd_peer_request *peer_req; | 1721 | struct drbd_peer_request *peer_req; |
1683 | 1722 | ||
1684 | peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size); | 1723 | peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi); |
1685 | if (!peer_req) | 1724 | if (!peer_req) |
1686 | goto fail; | 1725 | goto fail; |
1687 | 1726 | ||
@@ -1697,7 +1736,7 @@ static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t secto | |||
1697 | list_add(&peer_req->w.list, &device->sync_ee); | 1736 | list_add(&peer_req->w.list, &device->sync_ee); |
1698 | spin_unlock_irq(&device->resource->req_lock); | 1737 | spin_unlock_irq(&device->resource->req_lock); |
1699 | 1738 | ||
1700 | atomic_add(data_size >> 9, &device->rs_sect_ev); | 1739 | atomic_add(pi->size >> 9, &device->rs_sect_ev); |
1701 | if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) | 1740 | if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0) |
1702 | return 0; | 1741 | return 0; |
1703 | 1742 | ||
@@ -1785,7 +1824,7 @@ static int receive_RSDataReply(struct drbd_connection *connection, struct packet | |||
1785 | /* data is submitted to disk within recv_resync_read. | 1824 | /* data is submitted to disk within recv_resync_read. |
1786 | * corresponding put_ldev done below on error, | 1825 | * corresponding put_ldev done below on error, |
1787 | * or in drbd_peer_request_endio. */ | 1826 | * or in drbd_peer_request_endio. */ |
1788 | err = recv_resync_read(peer_device, sector, pi->size); | 1827 | err = recv_resync_read(peer_device, sector, pi); |
1789 | } else { | 1828 | } else { |
1790 | if (__ratelimit(&drbd_ratelimit_state)) | 1829 | if (__ratelimit(&drbd_ratelimit_state)) |
1791 | drbd_err(device, "Can not write resync data to local disk.\n"); | 1830 | drbd_err(device, "Can not write resync data to local disk.\n"); |
@@ -2196,7 +2235,7 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2196 | */ | 2235 | */ |
2197 | 2236 | ||
2198 | sector = be64_to_cpu(p->sector); | 2237 | sector = be64_to_cpu(p->sector); |
2199 | peer_req = read_in_block(peer_device, p->block_id, sector, pi->size); | 2238 | peer_req = read_in_block(peer_device, p->block_id, sector, pi); |
2200 | if (!peer_req) { | 2239 | if (!peer_req) { |
2201 | put_ldev(device); | 2240 | put_ldev(device); |
2202 | return -EIO; | 2241 | return -EIO; |
@@ -2206,7 +2245,15 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2206 | 2245 | ||
2207 | dp_flags = be32_to_cpu(p->dp_flags); | 2246 | dp_flags = be32_to_cpu(p->dp_flags); |
2208 | rw |= wire_flags_to_bio(dp_flags); | 2247 | rw |= wire_flags_to_bio(dp_flags); |
2209 | if (peer_req->pages == NULL) { | 2248 | if (pi->cmd == P_TRIM) { |
2249 | struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev); | ||
2250 | peer_req->flags |= EE_IS_TRIM; | ||
2251 | if (!blk_queue_discard(q)) | ||
2252 | peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT; | ||
2253 | D_ASSERT(peer_device, peer_req->i.size > 0); | ||
2254 | D_ASSERT(peer_device, rw & REQ_DISCARD); | ||
2255 | D_ASSERT(peer_device, peer_req->pages == NULL); | ||
2256 | } else if (peer_req->pages == NULL) { | ||
2210 | D_ASSERT(device, peer_req->i.size == 0); | 2257 | D_ASSERT(device, peer_req->i.size == 0); |
2211 | D_ASSERT(device, dp_flags & DP_FLUSH); | 2258 | D_ASSERT(device, dp_flags & DP_FLUSH); |
2212 | } | 2259 | } |
@@ -2242,7 +2289,12 @@ static int receive_Data(struct drbd_connection *connection, struct packet_info * | |||
2242 | update_peer_seq(peer_device, peer_seq); | 2289 | update_peer_seq(peer_device, peer_seq); |
2243 | spin_lock_irq(&device->resource->req_lock); | 2290 | spin_lock_irq(&device->resource->req_lock); |
2244 | } | 2291 | } |
2245 | list_add(&peer_req->w.list, &device->active_ee); | 2292 | /* if we use the zeroout fallback code, we process synchronously |
2293 | * and we wait for all pending requests, that is, for | ||
2294 | * active_ee to become empty in drbd_submit_peer_request(); | ||
2295 | * better not add ourselves here. */ | ||
2296 | if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0) | ||
2297 | list_add(&peer_req->w.list, &device->active_ee); | ||
2246 | spin_unlock_irq(&device->resource->req_lock); | 2298 | spin_unlock_irq(&device->resource->req_lock); |
2247 | 2299 | ||
2248 | if (device->state.conn == C_SYNC_TARGET) | 2300 | if (device->state.conn == C_SYNC_TARGET) |
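
Note: when the backing device cannot discard, the peer request is flagged EE_IS_TRIM_USE_ZEROOUT and processed synchronously; since drbd_submit_peer_request() then waits for active_ee to drain, the request must not put itself on that list (the new comment above). A hedged kernel-style sketch of the discard-or-zeroout decision downstream of that flag (the helper name is an assumption, not a function in this commit; the blkdev_* signatures match this kernel generation):

    #include <linux/blkdev.h>

    /* Sketch: discard when the backing queue supports it, otherwise fall
     * back to writing zeroes, matching the EE_IS_TRIM_USE_ZEROOUT intent. */
    static int issue_trim_or_zeroout(struct block_device *bdev,
                                     sector_t sector, sector_t nr_sects)
    {
        struct request_queue *q = bdev_get_queue(bdev);

        if (blk_queue_discard(q))
            return blkdev_issue_discard(bdev, sector, nr_sects, GFP_NOIO, 0);
        return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOIO);
    }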
@@ -2313,39 +2365,45 @@ out_interrupted: | |||
2313 | * The current sync rate used here uses only the most recent two step marks, | 2365 | * The current sync rate used here uses only the most recent two step marks, |
2314 | * to have a short time average so we can react faster. | 2366 | * to have a short time average so we can react faster. |
2315 | */ | 2367 | */ |
2316 | int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) | 2368 | bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) |
2317 | { | 2369 | { |
2318 | struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; | ||
2319 | unsigned long db, dt, dbdt; | ||
2320 | struct lc_element *tmp; | 2370 | struct lc_element *tmp; |
2321 | int curr_events; | 2371 | bool throttle = true; |
2322 | int throttle = 0; | ||
2323 | unsigned int c_min_rate; | ||
2324 | |||
2325 | rcu_read_lock(); | ||
2326 | c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; | ||
2327 | rcu_read_unlock(); | ||
2328 | 2372 | ||
2329 | /* feature disabled? */ | 2373 | if (!drbd_rs_c_min_rate_throttle(device)) |
2330 | if (c_min_rate == 0) | 2374 | return false; |
2331 | return 0; | ||
2332 | 2375 | ||
2333 | spin_lock_irq(&device->al_lock); | 2376 | spin_lock_irq(&device->al_lock); |
2334 | tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); | 2377 | tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector)); |
2335 | if (tmp) { | 2378 | if (tmp) { |
2336 | struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); | 2379 | struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce); |
2337 | if (test_bit(BME_PRIORITY, &bm_ext->flags)) { | 2380 | if (test_bit(BME_PRIORITY, &bm_ext->flags)) |
2338 | spin_unlock_irq(&device->al_lock); | 2381 | throttle = false; |
2339 | return 0; | ||
2340 | } | ||
2341 | /* Do not slow down if app IO is already waiting for this extent */ | 2382 | /* Do not slow down if app IO is already waiting for this extent */ |
2342 | } | 2383 | } |
2343 | spin_unlock_irq(&device->al_lock); | 2384 | spin_unlock_irq(&device->al_lock); |
2344 | 2385 | ||
2386 | return throttle; | ||
2387 | } | ||
2388 | |||
2389 | bool drbd_rs_c_min_rate_throttle(struct drbd_device *device) | ||
2390 | { | ||
2391 | struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk; | ||
2392 | unsigned long db, dt, dbdt; | ||
2393 | unsigned int c_min_rate; | ||
2394 | int curr_events; | ||
2395 | |||
2396 | rcu_read_lock(); | ||
2397 | c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate; | ||
2398 | rcu_read_unlock(); | ||
2399 | |||
2400 | /* feature disabled? */ | ||
2401 | if (c_min_rate == 0) | ||
2402 | return false; | ||
2403 | |||
2345 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + | 2404 | curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + |
2346 | (int)part_stat_read(&disk->part0, sectors[1]) - | 2405 | (int)part_stat_read(&disk->part0, sectors[1]) - |
2347 | atomic_read(&device->rs_sect_ev); | 2406 | atomic_read(&device->rs_sect_ev); |
2348 | |||
2349 | if (!device->rs_last_events || curr_events - device->rs_last_events > 64) { | 2407 | if (!device->rs_last_events || curr_events - device->rs_last_events > 64) { |
2350 | unsigned long rs_left; | 2408 | unsigned long rs_left; |
2351 | int i; | 2409 | int i; |
@@ -2368,12 +2426,11 @@ int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector) | |||
2368 | dbdt = Bit2KB(db/dt); | 2426 | dbdt = Bit2KB(db/dt); |
2369 | 2427 | ||
2370 | if (dbdt > c_min_rate) | 2428 | if (dbdt > c_min_rate) |
2371 | throttle = 1; | 2429 | return true; |
2372 | } | 2430 | } |
2373 | return throttle; | 2431 | return false; |
2374 | } | 2432 | } |
2375 | 2433 | ||
2376 | |||
2377 | static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi) | 2434 | static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi) |
2378 | { | 2435 | { |
2379 | struct drbd_peer_device *peer_device; | 2436 | struct drbd_peer_device *peer_device; |
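
Note: the refactor above splits the old drbd_rs_should_slow_down() in two, so drbd_rs_c_min_rate_throttle() holds the pure rate test and is reusable without the per-extent BME_PRIORITY check. It samples the backing disk's sector counters (net of resync-generated IO) to detect application activity and, when there has been any, estimates the recent resync rate from the bitmap progress marks; resync is throttled while that rate still exceeds the configured c-min-rate floor. A standalone sketch of the arithmetic (each DRBD bitmap bit covers a 4 KiB block, so Bit2KB(x) is x * 4):

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified version of the dbdt > c_min_rate test in the hunk. */
    static bool resync_rate_above_floor(unsigned long bits_synced,
                                        unsigned long seconds,
                                        unsigned long c_min_rate_kb)
    {
        unsigned long dbdt;

        if (c_min_rate_kb == 0)        /* feature disabled */
            return false;
        if (seconds == 0)
            seconds = 1;
        dbdt = (bits_synced / seconds) * 4;   /* Bit2KB(db/dt), in KiB/s */
        return dbdt > c_min_rate_kb;
    }

    int main(void)
    {
        printf("%d\n", resync_rate_above_floor(51200, 10, 4096)); /* 1: ~20 MiB/s */
        printf("%d\n", resync_rate_above_floor(2560, 10, 4096));  /* 0: ~1 MiB/s */
        return 0;
    }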
@@ -2436,7 +2493,8 @@ static int receive_DataRequest(struct drbd_connection *connection, struct packet | |||
2436 | /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD | 2493 | /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD |
2437 | * "criss-cross" setup, that might cause write-out on some other DRBD, | 2494 | * "criss-cross" setup, that might cause write-out on some other DRBD, |
2438 | * which in turn might block on the other node at this very place. */ | 2495 | * which in turn might block on the other node at this very place. */ |
2439 | peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO); | 2496 | peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, |
2497 | true /* has real payload */, GFP_NOIO); | ||
2440 | if (!peer_req) { | 2498 | if (!peer_req) { |
2441 | put_ldev(device); | 2499 | put_ldev(device); |
2442 | return -ENOMEM; | 2500 | return -ENOMEM; |
@@ -3648,6 +3706,13 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3648 | put_ldev(device); | 3706 | put_ldev(device); |
3649 | } | 3707 | } |
3650 | 3708 | ||
3709 | device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); | ||
3710 | drbd_reconsider_max_bio_size(device); | ||
3711 | /* Keep drbd_reconsider_max_bio_size() before drbd_determine_dev_size(): | ||
3712 | if we cleared QUEUE_FLAG_DISCARD from our queue in | ||
3713 | drbd_reconsider_max_bio_size(), we can be sure that after | ||
3714 | drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */ | ||
3715 | |||
3651 | ddsf = be16_to_cpu(p->dds_flags); | 3716 | ddsf = be16_to_cpu(p->dds_flags); |
3652 | if (get_ldev(device)) { | 3717 | if (get_ldev(device)) { |
3653 | dd = drbd_determine_dev_size(device, ddsf, NULL); | 3718 | dd = drbd_determine_dev_size(device, ddsf, NULL); |
@@ -3660,9 +3725,6 @@ static int receive_sizes(struct drbd_connection *connection, struct packet_info | |||
3660 | drbd_set_my_capacity(device, p_size); | 3725 | drbd_set_my_capacity(device, p_size); |
3661 | } | 3726 | } |
3662 | 3727 | ||
3663 | device->peer_max_bio_size = be32_to_cpu(p->max_bio_size); | ||
3664 | drbd_reconsider_max_bio_size(device); | ||
3665 | |||
3666 | if (get_ldev(device)) { | 3728 | if (get_ldev(device)) { |
3667 | if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) { | 3729 | if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) { |
3668 | device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev); | 3730 | device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev); |
@@ -4423,6 +4485,7 @@ static struct data_cmd drbd_cmd_handler[] = { | |||
4423 | [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, | 4485 | [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync }, |
4424 | [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, | 4486 | [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state }, |
4425 | [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, | 4487 | [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol }, |
4488 | [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data }, | ||
4426 | }; | 4489 | }; |
4427 | 4490 | ||
4428 | static void drbdd(struct drbd_connection *connection) | 4491 | static void drbdd(struct drbd_connection *connection) |
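
Note: wiring P_TRIM into drbd_cmd_handler[] is all it takes to route the new packet through the existing receive_Data() path; the table maps each packet type to its fixed header size and handler (the first column in the real entries flags whether extra payload follows). A simplified sketch of that dispatch-table pattern, with stand-in packet names, sizes, and info struct:

    #include <stdio.h>

    struct pkt_info { unsigned int type; unsigned int size; };

    enum { P_DATA_X, P_TRIM_X, P_MAX_X };

    static int recv_data(struct pkt_info *pi)
    {
        /* P_TRIM reuses the data path in the real table, too */
        return printf("cmd %u, header %u bytes\n", pi->type, pi->size);
    }

    static const struct {
        unsigned int header_size;
        int (*fn)(struct pkt_info *);
    } handlers[P_MAX_X] = {
        [P_DATA_X] = { 24, recv_data },
        [P_TRIM_X] = {  8, recv_data },
    };

    int main(void)
    {
        struct pkt_info pi = { P_TRIM_X, 0 };
        pi.size = handlers[pi.type].header_size;
        return handlers[pi.type].fn(&pi) < 0;
    }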
@@ -4630,6 +4693,7 @@ static int drbd_send_features(struct drbd_connection *connection) | |||
4630 | memset(p, 0, sizeof(*p)); | 4693 | memset(p, 0, sizeof(*p)); |
4631 | p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); | 4694 | p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); |
4632 | p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); | 4695 | p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); |
4696 | p->feature_flags = cpu_to_be32(PRO_FEATURES); | ||
4633 | return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0); | 4697 | return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0); |
4634 | } | 4698 | } |
4635 | 4699 | ||
@@ -4683,10 +4747,14 @@ static int drbd_do_features(struct drbd_connection *connection) | |||
4683 | goto incompat; | 4747 | goto incompat; |
4684 | 4748 | ||
4685 | connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); | 4749 | connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max); |
4750 | connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags); | ||
4686 | 4751 | ||
4687 | drbd_info(connection, "Handshake successful: " | 4752 | drbd_info(connection, "Handshake successful: " |
4688 | "Agreed network protocol version %d\n", connection->agreed_pro_version); | 4753 | "Agreed network protocol version %d\n", connection->agreed_pro_version); |
4689 | 4754 | ||
4755 | drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n", | ||
4756 | connection->agreed_features & FF_TRIM ? " " : " not "); | ||
4757 | |||
4690 | return 1; | 4758 | return 1; |
4691 | 4759 | ||
4692 | incompat: | 4760 | incompat: |
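
Note: the handshake change is deliberately backward compatible. Each side advertises a feature bitmask, and the agreed set is the bitwise AND, so a pre-TRIM peer (which sends zeroed flags) silently negotiates the feature off. A tiny sketch of the rule, with FF_TRIM_X standing in for the real flag bit:

    #include <stdint.h>
    #include <stdio.h>

    #define FF_TRIM_X 1U   /* stand-in for DRBD's FF_TRIM bit */

    /* agreed = ours & theirs, as in the hunk above */
    static uint32_t agree_features(uint32_t ours, uint32_t theirs)
    {
        return ours & theirs;
    }

    int main(void)
    {
        printf("trim: %u\n", agree_features(FF_TRIM_X, FF_TRIM_X) & FF_TRIM_X); /* 1 */
        printf("trim: %u\n", agree_features(FF_TRIM_X, 0) & FF_TRIM_X);         /* 0 */
        return 0;
    }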
@@ -4778,6 +4846,12 @@ static int drbd_do_auth(struct drbd_connection *connection) | |||
4778 | goto fail; | 4846 | goto fail; |
4779 | } | 4847 | } |
4780 | 4848 | ||
4849 | if (pi.size < CHALLENGE_LEN) { | ||
4850 | drbd_err(connection, "AuthChallenge payload too small.\n"); | ||
4851 | rv = -1; | ||
4852 | goto fail; | ||
4853 | } | ||
4854 | |||
4781 | peers_ch = kmalloc(pi.size, GFP_NOIO); | 4855 | peers_ch = kmalloc(pi.size, GFP_NOIO); |
4782 | if (peers_ch == NULL) { | 4856 | if (peers_ch == NULL) { |
4783 | drbd_err(connection, "kmalloc of peers_ch failed\n"); | 4857 | drbd_err(connection, "kmalloc of peers_ch failed\n"); |
@@ -4791,6 +4865,12 @@ static int drbd_do_auth(struct drbd_connection *connection) | |||
4791 | goto fail; | 4865 | goto fail; |
4792 | } | 4866 | } |
4793 | 4867 | ||
4868 | if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) { | ||
4869 | drbd_err(connection, "Peer presented the same challenge!\n"); | ||
4870 | rv = -1; | ||
4871 | goto fail; | ||
4872 | } | ||
4873 | |||
4794 | resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm); | 4874 | resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm); |
4795 | response = kmalloc(resp_size, GFP_NOIO); | 4875 | response = kmalloc(resp_size, GFP_NOIO); |
4796 | if (response == NULL) { | 4876 | if (response == NULL) { |
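
Note: both new checks harden the CRAM-HMAC exchange. An undersized challenge shrinks the HMAC input space, and a peer that simply reflects our own challenge back could otherwise pass by replaying our response to it. A standalone sketch of the validation (CHALLENGE_LEN_X is a stand-in for DRBD's CHALLENGE_LEN):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define CHALLENGE_LEN_X 64

    /* Reject undersized challenges and reject a mirrored challenge. */
    static bool challenge_acceptable(const unsigned char *mine,
                                     const unsigned char *theirs,
                                     size_t their_len)
    {
        if (their_len < CHALLENGE_LEN_X)
            return false;
        if (memcmp(mine, theirs, CHALLENGE_LEN_X) == 0)
            return false;
        return true;
    }

    int main(void)
    {
        unsigned char a[CHALLENGE_LEN_X] = { 1 }, b[CHALLENGE_LEN_X] = { 2 };

        printf("%d\n", challenge_acceptable(a, b, sizeof(b)));  /* 1 */
        printf("%d\n", challenge_acceptable(a, a, sizeof(a)));  /* 0: reflected */
        printf("%d\n", challenge_acceptable(a, b, 8));          /* 0: too short */
        return 0;
    }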
diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index 3779c8d2875b..09803d0d5207 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c | |||
@@ -522,6 +522,13 @@ int __req_mod(struct drbd_request *req, enum drbd_req_event what, | |||
522 | mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); | 522 | mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); |
523 | break; | 523 | break; |
524 | 524 | ||
525 | case DISCARD_COMPLETED_NOTSUPP: | ||
526 | case DISCARD_COMPLETED_WITH_ERROR: | ||
527 | /* I'd rather not detach from local disk just because it | ||
528 | * failed a REQ_DISCARD. */ | ||
529 | mod_rq_state(req, m, RQ_LOCAL_PENDING, RQ_LOCAL_COMPLETED); | ||
530 | break; | ||
531 | |||
525 | case QUEUE_FOR_NET_READ: | 532 | case QUEUE_FOR_NET_READ: |
526 | /* READ or READA, and | 533 | /* READ or READA, and |
527 | * no local disk, | 534 | * no local disk, |
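
Note: these two cases encode a policy, not just plumbing. A failed or unsupported REQ_DISCARD completes the request locally (the same RQ_LOCAL_COMPLETED transition as success) but is not escalated to a disk error, so the backing device stays attached. A minimal sketch of that classification, with stand-in names (the real event names are added to drbd_req.h below):

    #include <stdbool.h>
    #include <stdio.h>

    enum req_event_x { WRITE_ERR_X, DISCARD_ERR_X, DISCARD_NOTSUPP_X };

    /* Discard failures complete the request but never force a detach. */
    static bool event_may_detach_disk(enum req_event_x e)
    {
        switch (e) {
        case DISCARD_ERR_X:
        case DISCARD_NOTSUPP_X:
            return false;
        default:
            return true;   /* ordinary write errors go through error handling */
        }
    }

    int main(void)
    {
        printf("%d\n", event_may_detach_disk(WRITE_ERR_X));       /* 1 */
        printf("%d\n", event_may_detach_disk(DISCARD_NOTSUPP_X)); /* 0 */
        return 0;
    }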
@@ -1235,6 +1242,7 @@ void do_submit(struct work_struct *ws) | |||
1235 | if (list_empty(&incoming)) | 1242 | if (list_empty(&incoming)) |
1236 | break; | 1243 | break; |
1237 | 1244 | ||
1245 | skip_fast_path: | ||
1238 | wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending)); | 1246 | wait_event(device->al_wait, prepare_al_transaction_nonblock(device, &incoming, &pending)); |
1239 | /* Maybe more was queued, while we prepared the transaction? | 1247 | /* Maybe more was queued, while we prepared the transaction? |
1240 | * Try to stuff them into this transaction as well. | 1248 | * Try to stuff them into this transaction as well. |
@@ -1273,6 +1281,25 @@ void do_submit(struct work_struct *ws) | |||
1273 | list_del_init(&req->tl_requests); | 1281 | list_del_init(&req->tl_requests); |
1274 | drbd_send_and_submit(device, req); | 1282 | drbd_send_and_submit(device, req); |
1275 | } | 1283 | } |
1284 | |||
1285 | /* If all currently hot activity log extents are kept busy by | ||
1286 | * incoming requests, we still must not totally starve new | ||
1287 | * requests to cold extents. In that case, prepare one request | ||
1288 | * in blocking mode. */ | ||
1289 | list_for_each_entry_safe(req, tmp, &incoming, tl_requests) { | ||
1290 | list_del_init(&req->tl_requests); | ||
1291 | req->rq_state |= RQ_IN_ACT_LOG; | ||
1292 | if (!drbd_al_begin_io_prepare(device, &req->i)) { | ||
1293 | /* Corresponding extent was hot after all? */ | ||
1294 | drbd_send_and_submit(device, req); | ||
1295 | } else { | ||
1296 | /* Found a request to a cold extent. | ||
1297 | * Put on "pending" list, | ||
1298 | * and try to cumulate with more. */ | ||
1299 | list_add(&req->tl_requests, &pending); | ||
1300 | goto skip_fast_path; | ||
1301 | } | ||
1302 | } | ||
1276 | } | 1303 | } |
1277 | } | 1304 | } |
1278 | 1305 | ||
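
Note: the loop added above fixes a starvation case. If every currently hot activity-log extent is kept busy by a stream of incoming requests, the non-blocking fast path never makes progress on requests targeting cold extents; the fix prepares exactly one cold-extent request in blocking mode, then jumps back (skip_fast_path) to resume batching. A much-simplified standalone sketch of that two-speed shape:

    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative stand-ins: "hot" items pass the cheap non-blocking
     * prepare; "cold" ones need the slow, blocking path. */
    struct item { bool hot; };

    static bool prepare_nonblock(const struct item *it) { return it->hot; }
    static void prepare_blocking(struct item *it)       { it->hot = true; }

    static void drain(struct item *q, int n)
    {
        for (int i = 0; i < n; i++) {
            if (!prepare_nonblock(&q[i]))
                prepare_blocking(&q[i]); /* one slow item, then batch again */
            printf("submit %d\n", i);
        }
    }

    int main(void)
    {
        struct item q[] = { { true }, { false }, { true } };
        drain(q, 3);
        return 0;
    }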
@@ -1326,23 +1353,35 @@ int drbd_merge_bvec(struct request_queue *q, struct bvec_merge_data *bvm, struct | |||
1326 | return limit; | 1353 | return limit; |
1327 | } | 1354 | } |
1328 | 1355 | ||
1329 | static struct drbd_request *find_oldest_request(struct drbd_connection *connection) | 1356 | static void find_oldest_requests( |
1357 | struct drbd_connection *connection, | ||
1358 | struct drbd_device *device, | ||
1359 | struct drbd_request **oldest_req_waiting_for_peer, | ||
1360 | struct drbd_request **oldest_req_waiting_for_disk) | ||
1330 | { | 1361 | { |
1331 | /* Walk the transfer log, | ||
1332 | * and find the oldest not yet completed request */ | ||
1333 | struct drbd_request *r; | 1362 | struct drbd_request *r; |
1363 | *oldest_req_waiting_for_peer = NULL; | ||
1364 | *oldest_req_waiting_for_disk = NULL; | ||
1334 | list_for_each_entry(r, &connection->transfer_log, tl_requests) { | 1365 | list_for_each_entry(r, &connection->transfer_log, tl_requests) { |
1335 | if (atomic_read(&r->completion_ref)) | 1366 | const unsigned s = r->rq_state; |
1336 | return r; | 1367 | if (!*oldest_req_waiting_for_peer |
1368 | && ((s & RQ_NET_MASK) && !(s & RQ_NET_DONE))) | ||
1369 | *oldest_req_waiting_for_peer = r; | ||
1370 | |||
1371 | if (!*oldest_req_waiting_for_disk | ||
1372 | && (s & RQ_LOCAL_PENDING) && r->device == device) | ||
1373 | *oldest_req_waiting_for_disk = r; | ||
1374 | |||
1375 | if (*oldest_req_waiting_for_peer && *oldest_req_waiting_for_disk) | ||
1376 | break; | ||
1337 | } | 1377 | } |
1338 | return NULL; | ||
1339 | } | 1378 | } |
1340 | 1379 | ||
1341 | void request_timer_fn(unsigned long data) | 1380 | void request_timer_fn(unsigned long data) |
1342 | { | 1381 | { |
1343 | struct drbd_device *device = (struct drbd_device *) data; | 1382 | struct drbd_device *device = (struct drbd_device *) data; |
1344 | struct drbd_connection *connection = first_peer_device(device)->connection; | 1383 | struct drbd_connection *connection = first_peer_device(device)->connection; |
1345 | struct drbd_request *req; /* oldest request */ | 1384 | struct drbd_request *req_disk, *req_peer; /* oldest request */ |
1346 | struct net_conf *nc; | 1385 | struct net_conf *nc; |
1347 | unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ | 1386 | unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */ |
1348 | unsigned long now; | 1387 | unsigned long now; |
@@ -1366,8 +1405,8 @@ void request_timer_fn(unsigned long data) | |||
1366 | now = jiffies; | 1405 | now = jiffies; |
1367 | 1406 | ||
1368 | spin_lock_irq(&device->resource->req_lock); | 1407 | spin_lock_irq(&device->resource->req_lock); |
1369 | req = find_oldest_request(connection); | 1408 | find_oldest_requests(connection, device, &req_peer, &req_disk); |
1370 | if (!req) { | 1409 | if (req_peer == NULL && req_disk == NULL) { |
1371 | spin_unlock_irq(&device->resource->req_lock); | 1410 | spin_unlock_irq(&device->resource->req_lock); |
1372 | mod_timer(&device->request_timer, now + et); | 1411 | mod_timer(&device->request_timer, now + et); |
1373 | return; | 1412 | return; |
@@ -1389,19 +1428,26 @@ void request_timer_fn(unsigned long data) | |||
1389 | * ~198 days with 250 HZ, we have a window where the timeout would need | 1428 | * ~198 days with 250 HZ, we have a window where the timeout would need |
1390 | * to expire twice (worst case) to become effective. Good enough. | 1429 | * to expire twice (worst case) to become effective. Good enough. |
1391 | */ | 1430 | */ |
1392 | if (ent && req->rq_state & RQ_NET_PENDING && | 1431 | if (ent && req_peer && |
1393 | time_after(now, req->start_time + ent) && | 1432 | time_after(now, req_peer->start_time + ent) && |
1394 | !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { | 1433 | !time_in_range(now, connection->last_reconnect_jif, connection->last_reconnect_jif + ent)) { |
1395 | drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); | 1434 | drbd_warn(device, "Remote failed to finish a request within ko-count * timeout\n"); |
1396 | _drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); | 1435 | _drbd_set_state(_NS(device, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL); |
1397 | } | 1436 | } |
1398 | if (dt && req->rq_state & RQ_LOCAL_PENDING && req->device == device && | 1437 | if (dt && req_disk && |
1399 | time_after(now, req->start_time + dt) && | 1438 | time_after(now, req_disk->start_time + dt) && |
1400 | !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { | 1439 | !time_in_range(now, device->last_reattach_jif, device->last_reattach_jif + dt)) { |
1401 | drbd_warn(device, "Local backing device failed to meet the disk-timeout\n"); | 1440 | drbd_warn(device, "Local backing device failed to meet the disk-timeout\n"); |
1402 | __drbd_chk_io_error(device, DRBD_FORCE_DETACH); | 1441 | __drbd_chk_io_error(device, DRBD_FORCE_DETACH); |
1403 | } | 1442 | } |
1404 | nt = (time_after(now, req->start_time + et) ? now : req->start_time) + et; | 1443 | |
1444 | /* Reschedule timer for the nearest not already expired timeout. | ||
1445 | * Fall back to now + min(effective network timeout, disk timeout). | ||
1446 | ent = (ent && req_peer && time_before(now, req_peer->start_time + ent)) | ||
1447 | ? req_peer->start_time + ent : now + et; | ||
1448 | dt = (dt && req_disk && time_before(now, req_disk->start_time + dt)) | ||
1449 | ? req_disk->start_time + dt : now + et; | ||
1450 | nt = time_before(ent, dt) ? ent : dt; | ||
1405 | spin_unlock_irq(&connection->resource->req_lock); | 1451 | spin_unlock_irq(&connection->resource->req_lock); |
1406 | mod_timer(&device->request_timer, nt); | 1452 | mod_timer(&device->request_timer, nt); |
1407 | } | 1453 | } |
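
Note: tracking two oldest requests (network-pending and disk-pending) also changes how the timer is rearmed. Instead of a single start_time + et, it now fires at the nearest deadline that has not already expired, falling back to now + timeout otherwise. A standalone sketch of that computation (time_before_x() models the kernel's wraparound-safe time_before() for a simple increasing clock):

    #include <stdio.h>

    static int time_before_x(unsigned long a, unsigned long b)
    {
        return (long)(a - b) < 0;
    }

    /* Pick the earlier of two per-request deadlines; 0 means "none". */
    static unsigned long next_timer(unsigned long now,
                                    unsigned long peer_deadline,
                                    unsigned long disk_deadline,
                                    unsigned long fallback)
    {
        unsigned long ent = (peer_deadline && time_before_x(now, peer_deadline))
            ? peer_deadline : now + fallback;
        unsigned long dt = (disk_deadline && time_before_x(now, disk_deadline))
            ? disk_deadline : now + fallback;
        return time_before_x(ent, dt) ? ent : dt;
    }

    int main(void)
    {
        printf("%lu\n", next_timer(1000, 1200, 1500, 600)); /* 1200 */
        printf("%lu\n", next_timer(1000, 900,  0,    600)); /* 1600: both fell back */
        return 0;
    }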
diff --git a/drivers/block/drbd/drbd_req.h b/drivers/block/drbd/drbd_req.h index c684c963538e..8566cd5866b4 100644 --- a/drivers/block/drbd/drbd_req.h +++ b/drivers/block/drbd/drbd_req.h | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/slab.h> | 30 | #include <linux/slab.h> |
31 | #include <linux/drbd.h> | 31 | #include <linux/drbd.h> |
32 | #include "drbd_int.h" | 32 | #include "drbd_int.h" |
33 | #include "drbd_wrappers.h" | ||
34 | 33 | ||
35 | /* The request callbacks will be called in irq context by the IDE drivers, | 34 | /* The request callbacks will be called in irq context by the IDE drivers, |
36 | and in Softirqs/Tasklets/BH context by the SCSI drivers, | 35 | and in Softirqs/Tasklets/BH context by the SCSI drivers, |
@@ -111,11 +110,14 @@ enum drbd_req_event { | |||
111 | BARRIER_ACKED, /* in protocol A and B */ | 110 | BARRIER_ACKED, /* in protocol A and B */ |
112 | DATA_RECEIVED, /* (remote read) */ | 111 | DATA_RECEIVED, /* (remote read) */ |
113 | 112 | ||
113 | COMPLETED_OK, | ||
114 | READ_COMPLETED_WITH_ERROR, | 114 | READ_COMPLETED_WITH_ERROR, |
115 | READ_AHEAD_COMPLETED_WITH_ERROR, | 115 | READ_AHEAD_COMPLETED_WITH_ERROR, |
116 | WRITE_COMPLETED_WITH_ERROR, | 116 | WRITE_COMPLETED_WITH_ERROR, |
117 | DISCARD_COMPLETED_NOTSUPP, | ||
118 | DISCARD_COMPLETED_WITH_ERROR, | ||
119 | |||
117 | ABORT_DISK_IO, | 120 | ABORT_DISK_IO, |
118 | COMPLETED_OK, | ||
119 | RESEND, | 121 | RESEND, |
120 | FAIL_FROZEN_DISK_IO, | 122 | FAIL_FROZEN_DISK_IO, |
121 | RESTART_FROZEN_DISK_IO, | 123 | RESTART_FROZEN_DISK_IO, |
diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index 1a84345a3868..a5d8aae00e04 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c | |||
@@ -54,8 +54,8 @@ static void after_state_ch(struct drbd_device *device, union drbd_state os, | |||
54 | static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state); | 54 | static enum drbd_state_rv is_valid_state(struct drbd_device *, union drbd_state); |
55 | static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *); | 55 | static enum drbd_state_rv is_valid_soft_transition(union drbd_state, union drbd_state, struct drbd_connection *); |
56 | static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); | 56 | static enum drbd_state_rv is_valid_transition(union drbd_state os, union drbd_state ns); |
57 | static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns, | 57 | static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os, |
58 | enum sanitize_state_warnings *warn); | 58 | union drbd_state ns, enum sanitize_state_warnings *warn); |
59 | 59 | ||
60 | static inline bool is_susp(union drbd_state s) | 60 | static inline bool is_susp(union drbd_state s) |
61 | { | 61 | { |
@@ -287,7 +287,7 @@ _req_st_cond(struct drbd_device *device, union drbd_state mask, | |||
287 | 287 | ||
288 | spin_lock_irqsave(&device->resource->req_lock, flags); | 288 | spin_lock_irqsave(&device->resource->req_lock, flags); |
289 | os = drbd_read_state(device); | 289 | os = drbd_read_state(device); |
290 | ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL); | 290 | ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL); |
291 | rv = is_valid_transition(os, ns); | 291 | rv = is_valid_transition(os, ns); |
292 | if (rv >= SS_SUCCESS) | 292 | if (rv >= SS_SUCCESS) |
293 | rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ | 293 | rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */ |
@@ -333,7 +333,7 @@ drbd_req_state(struct drbd_device *device, union drbd_state mask, | |||
333 | 333 | ||
334 | spin_lock_irqsave(&device->resource->req_lock, flags); | 334 | spin_lock_irqsave(&device->resource->req_lock, flags); |
335 | os = drbd_read_state(device); | 335 | os = drbd_read_state(device); |
336 | ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL); | 336 | ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL); |
337 | rv = is_valid_transition(os, ns); | 337 | rv = is_valid_transition(os, ns); |
338 | if (rv < SS_SUCCESS) { | 338 | if (rv < SS_SUCCESS) { |
339 | spin_unlock_irqrestore(&device->resource->req_lock, flags); | 339 | spin_unlock_irqrestore(&device->resource->req_lock, flags); |
@@ -740,8 +740,8 @@ static void print_sanitize_warnings(struct drbd_device *device, enum sanitize_st | |||
740 | * When we lose connection, we have to set the state of the peer's disk (pdsk) | 740 | * When we lose connection, we have to set the state of the peer's disk (pdsk) |
741 | * to D_UNKNOWN. This rule and many more along those lines are in this function. | 741 | * to D_UNKNOWN. This rule and many more along those lines are in this function. |
742 | */ | 742 | */ |
743 | static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state ns, | 743 | static union drbd_state sanitize_state(struct drbd_device *device, union drbd_state os, |
744 | enum sanitize_state_warnings *warn) | 744 | union drbd_state ns, enum sanitize_state_warnings *warn) |
745 | { | 745 | { |
746 | enum drbd_fencing_p fp; | 746 | enum drbd_fencing_p fp; |
747 | enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; | 747 | enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max; |
@@ -882,11 +882,13 @@ static union drbd_state sanitize_state(struct drbd_device *device, union drbd_st | |||
882 | } | 882 | } |
883 | 883 | ||
884 | if (fp == FP_STONITH && | 884 | if (fp == FP_STONITH && |
885 | (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED)) | 885 | (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) && |
886 | !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED)) | ||
886 | ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ | 887 | ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */ |
887 | 888 | ||
888 | if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO && | 889 | if (device->resource->res_opts.on_no_data == OND_SUSPEND_IO && |
889 | (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)) | 890 | (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) && |
891 | !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE)) | ||
890 | ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ | 892 | ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */ |
891 | 893 | ||
892 | if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { | 894 | if (ns.aftr_isp || ns.peer_isp || ns.user_isp) { |
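
Note: passing the old state into sanitize_state() lets the susp_fen and susp_nod rules become edge-triggered. IO is suspended only on the transition into the bad condition, not re-asserted on every state change while the condition persists, so an administrator's resume is no longer immediately overridden. The semantics in miniature:

    #include <stdbool.h>
    #include <stdio.h>

    /* Raise the suspend flag only when the condition newly becomes true. */
    static bool suspend_io(bool cond_old, bool cond_new, bool susp_old)
    {
        if (cond_new && !cond_old)
            return true;        /* edge: just entered the condition */
        return susp_old;        /* otherwise keep whatever was set */
    }

    int main(void)
    {
        printf("%d\n", suspend_io(false, true, false)); /* 1: edge */
        printf("%d\n", suspend_io(true,  true, false)); /* 0: level only */
        return 0;
    }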
@@ -958,7 +960,7 @@ __drbd_set_state(struct drbd_device *device, union drbd_state ns, | |||
958 | 960 | ||
959 | os = drbd_read_state(device); | 961 | os = drbd_read_state(device); |
960 | 962 | ||
961 | ns = sanitize_state(device, ns, &ssw); | 963 | ns = sanitize_state(device, os, ns, &ssw); |
962 | if (ns.i == os.i) | 964 | if (ns.i == os.i) |
963 | return SS_NOTHING_TO_DO; | 965 | return SS_NOTHING_TO_DO; |
964 | 966 | ||
@@ -1656,7 +1658,7 @@ conn_is_valid_transition(struct drbd_connection *connection, union drbd_state ma | |||
1656 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { | 1658 | idr_for_each_entry(&connection->peer_devices, peer_device, vnr) { |
1657 | struct drbd_device *device = peer_device->device; | 1659 | struct drbd_device *device = peer_device->device; |
1658 | os = drbd_read_state(device); | 1660 | os = drbd_read_state(device); |
1659 | ns = sanitize_state(device, apply_mask_val(os, mask, val), NULL); | 1661 | ns = sanitize_state(device, os, apply_mask_val(os, mask, val), NULL); |
1660 | 1662 | ||
1661 | if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) | 1663 | if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) |
1662 | ns.disk = os.disk; | 1664 | ns.disk = os.disk; |
@@ -1718,7 +1720,7 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union | |||
1718 | number_of_volumes++; | 1720 | number_of_volumes++; |
1719 | os = drbd_read_state(device); | 1721 | os = drbd_read_state(device); |
1720 | ns = apply_mask_val(os, mask, val); | 1722 | ns = apply_mask_val(os, mask, val); |
1721 | ns = sanitize_state(device, ns, NULL); | 1723 | ns = sanitize_state(device, os, ns, NULL); |
1722 | 1724 | ||
1723 | if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) | 1725 | if (flags & CS_IGN_OUTD_FAIL && ns.disk == D_OUTDATED && os.disk < D_OUTDATED) |
1724 | ns.disk = os.disk; | 1726 | ns.disk = os.disk; |
@@ -1763,19 +1765,19 @@ conn_set_state(struct drbd_connection *connection, union drbd_state mask, union | |||
1763 | static enum drbd_state_rv | 1765 | static enum drbd_state_rv |
1764 | _conn_rq_cond(struct drbd_connection *connection, union drbd_state mask, union drbd_state val) | 1766 | _conn_rq_cond(struct drbd_connection *connection, union drbd_state mask, union drbd_state val) |
1765 | { | 1767 | { |
1766 | enum drbd_state_rv rv; | 1768 | enum drbd_state_rv err, rv = SS_UNKNOWN_ERROR; /* continue waiting */ |
1767 | 1769 | ||
1768 | if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &connection->flags)) | 1770 | if (test_and_clear_bit(CONN_WD_ST_CHG_OKAY, &connection->flags)) |
1769 | return SS_CW_SUCCESS; | 1771 | rv = SS_CW_SUCCESS; |
1770 | 1772 | ||
1771 | if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &connection->flags)) | 1773 | if (test_and_clear_bit(CONN_WD_ST_CHG_FAIL, &connection->flags)) |
1772 | return SS_CW_FAILED_BY_PEER; | 1774 | rv = SS_CW_FAILED_BY_PEER; |
1773 | 1775 | ||
1774 | rv = conn_is_valid_transition(connection, mask, val, 0); | 1776 | err = conn_is_valid_transition(connection, mask, val, 0); |
1775 | if (rv == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS) | 1777 | if (err == SS_SUCCESS && connection->cstate == C_WF_REPORT_PARAMS) |
1776 | rv = SS_UNKNOWN_ERROR; /* continue waiting */ | 1778 | return rv; |
1777 | 1779 | ||
1778 | return rv; | 1780 | return err; |
1779 | } | 1781 | } |
1780 | 1782 | ||
1781 | enum drbd_state_rv | 1783 | enum drbd_state_rv |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index 2c4ce42c3657..d8f57b6305cd 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -118,7 +118,7 @@ static void drbd_endio_read_sec_final(struct drbd_peer_request *peer_req) __rele | |||
118 | 118 | ||
119 | /* writes on behalf of the partner, or resync writes, | 119 | /* writes on behalf of the partner, or resync writes, |
120 | * "submitted" by the receiver, final stage. */ | 120 | * "submitted" by the receiver, final stage. */ |
121 | static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) | 121 | void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __releases(local) |
122 | { | 122 | { |
123 | unsigned long flags = 0; | 123 | unsigned long flags = 0; |
124 | struct drbd_peer_device *peer_device = peer_req->peer_device; | 124 | struct drbd_peer_device *peer_device = peer_req->peer_device; |
@@ -150,7 +150,9 @@ static void drbd_endio_write_sec_final(struct drbd_peer_request *peer_req) __rel | |||
150 | 150 | ||
151 | do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); | 151 | do_wake = list_empty(block_id == ID_SYNCER ? &device->sync_ee : &device->active_ee); |
152 | 152 | ||
153 | if (test_bit(__EE_WAS_ERROR, &peer_req->flags)) | 153 | /* FIXME do we want to detach for failed REQ_DISCARD? |
154 | * ((peer_req->flags & (EE_WAS_ERROR|EE_IS_TRIM)) == EE_WAS_ERROR) */ | ||
155 | if (peer_req->flags & EE_WAS_ERROR) | ||
154 | __drbd_chk_io_error(device, DRBD_WRITE_ERROR); | 156 | __drbd_chk_io_error(device, DRBD_WRITE_ERROR); |
155 | spin_unlock_irqrestore(&device->resource->req_lock, flags); | 157 | spin_unlock_irqrestore(&device->resource->req_lock, flags); |
156 | 158 | ||
@@ -176,10 +178,12 @@ void drbd_peer_request_endio(struct bio *bio, int error) | |||
176 | struct drbd_device *device = peer_req->peer_device->device; | 178 | struct drbd_device *device = peer_req->peer_device->device; |
177 | int uptodate = bio_flagged(bio, BIO_UPTODATE); | 179 | int uptodate = bio_flagged(bio, BIO_UPTODATE); |
178 | int is_write = bio_data_dir(bio) == WRITE; | 180 | int is_write = bio_data_dir(bio) == WRITE; |
181 | int is_discard = !!(bio->bi_rw & REQ_DISCARD); | ||
179 | 182 | ||
180 | if (error && __ratelimit(&drbd_ratelimit_state)) | 183 | if (error && __ratelimit(&drbd_ratelimit_state)) |
181 | drbd_warn(device, "%s: error=%d s=%llus\n", | 184 | drbd_warn(device, "%s: error=%d s=%llus\n", |
182 | is_write ? "write" : "read", error, | 185 | is_write ? (is_discard ? "discard" : "write") |
186 | : "read", error, | ||
183 | (unsigned long long)peer_req->i.sector); | 187 | (unsigned long long)peer_req->i.sector); |
184 | if (!error && !uptodate) { | 188 | if (!error && !uptodate) { |
185 | if (__ratelimit(&drbd_ratelimit_state)) | 189 | if (__ratelimit(&drbd_ratelimit_state)) |
@@ -263,7 +267,12 @@ void drbd_request_endio(struct bio *bio, int error) | |||
263 | 267 | ||
264 | /* to avoid recursion in __req_mod */ | 268 | /* to avoid recursion in __req_mod */ |
265 | if (unlikely(error)) { | 269 | if (unlikely(error)) { |
266 | what = (bio_data_dir(bio) == WRITE) | 270 | if (bio->bi_rw & REQ_DISCARD) |
271 | what = (error == -EOPNOTSUPP) | ||
272 | ? DISCARD_COMPLETED_NOTSUPP | ||
273 | : DISCARD_COMPLETED_WITH_ERROR; | ||
274 | else | ||
275 | what = (bio_data_dir(bio) == WRITE) | ||
267 | ? WRITE_COMPLETED_WITH_ERROR | 276 | ? WRITE_COMPLETED_WITH_ERROR |
268 | : (bio_rw(bio) == READ) | 277 | : (bio_rw(bio) == READ) |
269 | ? READ_COMPLETED_WITH_ERROR | 278 | ? READ_COMPLETED_WITH_ERROR |
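
Note: in the completion handler, the discard outcome is folded into the event space before __req_mod() runs: -EOPNOTSUPP means the device cannot discard at all, anything else is a genuine failure, and both are kept apart from plain write errors. A standalone sketch of the mapping, with stand-in event names:

    #include <errno.h>
    #include <stdio.h>

    enum what_x { WRITE_ERR_X, DISCARD_NOTSUPP_X, DISCARD_ERR_X };

    /* Mirror of the classification in drbd_request_endio() above. */
    static enum what_x classify_error(int error, int is_discard)
    {
        if (!is_discard)
            return WRITE_ERR_X;
        return error == -EOPNOTSUPP ? DISCARD_NOTSUPP_X : DISCARD_ERR_X;
    }

    int main(void)
    {
        printf("%d\n", classify_error(-EOPNOTSUPP, 1) == DISCARD_NOTSUPP_X); /* 1 */
        printf("%d\n", classify_error(-EIO, 1) == DISCARD_ERR_X);            /* 1 */
        return 0;
    }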
@@ -395,7 +404,7 @@ static int read_for_csum(struct drbd_peer_device *peer_device, sector_t sector, | |||
395 | /* GFP_TRY, because if there is no memory available right now, this may | 404 | /* GFP_TRY, because if there is no memory available right now, this may |
396 | * be rescheduled for later. It is "only" background resync, after all. */ | 405 | * be rescheduled for later. It is "only" background resync, after all. */ |
397 | peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, | 406 | peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER /* unused */, sector, |
398 | size, GFP_TRY); | 407 | size, true /* has real payload */, GFP_TRY); |
399 | if (!peer_req) | 408 | if (!peer_req) |
400 | goto defer; | 409 | goto defer; |
401 | 410 | ||
@@ -492,10 +501,9 @@ struct fifo_buffer *fifo_alloc(int fifo_size) | |||
492 | return fb; | 501 | return fb; |
493 | } | 502 | } |
494 | 503 | ||
495 | static int drbd_rs_controller(struct drbd_device *device) | 504 | static int drbd_rs_controller(struct drbd_device *device, unsigned int sect_in) |
496 | { | 505 | { |
497 | struct disk_conf *dc; | 506 | struct disk_conf *dc; |
498 | unsigned int sect_in; /* Number of sectors that came in since the last turn */ | ||
499 | unsigned int want; /* The number of sectors we want in the proxy */ | 507 | unsigned int want; /* The number of sectors we want in the proxy */ |
500 | int req_sect; /* Number of sectors to request in this turn */ | 508 | int req_sect; /* Number of sectors to request in this turn */ |
501 | int correction; /* Number of sectors more we need in the proxy*/ | 509 | int correction; /* Number of sectors more we need in the proxy*/ |
@@ -505,9 +513,6 @@ static int drbd_rs_controller(struct drbd_device *device) | |||
505 | int max_sect; | 513 | int max_sect; |
506 | struct fifo_buffer *plan; | 514 | struct fifo_buffer *plan; |
507 | 515 | ||
508 | sect_in = atomic_xchg(&device->rs_sect_in, 0); /* Number of sectors that came in */ | ||
509 | device->rs_in_flight -= sect_in; | ||
510 | |||
511 | dc = rcu_dereference(device->ldev->disk_conf); | 516 | dc = rcu_dereference(device->ldev->disk_conf); |
512 | plan = rcu_dereference(device->rs_plan_s); | 517 | plan = rcu_dereference(device->rs_plan_s); |
513 | 518 | ||
@@ -550,11 +555,16 @@ static int drbd_rs_controller(struct drbd_device *device) | |||
550 | 555 | ||
551 | static int drbd_rs_number_requests(struct drbd_device *device) | 556 | static int drbd_rs_number_requests(struct drbd_device *device) |
552 | { | 557 | { |
553 | int number; | 558 | unsigned int sect_in; /* Number of sectors that came in since the last turn */ |
559 | int number, mxb; | ||
560 | |||
561 | sect_in = atomic_xchg(&device->rs_sect_in, 0); | ||
562 | device->rs_in_flight -= sect_in; | ||
554 | 563 | ||
555 | rcu_read_lock(); | 564 | rcu_read_lock(); |
565 | mxb = drbd_get_max_buffers(device) / 2; | ||
556 | if (rcu_dereference(device->rs_plan_s)->size) { | 566 | if (rcu_dereference(device->rs_plan_s)->size) { |
557 | number = drbd_rs_controller(device) >> (BM_BLOCK_SHIFT - 9); | 567 | number = drbd_rs_controller(device, sect_in) >> (BM_BLOCK_SHIFT - 9); |
558 | device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; | 568 | device->c_sync_rate = number * HZ * (BM_BLOCK_SIZE / 1024) / SLEEP_TIME; |
559 | } else { | 569 | } else { |
560 | device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate; | 570 | device->c_sync_rate = rcu_dereference(device->ldev->disk_conf)->resync_rate; |
@@ -562,8 +572,14 @@ static int drbd_rs_number_requests(struct drbd_device *device) | |||
562 | } | 572 | } |
563 | rcu_read_unlock(); | 573 | rcu_read_unlock(); |
564 | 574 | ||
565 | /* ignore the amount of pending requests, the resync controller should | 575 | /* Don't have more than "max-buffers"/2 in-flight. |
566 | * throttle down to incoming reply rate soon enough anyways. */ | 576 | * Otherwise we may cause the remote site to stall on drbd_alloc_pages(), |
577 | * potentially causing a distributed deadlock on congestion during | ||
578 | * online-verify or (checksum-based) resync, if max-buffers, | ||
579 | * socket buffer sizes and resync rate settings are mis-configured. */ | ||
580 | if (mxb - device->rs_in_flight < number) | ||
581 | number = mxb - device->rs_in_flight; | ||
582 | |||
567 | return number; | 583 | return number; |
568 | } | 584 | } |
569 | 585 | ||
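
Note: the old comment argued the resync controller would throttle on its own; the replacement explains why that is not enough. If in-flight resync requests exceed what the peer's max-buffers setting can absorb, the peer stalls in drbd_alloc_pages() and the two nodes can deadlock on congestion. A standalone sketch of the new clamp (the caller, make_resync_request(), now also treats a negative result like zero via the "number <= 0" change below):

    #include <stdio.h>

    /* Never keep more than max_buffers/2 resync sectors in flight,
     * whatever the rate controller asked for. */
    static int clamp_resync_requests(int wanted, int max_buffers, int in_flight)
    {
        int mxb = max_buffers / 2;

        if (mxb - in_flight < wanted)
            wanted = mxb - in_flight;
        return wanted;
    }

    int main(void)
    {
        printf("%d\n", clamp_resync_requests(100, 128, 10)); /* 54 */
        printf("%d\n", clamp_resync_requests(10, 128, 100)); /* -36: caller requeues */
        return 0;
    }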
@@ -597,7 +613,7 @@ static int make_resync_request(struct drbd_device *device, int cancel) | |||
597 | 613 | ||
598 | max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; | 614 | max_bio_size = queue_max_hw_sectors(device->rq_queue) << 9; |
599 | number = drbd_rs_number_requests(device); | 615 | number = drbd_rs_number_requests(device); |
600 | if (number == 0) | 616 | if (number <= 0) |
601 | goto requeue; | 617 | goto requeue; |
602 | 618 | ||
603 | for (i = 0; i < number; i++) { | 619 | for (i = 0; i < number; i++) { |
@@ -647,7 +663,7 @@ next_sector: | |||
647 | */ | 663 | */ |
648 | align = 1; | 664 | align = 1; |
649 | rollback_i = i; | 665 | rollback_i = i; |
650 | for (;;) { | 666 | while (i < number) { |
651 | if (size + BM_BLOCK_SIZE > max_bio_size) | 667 | if (size + BM_BLOCK_SIZE > max_bio_size) |
652 | break; | 668 | break; |
653 | 669 | ||
@@ -1670,11 +1686,15 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1670 | } | 1686 | } |
1671 | clear_bit(B_RS_H_DONE, &device->flags); | 1687 | clear_bit(B_RS_H_DONE, &device->flags); |
1672 | 1688 | ||
1673 | write_lock_irq(&global_state_lock); | 1689 | /* req_lock: serialize with drbd_send_and_submit() and others |
1690 | * global_state_lock: for stable sync-after dependencies */ | ||
1691 | spin_lock_irq(&device->resource->req_lock); | ||
1692 | write_lock(&global_state_lock); | ||
1674 | /* Did some connection breakage or IO error race with us? */ | 1693 | /* Did some connection breakage or IO error race with us? */ |
1675 | if (device->state.conn < C_CONNECTED | 1694 | if (device->state.conn < C_CONNECTED |
1676 | || !get_ldev_if_state(device, D_NEGOTIATING)) { | 1695 | || !get_ldev_if_state(device, D_NEGOTIATING)) { |
1677 | write_unlock_irq(&global_state_lock); | 1696 | write_unlock(&global_state_lock); |
1697 | spin_unlock_irq(&device->resource->req_lock); | ||
1678 | mutex_unlock(device->state_mutex); | 1698 | mutex_unlock(device->state_mutex); |
1679 | return; | 1699 | return; |
1680 | } | 1700 | } |
@@ -1714,7 +1734,8 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1714 | } | 1734 | } |
1715 | _drbd_pause_after(device); | 1735 | _drbd_pause_after(device); |
1716 | } | 1736 | } |
1717 | write_unlock_irq(&global_state_lock); | 1737 | write_unlock(&global_state_lock); |
1738 | spin_unlock_irq(&device->resource->req_lock); | ||
1718 | 1739 | ||
1719 | if (r == SS_SUCCESS) { | 1740 | if (r == SS_SUCCESS) { |
1720 | /* reset rs_last_bcast when a resync or verify is started, | 1741 | /* reset rs_last_bcast when a resync or verify is started, |
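
Note: the locking change nests global_state_lock inside resource->req_lock, so resync start is serialized against drbd_send_and_submit() while the sync-after dependency check stays stable; irqs are disabled once by the outer spinlock, the inner rwlock therefore uses the plain (non-_irq) variant, and release happens in reverse order. A standalone model of that single consistent ordering, with pthread mutexes standing in for the kernel spinlock and rwlock (build with -lpthread):

    #include <pthread.h>
    #include <stdio.h>

    /* req_lock is always taken before global_state_lock and released
     * after it; one global order keeps the two-lock section deadlock-free. */
    static pthread_mutex_t req_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t global_state_lock = PTHREAD_MUTEX_INITIALIZER;

    static void start_resync_locked_section(void)
    {
        pthread_mutex_lock(&req_lock);          /* outer: spin_lock_irq(...) */
        pthread_mutex_lock(&global_state_lock); /* inner: write_lock(...)   */

        puts("state checked and changed under both locks");

        pthread_mutex_unlock(&global_state_lock);
        pthread_mutex_unlock(&req_lock);
    }

    int main(void)
    {
        start_resync_locked_section();
        return 0;
    }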
@@ -1778,34 +1799,6 @@ void drbd_start_resync(struct drbd_device *device, enum drbd_conns side) | |||
1778 | mutex_unlock(device->state_mutex); | 1799 | mutex_unlock(device->state_mutex); |
1779 | } | 1800 | } |
1780 | 1801 | ||
1781 | /* If the resource already closed the current epoch, but we did not | ||
1782 | * (because we have not yet seen new requests), we should send the | ||
1783 | * corresponding barrier now. Must be checked within the same spinlock | ||
1784 | * that is used to check for new requests. */ | ||
1785 | static bool need_to_send_barrier(struct drbd_connection *connection) | ||
1786 | { | ||
1787 | if (!connection->send.seen_any_write_yet) | ||
1788 | return false; | ||
1789 | |||
1790 | /* Skip barriers that do not contain any writes. | ||
1791 | * This may happen during AHEAD mode. */ | ||
1792 | if (!connection->send.current_epoch_writes) | ||
1793 | return false; | ||
1794 | |||
1795 | /* ->req_lock is held when requests are queued on | ||
1796 | * connection->sender_work, and put into ->transfer_log. | ||
1797 | * It is also held when ->current_tle_nr is increased. | ||
1798 | * So either there are already new requests queued, | ||
1799 | * and corresponding barriers will be send there. | ||
1800 | * Or nothing new is queued yet, so the difference will be 1. | ||
1801 | */ | ||
1802 | if (atomic_read(&connection->current_tle_nr) != | ||
1803 | connection->send.current_epoch_nr + 1) | ||
1804 | return false; | ||
1805 | |||
1806 | return true; | ||
1807 | } | ||
1808 | |||
1809 | static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) | 1802 | static bool dequeue_work_batch(struct drbd_work_queue *queue, struct list_head *work_list) |
1810 | { | 1803 | { |
1811 | spin_lock_irq(&queue->q_lock); | 1804 | spin_lock_irq(&queue->q_lock); |
@@ -1864,12 +1857,22 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head * | |||
1864 | spin_unlock_irq(&connection->resource->req_lock); | 1857 | spin_unlock_irq(&connection->resource->req_lock); |
1865 | break; | 1858 | break; |
1866 | } | 1859 | } |
1867 | send_barrier = need_to_send_barrier(connection); | 1860 | |
1861 | /* We found nothing new to do, no to-be-communicated request, | ||
1862 | * no other work item. We may still need to close the last | ||
1863 | * epoch. Next incoming request epoch will be connection -> | ||
1864 | * current transfer log epoch number. If that is different | ||
1865 | * from the epoch of the last request we communicated, it is | ||
1866 | * safe to send the epoch separating barrier now. | ||
1867 | */ | ||
1868 | send_barrier = | ||
1869 | atomic_read(&connection->current_tle_nr) != | ||
1870 | connection->send.current_epoch_nr; | ||
1868 | spin_unlock_irq(&connection->resource->req_lock); | 1871 | spin_unlock_irq(&connection->resource->req_lock); |
1869 | if (send_barrier) { | 1872 | |
1870 | drbd_send_barrier(connection); | 1873 | if (send_barrier) |
1871 | connection->send.current_epoch_nr++; | 1874 | maybe_send_barrier(connection, |
1872 | } | 1875 | connection->send.current_epoch_nr + 1); |
1873 | schedule(); | 1876 | schedule(); |
1874 | /* may be woken up for other things but new work, too, | 1877 | /* may be woken up for other things but new work, too, |
1875 | * e.g. if the current epoch got closed. | 1878 | * e.g. if the current epoch got closed. |
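
Note: the removed need_to_send_barrier() is replaced by a direct comparison of the connection's current transfer-log epoch with the epoch of the last communicated request; when they differ, the epoch-separating barrier may be sent, and the maybe_send_barrier() call used above takes over the send and the bookkeeping. A tiny sketch of the new condition:

    #include <stdbool.h>
    #include <stdio.h>

    /* A barrier closing the previous epoch is due exactly when the
     * transfer log already moved past the last epoch we sent for. */
    static bool barrier_due(int current_tle_nr, int last_sent_epoch)
    {
        return current_tle_nr != last_sent_epoch;
    }

    int main(void)
    {
        printf("%d\n", barrier_due(7, 7)); /* 0: nothing to close */
        printf("%d\n", barrier_due(8, 7)); /* 1: close epoch 7 with a barrier */
        return 0;
    }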
diff --git a/drivers/block/drbd/drbd_wrappers.h b/drivers/block/drbd/drbd_wrappers.h deleted file mode 100644 index 3db9ebaf64f6..000000000000 --- a/drivers/block/drbd/drbd_wrappers.h +++ /dev/null | |||
@@ -1,54 +0,0 @@ | |||
1 | #ifndef _DRBD_WRAPPERS_H | ||
2 | #define _DRBD_WRAPPERS_H | ||
3 | |||
4 | #include <linux/ctype.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include "drbd_int.h" | ||
7 | |||
8 | /* see get_sb_bdev and bd_claim */ | ||
9 | extern char *drbd_sec_holder; | ||
10 | |||
11 | /* sets the number of 512 byte sectors of our virtual device */ | ||
12 | static inline void drbd_set_my_capacity(struct drbd_device *device, | ||
13 | sector_t size) | ||
14 | { | ||
15 | /* set_capacity(device->this_bdev->bd_disk, size); */ | ||
16 | set_capacity(device->vdisk, size); | ||
17 | device->this_bdev->bd_inode->i_size = (loff_t)size << 9; | ||
18 | } | ||
19 | |||
20 | #define drbd_bio_uptodate(bio) bio_flagged(bio, BIO_UPTODATE) | ||
21 | |||
22 | /* bi_end_io handlers */ | ||
23 | extern void drbd_md_io_complete(struct bio *bio, int error); | ||
24 | extern void drbd_peer_request_endio(struct bio *bio, int error); | ||
25 | extern void drbd_request_endio(struct bio *bio, int error); | ||
26 | |||
27 | /* | ||
28 | * used to submit our private bio | ||
29 | */ | ||
30 | static inline void drbd_generic_make_request(struct drbd_device *device, | ||
31 | int fault_type, struct bio *bio) | ||
32 | { | ||
33 | __release(local); | ||
34 | if (!bio->bi_bdev) { | ||
35 | printk(KERN_ERR "drbd%d: drbd_generic_make_request: " | ||
36 | "bio->bi_bdev == NULL\n", | ||
37 | device_to_minor(device)); | ||
38 | dump_stack(); | ||
39 | bio_endio(bio, -ENODEV); | ||
40 | return; | ||
41 | } | ||
42 | |||
43 | if (drbd_insert_fault(device, fault_type)) | ||
44 | bio_endio(bio, -EIO); | ||
45 | else | ||
46 | generic_make_request(bio); | ||
47 | } | ||
48 | |||
49 | #ifndef __CHECKER__ | ||
50 | # undef __cond_lock | ||
51 | # define __cond_lock(x,c) (c) | ||
52 | #endif | ||
53 | |||
54 | #endif | ||
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 8f5565bf34cd..8e767bb7995e 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c | |||
@@ -2351,7 +2351,7 @@ static void rw_interrupt(void) | |||
2351 | } | 2351 | } |
2352 | 2352 | ||
2353 | if (CT(COMMAND) != FD_READ || | 2353 | if (CT(COMMAND) != FD_READ || |
2354 | raw_cmd->kernel_data == current_req->buffer) { | 2354 | raw_cmd->kernel_data == bio_data(current_req->bio)) { |
2355 | /* transfer directly from buffer */ | 2355 | /* transfer directly from buffer */ |
2356 | cont->done(1); | 2356 | cont->done(1); |
2357 | } else if (CT(COMMAND) == FD_READ) { | 2357 | } else if (CT(COMMAND) == FD_READ) { |
@@ -2640,7 +2640,7 @@ static int make_raw_rw_request(void) | |||
2640 | raw_cmd->flags &= ~FD_RAW_WRITE; | 2640 | raw_cmd->flags &= ~FD_RAW_WRITE; |
2641 | raw_cmd->flags |= FD_RAW_READ; | 2641 | raw_cmd->flags |= FD_RAW_READ; |
2642 | COMMAND = FM_MODE(_floppy, FD_READ); | 2642 | COMMAND = FM_MODE(_floppy, FD_READ); |
2643 | } else if ((unsigned long)current_req->buffer < MAX_DMA_ADDRESS) { | 2643 | } else if ((unsigned long)bio_data(current_req->bio) < MAX_DMA_ADDRESS) { |
2644 | unsigned long dma_limit; | 2644 | unsigned long dma_limit; |
2645 | int direct, indirect; | 2645 | int direct, indirect; |
2646 | 2646 | ||
@@ -2654,13 +2654,13 @@ static int make_raw_rw_request(void) | |||
2654 | */ | 2654 | */ |
2655 | max_size = buffer_chain_size(); | 2655 | max_size = buffer_chain_size(); |
2656 | dma_limit = (MAX_DMA_ADDRESS - | 2656 | dma_limit = (MAX_DMA_ADDRESS - |
2657 | ((unsigned long)current_req->buffer)) >> 9; | 2657 | ((unsigned long)bio_data(current_req->bio))) >> 9; |
2658 | if ((unsigned long)max_size > dma_limit) | 2658 | if ((unsigned long)max_size > dma_limit) |
2659 | max_size = dma_limit; | 2659 | max_size = dma_limit; |
2660 | /* 64 kb boundaries */ | 2660 | /* 64 kb boundaries */ |
2661 | if (CROSS_64KB(current_req->buffer, max_size << 9)) | 2661 | if (CROSS_64KB(bio_data(current_req->bio), max_size << 9)) |
2662 | max_size = (K_64 - | 2662 | max_size = (K_64 - |
2663 | ((unsigned long)current_req->buffer) % | 2663 | ((unsigned long)bio_data(current_req->bio)) % |
2664 | K_64) >> 9; | 2664 | K_64) >> 9; |
2665 | direct = transfer_size(ssize, max_sector, max_size) - fsector_t; | 2665 | direct = transfer_size(ssize, max_sector, max_size) - fsector_t; |
2666 | /* | 2666 | /* |
@@ -2677,7 +2677,7 @@ static int make_raw_rw_request(void) | |||
2677 | (DP->read_track & (1 << DRS->probed_format)))))) { | 2677 | (DP->read_track & (1 << DRS->probed_format)))))) { |
2678 | max_size = blk_rq_sectors(current_req); | 2678 | max_size = blk_rq_sectors(current_req); |
2679 | } else { | 2679 | } else { |
2680 | raw_cmd->kernel_data = current_req->buffer; | 2680 | raw_cmd->kernel_data = bio_data(current_req->bio); |
2681 | raw_cmd->length = current_count_sectors << 9; | 2681 | raw_cmd->length = current_count_sectors << 9; |
2682 | if (raw_cmd->length == 0) { | 2682 | if (raw_cmd->length == 0) { |
2683 | DPRINT("%s: zero dma transfer attempted\n", __func__); | 2683 | DPRINT("%s: zero dma transfer attempted\n", __func__); |
@@ -2731,7 +2731,7 @@ static int make_raw_rw_request(void) | |||
2731 | raw_cmd->length = ((raw_cmd->length - 1) | (ssize - 1)) + 1; | 2731 | raw_cmd->length = ((raw_cmd->length - 1) | (ssize - 1)) + 1; |
2732 | raw_cmd->length <<= 9; | 2732 | raw_cmd->length <<= 9; |
2733 | if ((raw_cmd->length < current_count_sectors << 9) || | 2733 | if ((raw_cmd->length < current_count_sectors << 9) || |
2734 | (raw_cmd->kernel_data != current_req->buffer && | 2734 | (raw_cmd->kernel_data != bio_data(current_req->bio) && |
2735 | CT(COMMAND) == FD_WRITE && | 2735 | CT(COMMAND) == FD_WRITE && |
2736 | (aligned_sector_t + (raw_cmd->length >> 9) > buffer_max || | 2736 | (aligned_sector_t + (raw_cmd->length >> 9) > buffer_max || |
2737 | aligned_sector_t < buffer_min)) || | 2737 | aligned_sector_t < buffer_min)) || |
@@ -2739,7 +2739,7 @@ static int make_raw_rw_request(void) | |||
2739 | raw_cmd->length <= 0 || current_count_sectors <= 0) { | 2739 | raw_cmd->length <= 0 || current_count_sectors <= 0) { |
2740 | DPRINT("fractionary current count b=%lx s=%lx\n", | 2740 | DPRINT("fractionary current count b=%lx s=%lx\n", |
2741 | raw_cmd->length, current_count_sectors); | 2741 | raw_cmd->length, current_count_sectors); |
2742 | if (raw_cmd->kernel_data != current_req->buffer) | 2742 | if (raw_cmd->kernel_data != bio_data(current_req->bio)) |
2743 | pr_info("addr=%d, length=%ld\n", | 2743 | pr_info("addr=%d, length=%ld\n", |
2744 | (int)((raw_cmd->kernel_data - | 2744 | (int)((raw_cmd->kernel_data - |
2745 | floppy_track_buffer) >> 9), | 2745 | floppy_track_buffer) >> 9), |
@@ -2756,7 +2756,7 @@ static int make_raw_rw_request(void) | |||
2756 | return 0; | 2756 | return 0; |
2757 | } | 2757 | } |
2758 | 2758 | ||
2759 | if (raw_cmd->kernel_data != current_req->buffer) { | 2759 | if (raw_cmd->kernel_data != bio_data(current_req->bio)) { |
2760 | if (raw_cmd->kernel_data < floppy_track_buffer || | 2760 | if (raw_cmd->kernel_data < floppy_track_buffer || |
2761 | current_count_sectors < 0 || | 2761 | current_count_sectors < 0 || |
2762 | raw_cmd->length < 0 || | 2762 | raw_cmd->length < 0 || |
@@ -3809,7 +3809,7 @@ static int __floppy_read_block_0(struct block_device *bdev, int drive) | |||
3809 | bio.bi_iter.bi_size = size; | 3809 | bio.bi_iter.bi_size = size; |
3810 | bio.bi_bdev = bdev; | 3810 | bio.bi_bdev = bdev; |
3811 | bio.bi_iter.bi_sector = 0; | 3811 | bio.bi_iter.bi_sector = 0; |
3812 | bio.bi_flags = (1 << BIO_QUIET); | 3812 | bio.bi_flags |= (1 << BIO_QUIET); |
3813 | bio.bi_private = &cbdata; | 3813 | bio.bi_private = &cbdata; |
3814 | bio.bi_end_io = floppy_rb0_cb; | 3814 | bio.bi_end_io = floppy_rb0_cb; |
3815 | 3815 | ||
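
Note: this hunk and the hd.c and mg_disk.c hunks below are the same mechanical conversion: the request->buffer field is going away, and legacy drivers that want a kernel-virtual pointer to the request data now derive it from the head bio. (The bi_flags |= change above is a separate small fix: keep the flags already set on the on-stack bio instead of overwriting them.) The idiom, as a hedged kernel-style helper (the helper name is mine, not the kernel's):

    #include <linux/bio.h>
    #include <linux/blkdev.h>

    /* bio_data() yields the kernel virtual address of the first segment
     * of the request's head bio -- valid only for single-segment, lowmem
     * requests, which is exactly the contract ->buffer used to provide. */
    static inline void *rq_head_data(struct request *rq)
    {
        return bio_data(rq->bio);
    }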
diff --git a/drivers/block/hd.c b/drivers/block/hd.c index bf397bf108b7..8a290c08262f 100644 --- a/drivers/block/hd.c +++ b/drivers/block/hd.c | |||
@@ -464,11 +464,11 @@ static void read_intr(void) | |||
464 | 464 | ||
465 | ok_to_read: | 465 | ok_to_read: |
466 | req = hd_req; | 466 | req = hd_req; |
467 | insw(HD_DATA, req->buffer, 256); | 467 | insw(HD_DATA, bio_data(req->bio), 256); |
468 | #ifdef DEBUG | 468 | #ifdef DEBUG |
469 | printk("%s: read: sector %ld, remaining = %u, buffer=%p\n", | 469 | printk("%s: read: sector %ld, remaining = %u, buffer=%p\n", |
470 | req->rq_disk->disk_name, blk_rq_pos(req) + 1, | 470 | req->rq_disk->disk_name, blk_rq_pos(req) + 1, |
471 | blk_rq_sectors(req) - 1, req->buffer+512); | 471 | blk_rq_sectors(req) - 1, bio_data(req->bio)+512); |
472 | #endif | 472 | #endif |
473 | if (hd_end_request(0, 512)) { | 473 | if (hd_end_request(0, 512)) { |
474 | SET_HANDLER(&read_intr); | 474 | SET_HANDLER(&read_intr); |
@@ -505,7 +505,7 @@ static void write_intr(void) | |||
505 | ok_to_write: | 505 | ok_to_write: |
506 | if (hd_end_request(0, 512)) { | 506 | if (hd_end_request(0, 512)) { |
507 | SET_HANDLER(&write_intr); | 507 | SET_HANDLER(&write_intr); |
508 | outsw(HD_DATA, req->buffer, 256); | 508 | outsw(HD_DATA, bio_data(req->bio), 256); |
509 | return; | 509 | return; |
510 | } | 510 | } |
511 | 511 | ||
@@ -624,7 +624,7 @@ repeat: | |||
624 | printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", | 624 | printk("%s: %sing: CHS=%d/%d/%d, sectors=%d, buffer=%p\n", |
625 | req->rq_disk->disk_name, | 625 | req->rq_disk->disk_name, |
626 | req_data_dir(req) == READ ? "read" : "writ", | 626 | req_data_dir(req) == READ ? "read" : "writ", |
627 | cyl, head, sec, nsect, req->buffer); | 627 | cyl, head, sec, nsect, bio_data(req->bio)); |
628 | #endif | 628 | #endif |
629 | if (req->cmd_type == REQ_TYPE_FS) { | 629 | if (req->cmd_type == REQ_TYPE_FS) { |
630 | switch (rq_data_dir(req)) { | 630 | switch (rq_data_dir(req)) { |
@@ -643,7 +643,7 @@ repeat: | |||
643 | bad_rw_intr(); | 643 | bad_rw_intr(); |
644 | goto repeat; | 644 | goto repeat; |
645 | } | 645 | } |
646 | outsw(HD_DATA, req->buffer, 256); | 646 | outsw(HD_DATA, bio_data(req->bio), 256); |
647 | break; | 647 | break; |
648 | default: | 648 | default: |
649 | printk("unknown hd-command\n"); | 649 | printk("unknown hd-command\n"); |
diff --git a/drivers/block/mg_disk.c b/drivers/block/mg_disk.c index eb59b1241366..e352cac707e8 100644 --- a/drivers/block/mg_disk.c +++ b/drivers/block/mg_disk.c | |||
@@ -479,7 +479,7 @@ static unsigned int mg_out(struct mg_host *host, | |||
479 | 479 | ||
480 | static void mg_read_one(struct mg_host *host, struct request *req) | 480 | static void mg_read_one(struct mg_host *host, struct request *req) |
481 | { | 481 | { |
482 | u16 *buff = (u16 *)req->buffer; | 482 | u16 *buff = (u16 *)bio_data(req->bio); |
483 | u32 i; | 483 | u32 i; |
484 | 484 | ||
485 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) | 485 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) |
@@ -496,7 +496,7 @@ static void mg_read(struct request *req) | |||
496 | mg_bad_rw_intr(host); | 496 | mg_bad_rw_intr(host); |
497 | 497 | ||
498 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", | 498 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", |
499 | blk_rq_sectors(req), blk_rq_pos(req), req->buffer); | 499 | blk_rq_sectors(req), blk_rq_pos(req), bio_data(req->bio)); |
500 | 500 | ||
501 | do { | 501 | do { |
502 | if (mg_wait(host, ATA_DRQ, | 502 | if (mg_wait(host, ATA_DRQ, |
@@ -514,7 +514,7 @@ static void mg_read(struct request *req) | |||
514 | 514 | ||
515 | static void mg_write_one(struct mg_host *host, struct request *req) | 515 | static void mg_write_one(struct mg_host *host, struct request *req) |
516 | { | 516 | { |
517 | u16 *buff = (u16 *)req->buffer; | 517 | u16 *buff = (u16 *)bio_data(req->bio); |
518 | u32 i; | 518 | u32 i; |
519 | 519 | ||
520 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) | 520 | for (i = 0; i < MG_SECTOR_SIZE >> 1; i++) |
@@ -534,7 +534,7 @@ static void mg_write(struct request *req) | |||
534 | } | 534 | } |
535 | 535 | ||
536 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", | 536 | MG_DBG("requested %d sects (from %ld), buffer=0x%p\n", |
537 | rem, blk_rq_pos(req), req->buffer); | 537 | rem, blk_rq_pos(req), bio_data(req->bio)); |
538 | 538 | ||
539 | if (mg_wait(host, ATA_DRQ, | 539 | if (mg_wait(host, ATA_DRQ, |
540 | MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { | 540 | MG_TMAX_WAIT_WR_DRQ) != MG_ERR_NONE) { |
@@ -585,7 +585,7 @@ ok_to_read: | |||
585 | mg_read_one(host, req); | 585 | mg_read_one(host, req); |
586 | 586 | ||
587 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", | 587 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", |
588 | blk_rq_pos(req), blk_rq_sectors(req) - 1, req->buffer); | 588 | blk_rq_pos(req), blk_rq_sectors(req) - 1, bio_data(req->bio)); |
589 | 589 | ||
590 | /* send read confirm */ | 590 | /* send read confirm */ |
591 | outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); | 591 | outb(MG_CMD_RD_CONF, (unsigned long)host->dev_base + MG_REG_COMMAND); |
@@ -624,7 +624,7 @@ ok_to_write: | |||
624 | /* write 1 sector and set handler if remains */ | 624 | /* write 1 sector and set handler if remains */ |
625 | mg_write_one(host, req); | 625 | mg_write_one(host, req); |
626 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", | 626 | MG_DBG("sector %ld, remaining=%ld, buffer=0x%p\n", |
627 | blk_rq_pos(req), blk_rq_sectors(req), req->buffer); | 627 | blk_rq_pos(req), blk_rq_sectors(req), bio_data(req->bio)); |
628 | host->mg_do_intr = mg_write_intr; | 628 | host->mg_do_intr = mg_write_intr; |
629 | mod_timer(&host->timer, jiffies + 3 * HZ); | 629 | mod_timer(&host->timer, jiffies + 3 * HZ); |
630 | } | 630 | } |
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 59c5abe32f06..74abd49fabdc 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/module.h> | 31 | #include <linux/module.h> |
32 | #include <linux/genhd.h> | 32 | #include <linux/genhd.h> |
33 | #include <linux/blkdev.h> | 33 | #include <linux/blkdev.h> |
34 | #include <linux/blk-mq.h> | ||
34 | #include <linux/bio.h> | 35 | #include <linux/bio.h> |
35 | #include <linux/dma-mapping.h> | 36 | #include <linux/dma-mapping.h> |
36 | #include <linux/idr.h> | 37 | #include <linux/idr.h> |
@@ -173,60 +174,36 @@ static bool mtip_check_surprise_removal(struct pci_dev *pdev) | |||
173 | return false; /* device present */ | 174 | return false; /* device present */ |
174 | } | 175 | } |
175 | 176 | ||
176 | /* | 177 | static struct mtip_cmd *mtip_get_int_command(struct driver_data *dd) |
177 | * Obtain an empty command slot. | ||
178 | * | ||
179 | * This function needs to be reentrant since it could be called | ||
180 | * at the same time on multiple CPUs. The allocation of the | ||
181 | * command slot must be atomic. | ||
182 | * | ||
183 | * @port Pointer to the port data structure. | ||
184 | * | ||
185 | * return value | ||
186 | * >= 0 Index of command slot obtained. | ||
187 | * -1 No command slots available. | ||
188 | */ | ||
189 | static int get_slot(struct mtip_port *port) | ||
190 | { | 178 | { |
191 | int slot, i; | 179 | struct request *rq; |
192 | unsigned int num_command_slots = port->dd->slot_groups * 32; | ||
193 | 180 | ||
194 | /* | 181 | rq = blk_mq_alloc_request(dd->queue, 0, __GFP_WAIT, true); |
195 | * Try 10 times, because there is a small race here. | 182 | return blk_mq_rq_to_pdu(rq); |
196 | * that's ok, because it's still cheaper than a lock. | 183 | } |
197 | * | ||
198 | * Race: Since this section is not protected by lock, same bit | ||
199 | * could be chosen by different process contexts running in | ||
200 | * different processor. So instead of costly lock, we are going | ||
201 | * with loop. | ||
202 | */ | ||
203 | for (i = 0; i < 10; i++) { | ||
204 | slot = find_next_zero_bit(port->allocated, | ||
205 | num_command_slots, 1); | ||
206 | if ((slot < num_command_slots) && | ||
207 | (!test_and_set_bit(slot, port->allocated))) | ||
208 | return slot; | ||
209 | } | ||
210 | dev_warn(&port->dd->pdev->dev, "Failed to get a tag.\n"); | ||
211 | 184 | ||
212 | mtip_check_surprise_removal(port->dd->pdev); | 185 | static void mtip_put_int_command(struct driver_data *dd, struct mtip_cmd *cmd) |
213 | return -1; | 186 | { |
187 | blk_put_request(blk_mq_rq_from_pdu(cmd)); | ||
214 | } | 188 | } |
215 | 189 | ||
216 | /* | 190 | /* |
217 | * Release a command slot. | 191 | * Once we add support for one hctx per mtip group, this will change a bit |
218 | * | ||
219 | * @port Pointer to the port data structure. | ||
220 | * @tag Tag of command to release | ||
221 | * | ||
222 | * return value | ||
223 | * None | ||
224 | */ | 192 | */ |
225 | static inline void release_slot(struct mtip_port *port, int tag) | 193 | static struct request *mtip_rq_from_tag(struct driver_data *dd, |
194 | unsigned int tag) | ||
195 | { | ||
196 | struct blk_mq_hw_ctx *hctx = dd->queue->queue_hw_ctx[0]; | ||
197 | |||
198 | return blk_mq_tag_to_rq(hctx->tags, tag); | ||
199 | } | ||
200 | |||
201 | static struct mtip_cmd *mtip_cmd_from_tag(struct driver_data *dd, | ||
202 | unsigned int tag) | ||
226 | { | 203 | { |
227 | smp_mb__before_clear_bit(); | 204 | struct request *rq = mtip_rq_from_tag(dd, tag); |
228 | clear_bit(tag, port->allocated); | 205 | |
229 | smp_mb__after_clear_bit(); | 206 | return blk_mq_rq_to_pdu(rq); |
230 | } | 207 | } |
231 | 208 | ||
232 | /* | 209 | /* |
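For readers following the conversion: the driver no longer hunts for a free command slot with its own bitmap and retry loop. blk-mq owns tag allocation, and per-command state lives in a driver PDU allocated behind each request (sized by cmd_size in the tag set), so tag-to-command lookup collapses to pointer arithmetic. A minimal sketch of the relationship (helper name illustrative, assuming cmd_size = sizeof(struct mtip_cmd)):

	#include <linux/blk-mq.h>

	/* blk_mq_rq_to_pdu() is literally (void *)rq + sizeof(*rq): the PDU
	 * is carved out of the same allocation as the request itself. */
	static struct mtip_cmd *cmd_of(struct request *rq)
	{
		return blk_mq_rq_to_pdu(rq);
	}

Internal (non-filesystem) commands reuse the same machinery: the blk_mq_alloc_request() call above passes true as its last argument to take a reserved tag, so normal I/O cannot starve the internal slot.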
@@ -248,93 +225,28 @@ static inline void release_slot(struct mtip_port *port, int tag) | |||
248 | * None | 225 | * None |
249 | */ | 226 | */ |
250 | static void mtip_async_complete(struct mtip_port *port, | 227 | static void mtip_async_complete(struct mtip_port *port, |
251 | int tag, | 228 | int tag, struct mtip_cmd *cmd, int status) |
252 | void *data, | ||
253 | int status) | ||
254 | { | 229 | { |
255 | struct mtip_cmd *cmd; | 230 | struct driver_data *dd = port->dd; |
256 | struct driver_data *dd = data; | 231 | struct request *rq; |
257 | int unaligned, cb_status = status ? -EIO : 0; | ||
258 | void (*func)(void *, int); | ||
259 | 232 | ||
260 | if (unlikely(!dd) || unlikely(!port)) | 233 | if (unlikely(!dd) || unlikely(!port)) |
261 | return; | 234 | return; |
262 | 235 | ||
263 | cmd = &port->commands[tag]; | ||
264 | |||
265 | if (unlikely(status == PORT_IRQ_TF_ERR)) { | 236 | if (unlikely(status == PORT_IRQ_TF_ERR)) { |
266 | dev_warn(&port->dd->pdev->dev, | 237 | dev_warn(&port->dd->pdev->dev, |
267 | "Command tag %d failed due to TFE\n", tag); | 238 | "Command tag %d failed due to TFE\n", tag); |
268 | } | 239 | } |
269 | 240 | ||
270 | /* Clear the active flag */ | 241 | /* Unmap the DMA scatter list entries */ |
271 | atomic_set(&port->commands[tag].active, 0); | 242 | dma_unmap_sg(&dd->pdev->dev, cmd->sg, cmd->scatter_ents, cmd->direction); |
272 | |||
273 | /* Upper layer callback */ | ||
274 | func = cmd->async_callback; | ||
275 | if (likely(func && cmpxchg(&cmd->async_callback, func, 0) == func)) { | ||
276 | 243 | ||
277 | /* Unmap the DMA scatter list entries */ | 244 | rq = mtip_rq_from_tag(dd, tag); |
278 | dma_unmap_sg(&dd->pdev->dev, | ||
279 | cmd->sg, | ||
280 | cmd->scatter_ents, | ||
281 | cmd->direction); | ||
282 | 245 | ||
283 | func(cmd->async_data, cb_status); | 246 | if (unlikely(cmd->unaligned)) |
284 | unaligned = cmd->unaligned; | 247 | up(&port->cmd_slot_unal); |
285 | 248 | ||
286 | /* Clear the allocated bit for the command */ | 249 | blk_mq_end_io(rq, status ? -EIO : 0); |
287 | release_slot(port, tag); | ||
288 | |||
289 | if (unlikely(unaligned)) | ||
290 | up(&port->cmd_slot_unal); | ||
291 | else | ||
292 | up(&port->cmd_slot); | ||
293 | } | ||
294 | } | ||
295 | |||
296 | /* | ||
297 | * This function is called for clean the pending command in the | ||
298 | * command slot during the surprise removal of device and return | ||
299 | * error to the upper layer. | ||
300 | * | ||
301 | * @dd Pointer to the DRIVER_DATA structure. | ||
302 | * | ||
303 | * return value | ||
304 | * None | ||
305 | */ | ||
306 | static void mtip_command_cleanup(struct driver_data *dd) | ||
307 | { | ||
308 | int tag = 0; | ||
309 | struct mtip_cmd *cmd; | ||
310 | struct mtip_port *port = dd->port; | ||
311 | unsigned int num_cmd_slots = dd->slot_groups * 32; | ||
312 | |||
313 | if (!test_bit(MTIP_DDF_INIT_DONE_BIT, &dd->dd_flag)) | ||
314 | return; | ||
315 | |||
316 | if (!port) | ||
317 | return; | ||
318 | |||
319 | cmd = &port->commands[MTIP_TAG_INTERNAL]; | ||
320 | if (atomic_read(&cmd->active)) | ||
321 | if (readl(port->cmd_issue[MTIP_TAG_INTERNAL]) & | ||
322 | (1 << MTIP_TAG_INTERNAL)) | ||
323 | if (cmd->comp_func) | ||
324 | cmd->comp_func(port, MTIP_TAG_INTERNAL, | ||
325 | cmd->comp_data, -ENODEV); | ||
326 | |||
327 | while (1) { | ||
328 | tag = find_next_bit(port->allocated, num_cmd_slots, tag); | ||
329 | if (tag >= num_cmd_slots) | ||
330 | break; | ||
331 | |||
332 | cmd = &port->commands[tag]; | ||
333 | if (atomic_read(&cmd->active)) | ||
334 | mtip_async_complete(port, tag, dd, -ENODEV); | ||
335 | } | ||
336 | |||
337 | set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag); | ||
338 | } | 250 | } |
339 | 251 | ||
340 | /* | 252 | /* |
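The completion path shrinks accordingly: no active flag, no async_callback/async_data indirection, no manual slot release. The driver unmaps the scatterlist, releases the unaligned-write semaphore if it was taken, and hands the request back to the block layer, which retires the tag. In sketch form (hypothetical helper, error mapping as used above):

	static void mtip_complete_rq(struct request *rq, int hw_status)
	{
		/* blk_mq_end_io() updates byte counts, frees the tag and
		 * completes the request; -EIO propagates taskfile errors. */
		blk_mq_end_io(rq, hw_status ? -EIO : 0);
	}

mtip_command_cleanup() could be deleted outright largely because the block layer now tracks every in-flight request, so the driver no longer needs its own slot walk on surprise removal.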
@@ -388,8 +300,6 @@ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) | |||
388 | { | 300 | { |
389 | int group = tag >> 5; | 301 | int group = tag >> 5; |
390 | 302 | ||
391 | atomic_set(&port->commands[tag].active, 1); | ||
392 | |||
393 | /* guard SACT and CI registers */ | 303 | /* guard SACT and CI registers */ |
394 | spin_lock(&port->cmd_issue_lock[group]); | 304 | spin_lock(&port->cmd_issue_lock[group]); |
395 | writel((1 << MTIP_TAG_BIT(tag)), | 305 | writel((1 << MTIP_TAG_BIT(tag)), |
@@ -397,10 +307,6 @@ static inline void mtip_issue_ncq_command(struct mtip_port *port, int tag) | |||
397 | writel((1 << MTIP_TAG_BIT(tag)), | 307 | writel((1 << MTIP_TAG_BIT(tag)), |
398 | port->cmd_issue[MTIP_TAG_INDEX(tag)]); | 308 | port->cmd_issue[MTIP_TAG_INDEX(tag)]); |
399 | spin_unlock(&port->cmd_issue_lock[group]); | 309 | spin_unlock(&port->cmd_issue_lock[group]); |
400 | |||
401 | /* Set the command's timeout value.*/ | ||
402 | port->commands[tag].comp_time = jiffies + msecs_to_jiffies( | ||
403 | MTIP_NCQ_COMMAND_TIMEOUT_MS); | ||
404 | } | 310 | } |
405 | 311 | ||
406 | /* | 312 | /* |
@@ -648,132 +554,13 @@ static void print_tags(struct driver_data *dd, | |||
648 | 554 | ||
649 | memset(tagmap, 0, sizeof(tagmap)); | 555 | memset(tagmap, 0, sizeof(tagmap)); |
650 | for (group = SLOTBITS_IN_LONGS; group > 0; group--) | 556 | for (group = SLOTBITS_IN_LONGS; group > 0; group--) |
651 | tagmap_len = sprintf(tagmap + tagmap_len, "%016lX ", | 557 | tagmap_len += sprintf(tagmap + tagmap_len, "%016lX ", |
652 | tagbits[group-1]); | 558 | tagbits[group-1]); |
653 | dev_warn(&dd->pdev->dev, | 559 | dev_warn(&dd->pdev->dev, |
654 | "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap); | 560 | "%d command(s) %s: tagmap [%s]", cnt, msg, tagmap); |
655 | } | 561 | } |
656 | 562 | ||
657 | /* | 563 | /* |
658 | * Called periodically to see if any read/write commands are | ||
659 | * taking too long to complete. | ||
660 | * | ||
661 | * @data Pointer to the PORT data structure. | ||
662 | * | ||
663 | * return value | ||
664 | * None | ||
665 | */ | ||
666 | static void mtip_timeout_function(unsigned long int data) | ||
667 | { | ||
668 | struct mtip_port *port = (struct mtip_port *) data; | ||
669 | struct host_to_dev_fis *fis; | ||
670 | struct mtip_cmd *cmd; | ||
671 | int unaligned, tag, cmdto_cnt = 0; | ||
672 | unsigned int bit, group; | ||
673 | unsigned int num_command_slots; | ||
674 | unsigned long to, tagaccum[SLOTBITS_IN_LONGS]; | ||
675 | void (*func)(void *, int); | ||
676 | |||
677 | if (unlikely(!port)) | ||
678 | return; | ||
679 | |||
680 | if (unlikely(port->dd->sr)) | ||
681 | return; | ||
682 | |||
683 | if (test_bit(MTIP_DDF_RESUME_BIT, &port->dd->dd_flag)) { | ||
684 | mod_timer(&port->cmd_timer, | ||
685 | jiffies + msecs_to_jiffies(30000)); | ||
686 | return; | ||
687 | } | ||
688 | /* clear the tag accumulator */ | ||
689 | memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long)); | ||
690 | num_command_slots = port->dd->slot_groups * 32; | ||
691 | |||
692 | for (tag = 0; tag < num_command_slots; tag++) { | ||
693 | /* | ||
694 | * Skip internal command slot as it has | ||
695 | * its own timeout mechanism | ||
696 | */ | ||
697 | if (tag == MTIP_TAG_INTERNAL) | ||
698 | continue; | ||
699 | |||
700 | if (atomic_read(&port->commands[tag].active) && | ||
701 | (time_after(jiffies, port->commands[tag].comp_time))) { | ||
702 | group = tag >> 5; | ||
703 | bit = tag & 0x1F; | ||
704 | |||
705 | cmd = &port->commands[tag]; | ||
706 | fis = (struct host_to_dev_fis *) cmd->command; | ||
707 | |||
708 | set_bit(tag, tagaccum); | ||
709 | cmdto_cnt++; | ||
710 | if (cmdto_cnt == 1) | ||
711 | set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); | ||
712 | |||
713 | /* | ||
714 | * Clear the completed bit. This should prevent | ||
715 | * any interrupt handlers from trying to retire | ||
716 | * the command. | ||
717 | */ | ||
718 | writel(1 << bit, port->completed[group]); | ||
719 | |||
720 | /* Clear the active flag for the command */ | ||
721 | atomic_set(&port->commands[tag].active, 0); | ||
722 | |||
723 | func = cmd->async_callback; | ||
724 | if (func && | ||
725 | cmpxchg(&cmd->async_callback, func, 0) == func) { | ||
726 | |||
727 | /* Unmap the DMA scatter list entries */ | ||
728 | dma_unmap_sg(&port->dd->pdev->dev, | ||
729 | cmd->sg, | ||
730 | cmd->scatter_ents, | ||
731 | cmd->direction); | ||
732 | |||
733 | func(cmd->async_data, -EIO); | ||
734 | unaligned = cmd->unaligned; | ||
735 | |||
736 | /* Clear the allocated bit for the command. */ | ||
737 | release_slot(port, tag); | ||
738 | |||
739 | if (unaligned) | ||
740 | up(&port->cmd_slot_unal); | ||
741 | else | ||
742 | up(&port->cmd_slot); | ||
743 | } | ||
744 | } | ||
745 | } | ||
746 | |||
747 | if (cmdto_cnt) { | ||
748 | print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); | ||
749 | if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { | ||
750 | mtip_device_reset(port->dd); | ||
751 | wake_up_interruptible(&port->svc_wait); | ||
752 | } | ||
753 | clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); | ||
754 | } | ||
755 | |||
756 | if (port->ic_pause_timer) { | ||
757 | to = port->ic_pause_timer + msecs_to_jiffies(1000); | ||
758 | if (time_after(jiffies, to)) { | ||
759 | if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { | ||
760 | port->ic_pause_timer = 0; | ||
761 | clear_bit(MTIP_PF_SE_ACTIVE_BIT, &port->flags); | ||
762 | clear_bit(MTIP_PF_DM_ACTIVE_BIT, &port->flags); | ||
763 | clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); | ||
764 | wake_up_interruptible(&port->svc_wait); | ||
765 | } | ||
766 | |||
767 | |||
768 | } | ||
769 | } | ||
770 | |||
771 | /* Restart the timer */ | ||
772 | mod_timer(&port->cmd_timer, | ||
773 | jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); | ||
774 | } | ||
775 | |||
776 | /* | ||
777 | * Internal command completion callback function. | 564 | * Internal command completion callback function. |
778 | * | 565 | * |
779 | * This function is normally called by the driver ISR when an internal | 566 | * This function is normally called by the driver ISR when an internal |
@@ -789,28 +576,19 @@ static void mtip_timeout_function(unsigned long int data) | |||
789 | * None | 576 | * None |
790 | */ | 577 | */ |
791 | static void mtip_completion(struct mtip_port *port, | 578 | static void mtip_completion(struct mtip_port *port, |
792 | int tag, | 579 | int tag, struct mtip_cmd *command, int status) |
793 | void *data, | ||
794 | int status) | ||
795 | { | 580 | { |
796 | struct mtip_cmd *command = &port->commands[tag]; | 581 | struct completion *waiting = command->comp_data; |
797 | struct completion *waiting = data; | ||
798 | if (unlikely(status == PORT_IRQ_TF_ERR)) | 582 | if (unlikely(status == PORT_IRQ_TF_ERR)) |
799 | dev_warn(&port->dd->pdev->dev, | 583 | dev_warn(&port->dd->pdev->dev, |
800 | "Internal command %d completed with TFE\n", tag); | 584 | "Internal command %d completed with TFE\n", tag); |
801 | 585 | ||
802 | command->async_callback = NULL; | ||
803 | command->comp_func = NULL; | ||
804 | |||
805 | complete(waiting); | 586 | complete(waiting); |
806 | } | 587 | } |
807 | 588 | ||
808 | static void mtip_null_completion(struct mtip_port *port, | 589 | static void mtip_null_completion(struct mtip_port *port, |
809 | int tag, | 590 | int tag, struct mtip_cmd *command, int status) |
810 | void *data, | ||
811 | int status) | ||
812 | { | 591 | { |
813 | return; | ||
814 | } | 592 | } |
815 | 593 | ||
816 | static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, | 594 | static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, |
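Note the signature change that ripples through this file: comp_func callbacks now receive the struct mtip_cmd pointer directly instead of an opaque data cookie, since the command can always be recovered from the tag. The new shape, as a sketch:

	/* Completion callback shape after this patch (sketch): */
	void (*comp_func)(struct mtip_port *port, int tag,
			  struct mtip_cmd *cmd, int status);

mtip_completion() keeps its wait object in cmd->comp_data, which is why the NULL-ing of async_callback/comp_func could go away: those fields no longer double as "slot is busy" markers.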
@@ -842,19 +620,16 @@ static void mtip_handle_tfe(struct driver_data *dd) | |||
842 | 620 | ||
843 | port = dd->port; | 621 | port = dd->port; |
844 | 622 | ||
845 | /* Stop the timer to prevent command timeouts. */ | ||
846 | del_timer(&port->cmd_timer); | ||
847 | set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); | 623 | set_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); |
848 | 624 | ||
849 | if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && | 625 | if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && |
850 | test_bit(MTIP_TAG_INTERNAL, port->allocated)) { | 626 | test_bit(MTIP_TAG_INTERNAL, port->allocated)) { |
851 | cmd = &port->commands[MTIP_TAG_INTERNAL]; | 627 | cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); |
852 | dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); | 628 | dbg_printk(MTIP_DRV_NAME " TFE for the internal command\n"); |
853 | 629 | ||
854 | atomic_inc(&cmd->active); /* active > 1 indicates error */ | ||
855 | if (cmd->comp_data && cmd->comp_func) { | 630 | if (cmd->comp_data && cmd->comp_func) { |
856 | cmd->comp_func(port, MTIP_TAG_INTERNAL, | 631 | cmd->comp_func(port, MTIP_TAG_INTERNAL, |
857 | cmd->comp_data, PORT_IRQ_TF_ERR); | 632 | cmd, PORT_IRQ_TF_ERR); |
858 | } | 633 | } |
859 | goto handle_tfe_exit; | 634 | goto handle_tfe_exit; |
860 | } | 635 | } |
@@ -866,6 +641,8 @@ static void mtip_handle_tfe(struct driver_data *dd) | |||
866 | for (group = 0; group < dd->slot_groups; group++) { | 641 | for (group = 0; group < dd->slot_groups; group++) { |
867 | completed = readl(port->completed[group]); | 642 | completed = readl(port->completed[group]); |
868 | 643 | ||
644 | dev_warn(&dd->pdev->dev, "g=%u, comp=%x\n", group, completed); | ||
645 | |||
869 | /* clear completed status register in the hardware.*/ | 646 | /* clear completed status register in the hardware.*/ |
870 | writel(completed, port->completed[group]); | 647 | writel(completed, port->completed[group]); |
871 | 648 | ||
@@ -879,15 +656,11 @@ static void mtip_handle_tfe(struct driver_data *dd) | |||
879 | if (tag == MTIP_TAG_INTERNAL) | 656 | if (tag == MTIP_TAG_INTERNAL) |
880 | continue; | 657 | continue; |
881 | 658 | ||
882 | cmd = &port->commands[tag]; | 659 | cmd = mtip_cmd_from_tag(dd, tag); |
883 | if (likely(cmd->comp_func)) { | 660 | if (likely(cmd->comp_func)) { |
884 | set_bit(tag, tagaccum); | 661 | set_bit(tag, tagaccum); |
885 | cmd_cnt++; | 662 | cmd_cnt++; |
886 | atomic_set(&cmd->active, 0); | 663 | cmd->comp_func(port, tag, cmd, 0); |
887 | cmd->comp_func(port, | ||
888 | tag, | ||
889 | cmd->comp_data, | ||
890 | 0); | ||
891 | } else { | 664 | } else { |
892 | dev_err(&port->dd->pdev->dev, | 665 | dev_err(&port->dd->pdev->dev, |
893 | "Missing completion func for tag %d", | 666 | "Missing completion func for tag %d", |
@@ -947,11 +720,7 @@ static void mtip_handle_tfe(struct driver_data *dd) | |||
947 | for (bit = 0; bit < 32; bit++) { | 720 | for (bit = 0; bit < 32; bit++) { |
948 | reissue = 1; | 721 | reissue = 1; |
949 | tag = (group << 5) + bit; | 722 | tag = (group << 5) + bit; |
950 | cmd = &port->commands[tag]; | 723 | cmd = mtip_cmd_from_tag(dd, tag); |
951 | |||
952 | /* If the active bit is set re-issue the command */ | ||
953 | if (atomic_read(&cmd->active) == 0) | ||
954 | continue; | ||
955 | 724 | ||
956 | fis = (struct host_to_dev_fis *)cmd->command; | 725 | fis = (struct host_to_dev_fis *)cmd->command; |
957 | 726 | ||
@@ -970,11 +739,9 @@ static void mtip_handle_tfe(struct driver_data *dd) | |||
970 | tag, | 739 | tag, |
971 | fail_reason != NULL ? | 740 | fail_reason != NULL ? |
972 | fail_reason : "unknown"); | 741 | fail_reason : "unknown"); |
973 | atomic_set(&cmd->active, 0); | ||
974 | if (cmd->comp_func) { | 742 | if (cmd->comp_func) { |
975 | cmd->comp_func(port, tag, | 743 | cmd->comp_func(port, tag, |
976 | cmd->comp_data, | 744 | cmd, -ENODATA); |
977 | -ENODATA); | ||
978 | } | 745 | } |
979 | continue; | 746 | continue; |
980 | } | 747 | } |
@@ -997,14 +764,9 @@ static void mtip_handle_tfe(struct driver_data *dd) | |||
997 | /* Retire a command that will not be reissued */ | 764 | /* Retire a command that will not be reissued */ |
998 | dev_warn(&port->dd->pdev->dev, | 765 | dev_warn(&port->dd->pdev->dev, |
999 | "retiring tag %d\n", tag); | 766 | "retiring tag %d\n", tag); |
1000 | atomic_set(&cmd->active, 0); | ||
1001 | 767 | ||
1002 | if (cmd->comp_func) | 768 | if (cmd->comp_func) |
1003 | cmd->comp_func( | 769 | cmd->comp_func(port, tag, cmd, PORT_IRQ_TF_ERR); |
1004 | port, | ||
1005 | tag, | ||
1006 | cmd->comp_data, | ||
1007 | PORT_IRQ_TF_ERR); | ||
1008 | else | 770 | else |
1009 | dev_warn(&port->dd->pdev->dev, | 771 | dev_warn(&port->dd->pdev->dev, |
1010 | "Bad completion for tag %d\n", | 772 | "Bad completion for tag %d\n", |
@@ -1017,9 +779,6 @@ handle_tfe_exit: | |||
1017 | /* clear eh_active */ | 779 | /* clear eh_active */ |
1018 | clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); | 780 | clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); |
1019 | wake_up_interruptible(&port->svc_wait); | 781 | wake_up_interruptible(&port->svc_wait); |
1020 | |||
1021 | mod_timer(&port->cmd_timer, | ||
1022 | jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); | ||
1023 | } | 782 | } |
1024 | 783 | ||
1025 | /* | 784 | /* |
@@ -1048,15 +807,10 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, | |||
1048 | if (unlikely(tag == MTIP_TAG_INTERNAL)) | 807 | if (unlikely(tag == MTIP_TAG_INTERNAL)) |
1049 | continue; | 808 | continue; |
1050 | 809 | ||
1051 | command = &port->commands[tag]; | 810 | command = mtip_cmd_from_tag(dd, tag); |
1052 | /* make internal callback */ | 811 | if (likely(command->comp_func)) |
1053 | if (likely(command->comp_func)) { | 812 | command->comp_func(port, tag, command, 0); |
1054 | command->comp_func( | 813 | else { |
1055 | port, | ||
1056 | tag, | ||
1057 | command->comp_data, | ||
1058 | 0); | ||
1059 | } else { | ||
1060 | dev_dbg(&dd->pdev->dev, | 814 | dev_dbg(&dd->pdev->dev, |
1061 | "Null completion for tag %d", | 815 | "Null completion for tag %d", |
1062 | tag); | 816 | tag); |
@@ -1081,16 +835,13 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group, | |||
1081 | static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) | 835 | static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) |
1082 | { | 836 | { |
1083 | struct mtip_port *port = dd->port; | 837 | struct mtip_port *port = dd->port; |
1084 | struct mtip_cmd *cmd = &port->commands[MTIP_TAG_INTERNAL]; | 838 | struct mtip_cmd *cmd = mtip_cmd_from_tag(dd, MTIP_TAG_INTERNAL); |
1085 | 839 | ||
1086 | if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && | 840 | if (test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags) && |
1087 | (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL]) | 841 | (cmd != NULL) && !(readl(port->cmd_issue[MTIP_TAG_INTERNAL]) |
1088 | & (1 << MTIP_TAG_INTERNAL))) { | 842 | & (1 << MTIP_TAG_INTERNAL))) { |
1089 | if (cmd->comp_func) { | 843 | if (cmd->comp_func) { |
1090 | cmd->comp_func(port, | 844 | cmd->comp_func(port, MTIP_TAG_INTERNAL, cmd, 0); |
1091 | MTIP_TAG_INTERNAL, | ||
1092 | cmd->comp_data, | ||
1093 | 0); | ||
1094 | return; | 845 | return; |
1095 | } | 846 | } |
1096 | } | 847 | } |
@@ -1103,8 +854,6 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat) | |||
1103 | */ | 854 | */ |
1104 | static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat) | 855 | static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat) |
1105 | { | 856 | { |
1106 | if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR))) | ||
1107 | mtip_handle_tfe(dd); | ||
1108 | 857 | ||
1109 | if (unlikely(port_stat & PORT_IRQ_CONNECT)) { | 858 | if (unlikely(port_stat & PORT_IRQ_CONNECT)) { |
1110 | dev_warn(&dd->pdev->dev, | 859 | dev_warn(&dd->pdev->dev, |
@@ -1122,6 +871,12 @@ static inline void mtip_process_errors(struct driver_data *dd, u32 port_stat) | |||
1122 | dev_warn(&dd->pdev->dev, | 871 | dev_warn(&dd->pdev->dev, |
1123 | "Port stat errors %x unhandled\n", | 872 | "Port stat errors %x unhandled\n", |
1124 | (port_stat & ~PORT_IRQ_HANDLED)); | 873 | (port_stat & ~PORT_IRQ_HANDLED)); |
874 | if (mtip_check_surprise_removal(dd->pdev)) | ||
875 | return; | ||
876 | } | ||
877 | if (likely(port_stat & (PORT_IRQ_TF_ERR | PORT_IRQ_IF_ERR))) { | ||
878 | set_bit(MTIP_PF_EH_ACTIVE_BIT, &dd->port->flags); | ||
879 | wake_up_interruptible(&dd->port->svc_wait); | ||
1125 | } | 880 | } |
1126 | } | 881 | } |
1127 | 882 | ||
@@ -1222,7 +977,6 @@ static irqreturn_t mtip_irq_handler(int irq, void *instance) | |||
1222 | 977 | ||
1223 | static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) | 978 | static void mtip_issue_non_ncq_command(struct mtip_port *port, int tag) |
1224 | { | 979 | { |
1225 | atomic_set(&port->commands[tag].active, 1); | ||
1226 | writel(1 << MTIP_TAG_BIT(tag), | 980 | writel(1 << MTIP_TAG_BIT(tag), |
1227 | port->cmd_issue[MTIP_TAG_INDEX(tag)]); | 981 | port->cmd_issue[MTIP_TAG_INDEX(tag)]); |
1228 | } | 982 | } |
@@ -1280,6 +1034,8 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) | |||
1280 | unsigned int n; | 1034 | unsigned int n; |
1281 | unsigned int active = 1; | 1035 | unsigned int active = 1; |
1282 | 1036 | ||
1037 | blk_mq_stop_hw_queues(port->dd->queue); | ||
1038 | |||
1283 | to = jiffies + msecs_to_jiffies(timeout); | 1039 | to = jiffies + msecs_to_jiffies(timeout); |
1284 | do { | 1040 | do { |
1285 | if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) && | 1041 | if (test_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags) && |
@@ -1287,8 +1043,13 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) | |||
1287 | msleep(20); | 1043 | msleep(20); |
1288 | continue; /* svc thd is actively issuing commands */ | 1044 | continue; /* svc thd is actively issuing commands */ |
1289 | } | 1045 | } |
1046 | |||
1047 | msleep(100); | ||
1048 | if (mtip_check_surprise_removal(port->dd->pdev)) | ||
1049 | goto err_fault; | ||
1290 | if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag)) | 1050 | if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &port->dd->dd_flag)) |
1291 | return -EFAULT; | 1051 | goto err_fault; |
1052 | |||
1292 | /* | 1053 | /* |
1293 | * Ignore s_active bit 0 of array element 0. | 1054 | * Ignore s_active bit 0 of array element 0. |
1294 | * This bit will always be set | 1055 | * This bit will always be set |
@@ -1299,11 +1060,13 @@ static int mtip_quiesce_io(struct mtip_port *port, unsigned long timeout) | |||
1299 | 1060 | ||
1300 | if (!active) | 1061 | if (!active) |
1301 | break; | 1062 | break; |
1302 | |||
1303 | msleep(20); | ||
1304 | } while (time_before(jiffies, to)); | 1063 | } while (time_before(jiffies, to)); |
1305 | 1064 | ||
1065 | blk_mq_start_stopped_hw_queues(port->dd->queue, true); | ||
1306 | return active ? -EBUSY : 0; | 1066 | return active ? -EBUSY : 0; |
1067 | err_fault: | ||
1068 | blk_mq_start_stopped_hw_queues(port->dd->queue, true); | ||
1069 | return -EFAULT; | ||
1307 | } | 1070 | } |
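The quiesce path now leans on the block layer: stopping the hardware queues keeps queue_rq from issuing anything new while the loop polls s_active for the drive to drain, and both exit paths restart the queues. The skeleton, with error handling elided:

	blk_mq_stop_hw_queues(q);			/* park submissions */
	/* ... poll port s_active registers until idle or timeout ... */
	blk_mq_start_stopped_hw_queues(q, true);	/* async restart */

The msleep(100) plus surprise-removal check added inside the loop also means a yanked device fails the quiesce quickly instead of spinning until the timeout expires.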
1308 | 1071 | ||
1309 | /* | 1072 | /* |
@@ -1335,10 +1098,9 @@ static int mtip_exec_internal_command(struct mtip_port *port, | |||
1335 | { | 1098 | { |
1336 | struct mtip_cmd_sg *command_sg; | 1099 | struct mtip_cmd_sg *command_sg; |
1337 | DECLARE_COMPLETION_ONSTACK(wait); | 1100 | DECLARE_COMPLETION_ONSTACK(wait); |
1338 | int rv = 0, ready2go = 1; | 1101 | struct mtip_cmd *int_cmd; |
1339 | struct mtip_cmd *int_cmd = &port->commands[MTIP_TAG_INTERNAL]; | ||
1340 | unsigned long to; | ||
1341 | struct driver_data *dd = port->dd; | 1102 | struct driver_data *dd = port->dd; |
1103 | int rv = 0; | ||
1342 | 1104 | ||
1343 | /* Make sure the buffer is 8 byte aligned. This is asic specific. */ | 1105 | /* Make sure the buffer is 8 byte aligned. This is asic specific. */ |
1344 | if (buffer & 0x00000007) { | 1106 | if (buffer & 0x00000007) { |
@@ -1346,19 +1108,8 @@ static int mtip_exec_internal_command(struct mtip_port *port, | |||
1346 | return -EFAULT; | 1108 | return -EFAULT; |
1347 | } | 1109 | } |
1348 | 1110 | ||
1349 | to = jiffies + msecs_to_jiffies(timeout); | 1111 | int_cmd = mtip_get_int_command(dd); |
1350 | do { | 1112 | |
1351 | ready2go = !test_and_set_bit(MTIP_TAG_INTERNAL, | ||
1352 | port->allocated); | ||
1353 | if (ready2go) | ||
1354 | break; | ||
1355 | mdelay(100); | ||
1356 | } while (time_before(jiffies, to)); | ||
1357 | if (!ready2go) { | ||
1358 | dev_warn(&dd->pdev->dev, | ||
1359 | "Internal cmd active. new cmd [%02X]\n", fis->command); | ||
1360 | return -EBUSY; | ||
1361 | } | ||
1362 | set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); | 1113 | set_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); |
1363 | port->ic_pause_timer = 0; | 1114 | port->ic_pause_timer = 0; |
1364 | 1115 | ||
@@ -1368,10 +1119,11 @@ static int mtip_exec_internal_command(struct mtip_port *port, | |||
1368 | if (atomic == GFP_KERNEL) { | 1119 | if (atomic == GFP_KERNEL) { |
1369 | if (fis->command != ATA_CMD_STANDBYNOW1) { | 1120 | if (fis->command != ATA_CMD_STANDBYNOW1) { |
1370 | /* wait for io to complete if non atomic */ | 1121 | /* wait for io to complete if non atomic */ |
1371 | if (mtip_quiesce_io(port, 5000) < 0) { | 1122 | if (mtip_quiesce_io(port, |
1123 | MTIP_QUIESCE_IO_TIMEOUT_MS) < 0) { | ||
1372 | dev_warn(&dd->pdev->dev, | 1124 | dev_warn(&dd->pdev->dev, |
1373 | "Failed to quiesce IO\n"); | 1125 | "Failed to quiesce IO\n"); |
1374 | release_slot(port, MTIP_TAG_INTERNAL); | 1126 | mtip_put_int_command(dd, int_cmd); |
1375 | clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); | 1127 | clear_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags); |
1376 | wake_up_interruptible(&port->svc_wait); | 1128 | wake_up_interruptible(&port->svc_wait); |
1377 | return -EBUSY; | 1129 | return -EBUSY; |
@@ -1416,9 +1168,9 @@ static int mtip_exec_internal_command(struct mtip_port *port, | |||
1416 | 1168 | ||
1417 | if (atomic == GFP_KERNEL) { | 1169 | if (atomic == GFP_KERNEL) { |
1418 | /* Wait for the command to complete or timeout. */ | 1170 | /* Wait for the command to complete or timeout. */ |
1419 | if (wait_for_completion_interruptible_timeout( | 1171 | if ((rv = wait_for_completion_interruptible_timeout( |
1420 | &wait, | 1172 | &wait, |
1421 | msecs_to_jiffies(timeout)) <= 0) { | 1173 | msecs_to_jiffies(timeout))) <= 0) { |
1422 | if (rv == -ERESTARTSYS) { /* interrupted */ | 1174 | if (rv == -ERESTARTSYS) { /* interrupted */ |
1423 | dev_err(&dd->pdev->dev, | 1175 | dev_err(&dd->pdev->dev, |
1424 | "Internal command [%02X] was interrupted after %lu ms\n", | 1176 | "Internal command [%02X] was interrupted after %lu ms\n", |
@@ -1497,8 +1249,7 @@ static int mtip_exec_internal_command(struct mtip_port *port, | |||
1497 | } | 1249 | } |
1498 | exec_ic_exit: | 1250 | exec_ic_exit: |
1499 | /* Clear the allocated and active bits for the internal command. */ | 1251 | /* Clear the allocated and active bits for the internal command. */ |
1500 | atomic_set(&int_cmd->active, 0); | 1252 | mtip_put_int_command(dd, int_cmd); |
1501 | release_slot(port, MTIP_TAG_INTERNAL); | ||
1502 | if (rv >= 0 && mtip_pause_ncq(port, fis)) { | 1253 | if (rv >= 0 && mtip_pause_ncq(port, fis)) { |
1503 | /* NCQ paused */ | 1254 | /* NCQ paused */ |
1504 | return rv; | 1255 | return rv; |
@@ -1529,6 +1280,37 @@ static inline void ata_swap_string(u16 *buf, unsigned int len) | |||
1529 | be16_to_cpus(&buf[i]); | 1280 | be16_to_cpus(&buf[i]); |
1530 | } | 1281 | } |
1531 | 1282 | ||
1283 | static void mtip_set_timeout(struct driver_data *dd, | ||
1284 | struct host_to_dev_fis *fis, | ||
1285 | unsigned int *timeout, u8 erasemode) | ||
1286 | { | ||
1287 | switch (fis->command) { | ||
1288 | case ATA_CMD_DOWNLOAD_MICRO: | ||
1289 | *timeout = 120000; /* 2 minutes */ | ||
1290 | break; | ||
1291 | case ATA_CMD_SEC_ERASE_UNIT: | ||
1292 | case 0xFC: | ||
1293 | if (erasemode) | ||
1294 | *timeout = ((*(dd->port->identify + 90) * 2) * 60000); | ||
1295 | else | ||
1296 | *timeout = ((*(dd->port->identify + 89) * 2) * 60000); | ||
1297 | break; | ||
1298 | case ATA_CMD_STANDBYNOW1: | ||
1299 | *timeout = 120000; /* 2 minutes */ | ||
1300 | break; | ||
1301 | case 0xF7: | ||
1302 | case 0xFA: | ||
1303 | *timeout = 60000; /* 60 seconds */ | ||
1304 | break; | ||
1305 | case ATA_CMD_SMART: | ||
1306 | *timeout = 15000; /* 15 seconds */ | ||
1307 | break; | ||
1308 | default: | ||
1309 | *timeout = MTIP_IOCTL_CMD_TIMEOUT_MS; | ||
1310 | break; | ||
1311 | } | ||
1312 | } | ||
1313 | |||
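mtip_set_timeout() is unchanged but moved up the file so the internal-command helpers can call it before issuing. The interesting case is secure erase: IDENTIFY words 89/90 report the drive's advertised erase time in 2-minute units, which the table above converts to milliseconds. Typical use, as now seen in mtip_standby_immediate() and the drive-command paths:

	unsigned int timeout;

	mtip_set_timeout(port->dd, &fis, &timeout, 0);	/* erasemode 0 */
	/* timeout then replaces the old fixed 15s and
	 * MTIP_IOCTL_COMMAND_TIMEOUT_MS constants in the
	 * mtip_exec_internal_command() call. */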
1532 | /* | 1314 | /* |
1533 | * Request the device identity information. | 1315 | * Request the device identity information. |
1534 | * | 1316 | * |
@@ -1576,7 +1358,7 @@ static int mtip_get_identify(struct mtip_port *port, void __user *user_buffer) | |||
1576 | sizeof(u16) * ATA_ID_WORDS, | 1358 | sizeof(u16) * ATA_ID_WORDS, |
1577 | 0, | 1359 | 0, |
1578 | GFP_KERNEL, | 1360 | GFP_KERNEL, |
1579 | MTIP_INTERNAL_COMMAND_TIMEOUT_MS) | 1361 | MTIP_INT_CMD_TIMEOUT_MS) |
1580 | < 0) { | 1362 | < 0) { |
1581 | rv = -1; | 1363 | rv = -1; |
1582 | goto out; | 1364 | goto out; |
@@ -1644,6 +1426,7 @@ static int mtip_standby_immediate(struct mtip_port *port) | |||
1644 | int rv; | 1426 | int rv; |
1645 | struct host_to_dev_fis fis; | 1427 | struct host_to_dev_fis fis; |
1646 | unsigned long start; | 1428 | unsigned long start; |
1429 | unsigned int timeout; | ||
1647 | 1430 | ||
1648 | /* Build the FIS. */ | 1431 | /* Build the FIS. */ |
1649 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | 1432 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); |
@@ -1651,6 +1434,8 @@ static int mtip_standby_immediate(struct mtip_port *port) | |||
1651 | fis.opts = 1 << 7; | 1434 | fis.opts = 1 << 7; |
1652 | fis.command = ATA_CMD_STANDBYNOW1; | 1435 | fis.command = ATA_CMD_STANDBYNOW1; |
1653 | 1436 | ||
1437 | mtip_set_timeout(port->dd, &fis, &timeout, 0); | ||
1438 | |||
1654 | start = jiffies; | 1439 | start = jiffies; |
1655 | rv = mtip_exec_internal_command(port, | 1440 | rv = mtip_exec_internal_command(port, |
1656 | &fis, | 1441 | &fis, |
@@ -1659,7 +1444,7 @@ static int mtip_standby_immediate(struct mtip_port *port) | |||
1659 | 0, | 1444 | 0, |
1660 | 0, | 1445 | 0, |
1661 | GFP_ATOMIC, | 1446 | GFP_ATOMIC, |
1662 | 15000); | 1447 | timeout); |
1663 | dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n", | 1448 | dbg_printk(MTIP_DRV_NAME "Time taken to complete standby cmd: %d ms\n", |
1664 | jiffies_to_msecs(jiffies - start)); | 1449 | jiffies_to_msecs(jiffies - start)); |
1665 | if (rv) | 1450 | if (rv) |
@@ -1705,7 +1490,7 @@ static int mtip_read_log_page(struct mtip_port *port, u8 page, u16 *buffer, | |||
1705 | sectors * ATA_SECT_SIZE, | 1490 | sectors * ATA_SECT_SIZE, |
1706 | 0, | 1491 | 0, |
1707 | GFP_ATOMIC, | 1492 | GFP_ATOMIC, |
1708 | MTIP_INTERNAL_COMMAND_TIMEOUT_MS); | 1493 | MTIP_INT_CMD_TIMEOUT_MS); |
1709 | } | 1494 | } |
1710 | 1495 | ||
1711 | /* | 1496 | /* |
@@ -1998,6 +1783,7 @@ static int exec_drive_task(struct mtip_port *port, u8 *command) | |||
1998 | { | 1783 | { |
1999 | struct host_to_dev_fis fis; | 1784 | struct host_to_dev_fis fis; |
2000 | struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG); | 1785 | struct host_to_dev_fis *reply = (port->rxfis + RX_FIS_D2H_REG); |
1786 | unsigned int to; | ||
2001 | 1787 | ||
2002 | /* Build the FIS. */ | 1788 | /* Build the FIS. */ |
2003 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); | 1789 | memset(&fis, 0, sizeof(struct host_to_dev_fis)); |
@@ -2011,6 +1797,8 @@ static int exec_drive_task(struct mtip_port *port, u8 *command) | |||
2011 | fis.cyl_hi = command[5]; | 1797 | fis.cyl_hi = command[5]; |
2012 | fis.device = command[6] & ~0x10; /* Clear the dev bit*/ | 1798 | fis.device = command[6] & ~0x10; /* Clear the dev bit*/ |
2013 | 1799 | ||
1800 | mtip_set_timeout(port->dd, &fis, &to, 0); | ||
1801 | |||
2014 | dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n", | 1802 | dbg_printk(MTIP_DRV_NAME " %s: User Command: cmd %x, feat %x, nsect %x, sect %x, lcyl %x, hcyl %x, sel %x\n", |
2015 | __func__, | 1803 | __func__, |
2016 | command[0], | 1804 | command[0], |
@@ -2029,7 +1817,7 @@ static int exec_drive_task(struct mtip_port *port, u8 *command) | |||
2029 | 0, | 1817 | 0, |
2030 | 0, | 1818 | 0, |
2031 | GFP_KERNEL, | 1819 | GFP_KERNEL, |
2032 | MTIP_IOCTL_COMMAND_TIMEOUT_MS) < 0) { | 1820 | to) < 0) { |
2033 | return -1; | 1821 | return -1; |
2034 | } | 1822 | } |
2035 | 1823 | ||
@@ -2069,6 +1857,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, | |||
2069 | u8 *buf = NULL; | 1857 | u8 *buf = NULL; |
2070 | dma_addr_t dma_addr = 0; | 1858 | dma_addr_t dma_addr = 0; |
2071 | int rv = 0, xfer_sz = command[3]; | 1859 | int rv = 0, xfer_sz = command[3]; |
1860 | unsigned int to; | ||
2072 | 1861 | ||
2073 | if (xfer_sz) { | 1862 | if (xfer_sz) { |
2074 | if (!user_buffer) | 1863 | if (!user_buffer) |
@@ -2100,6 +1889,8 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, | |||
2100 | fis.cyl_hi = 0xC2; | 1889 | fis.cyl_hi = 0xC2; |
2101 | } | 1890 | } |
2102 | 1891 | ||
1892 | mtip_set_timeout(port->dd, &fis, &to, 0); | ||
1893 | |||
2103 | if (xfer_sz) | 1894 | if (xfer_sz) |
2104 | reply = (port->rxfis + RX_FIS_PIO_SETUP); | 1895 | reply = (port->rxfis + RX_FIS_PIO_SETUP); |
2105 | else | 1896 | else |
@@ -2122,7 +1913,7 @@ static int exec_drive_command(struct mtip_port *port, u8 *command, | |||
2122 | (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0), | 1913 | (xfer_sz ? ATA_SECT_SIZE * xfer_sz : 0), |
2123 | 0, | 1914 | 0, |
2124 | GFP_KERNEL, | 1915 | GFP_KERNEL, |
2125 | MTIP_IOCTL_COMMAND_TIMEOUT_MS) | 1916 | to) |
2126 | < 0) { | 1917 | < 0) { |
2127 | rv = -EFAULT; | 1918 | rv = -EFAULT; |
2128 | goto exit_drive_command; | 1919 | goto exit_drive_command; |
@@ -2202,36 +1993,6 @@ static unsigned int implicit_sector(unsigned char command, | |||
2202 | } | 1993 | } |
2203 | return rv; | 1994 | return rv; |
2204 | } | 1995 | } |
2205 | static void mtip_set_timeout(struct driver_data *dd, | ||
2206 | struct host_to_dev_fis *fis, | ||
2207 | unsigned int *timeout, u8 erasemode) | ||
2208 | { | ||
2209 | switch (fis->command) { | ||
2210 | case ATA_CMD_DOWNLOAD_MICRO: | ||
2211 | *timeout = 120000; /* 2 minutes */ | ||
2212 | break; | ||
2213 | case ATA_CMD_SEC_ERASE_UNIT: | ||
2214 | case 0xFC: | ||
2215 | if (erasemode) | ||
2216 | *timeout = ((*(dd->port->identify + 90) * 2) * 60000); | ||
2217 | else | ||
2218 | *timeout = ((*(dd->port->identify + 89) * 2) * 60000); | ||
2219 | break; | ||
2220 | case ATA_CMD_STANDBYNOW1: | ||
2221 | *timeout = 120000; /* 2 minutes */ | ||
2222 | break; | ||
2223 | case 0xF7: | ||
2224 | case 0xFA: | ||
2225 | *timeout = 60000; /* 60 seconds */ | ||
2226 | break; | ||
2227 | case ATA_CMD_SMART: | ||
2228 | *timeout = 15000; /* 15 seconds */ | ||
2229 | break; | ||
2230 | default: | ||
2231 | *timeout = MTIP_IOCTL_COMMAND_TIMEOUT_MS; | ||
2232 | break; | ||
2233 | } | ||
2234 | } | ||
2235 | 1996 | ||
2236 | /* | 1997 | /* |
2237 | * Executes a taskfile | 1998 | * Executes a taskfile |
@@ -2606,22 +2367,21 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd, | |||
2606 | * return value | 2367 | * return value |
2607 | * None | 2368 | * None |
2608 | */ | 2369 | */ |
2609 | static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector, | 2370 | static void mtip_hw_submit_io(struct driver_data *dd, struct request *rq, |
2610 | int nsect, int nents, int tag, void *callback, | 2371 | struct mtip_cmd *command, int nents, |
2611 | void *data, int dir, int unaligned) | 2372 | struct blk_mq_hw_ctx *hctx) |
2612 | { | 2373 | { |
2613 | struct host_to_dev_fis *fis; | 2374 | struct host_to_dev_fis *fis; |
2614 | struct mtip_port *port = dd->port; | 2375 | struct mtip_port *port = dd->port; |
2615 | struct mtip_cmd *command = &port->commands[tag]; | 2376 | int dma_dir = rq_data_dir(rq) == READ ? DMA_FROM_DEVICE : DMA_TO_DEVICE; |
2616 | int dma_dir = (dir == READ) ? DMA_FROM_DEVICE : DMA_TO_DEVICE; | 2377 | u64 start = blk_rq_pos(rq); |
2617 | u64 start = sector; | 2378 | unsigned int nsect = blk_rq_sectors(rq); |
2618 | 2379 | ||
2619 | /* Map the scatter list for DMA access */ | 2380 | /* Map the scatter list for DMA access */ |
2620 | nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir); | 2381 | nents = dma_map_sg(&dd->pdev->dev, command->sg, nents, dma_dir); |
2621 | 2382 | ||
2622 | command->scatter_ents = nents; | 2383 | command->scatter_ents = nents; |
2623 | 2384 | ||
2624 | command->unaligned = unaligned; | ||
2625 | /* | 2385 | /* |
2626 | * The number of retries for this command before it is | 2386 | * The number of retries for this command before it is |
2627 | * reported as a failure to the upper layers. | 2387 | * reported as a failure to the upper layers. |
@@ -2632,8 +2392,10 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector, | |||
2632 | fis = command->command; | 2392 | fis = command->command; |
2633 | fis->type = 0x27; | 2393 | fis->type = 0x27; |
2634 | fis->opts = 1 << 7; | 2394 | fis->opts = 1 << 7; |
2635 | fis->command = | 2395 | if (rq_data_dir(rq) == READ) |
2636 | (dir == READ ? ATA_CMD_FPDMA_READ : ATA_CMD_FPDMA_WRITE); | 2396 | fis->command = ATA_CMD_FPDMA_READ; |
2397 | else | ||
2398 | fis->command = ATA_CMD_FPDMA_WRITE; | ||
2637 | fis->lba_low = start & 0xFF; | 2399 | fis->lba_low = start & 0xFF; |
2638 | fis->lba_mid = (start >> 8) & 0xFF; | 2400 | fis->lba_mid = (start >> 8) & 0xFF; |
2639 | fis->lba_hi = (start >> 16) & 0xFF; | 2401 | fis->lba_hi = (start >> 16) & 0xFF; |
@@ -2643,14 +2405,14 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector, | |||
2643 | fis->device = 1 << 6; | 2405 | fis->device = 1 << 6; |
2644 | fis->features = nsect & 0xFF; | 2406 | fis->features = nsect & 0xFF; |
2645 | fis->features_ex = (nsect >> 8) & 0xFF; | 2407 | fis->features_ex = (nsect >> 8) & 0xFF; |
2646 | fis->sect_count = ((tag << 3) | (tag >> 5)); | 2408 | fis->sect_count = ((rq->tag << 3) | (rq->tag >> 5)); |
2647 | fis->sect_cnt_ex = 0; | 2409 | fis->sect_cnt_ex = 0; |
2648 | fis->control = 0; | 2410 | fis->control = 0; |
2649 | fis->res2 = 0; | 2411 | fis->res2 = 0; |
2650 | fis->res3 = 0; | 2412 | fis->res3 = 0; |
2651 | fill_command_sg(dd, command, nents); | 2413 | fill_command_sg(dd, command, nents); |
2652 | 2414 | ||
2653 | if (unaligned) | 2415 | if (command->unaligned) |
2654 | fis->device |= 1 << 7; | 2416 | fis->device |= 1 << 7; |
2655 | 2417 | ||
2656 | /* Populate the command header */ | 2418 | /* Populate the command header */ |
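One detail worth calling out in the FIS build above: for FPDMA (NCQ) commands the 5-bit queue tag is carried in bits 7:3 of the sector-count field, so the driver packs the blk-mq tag as

	/* NCQ: tag lives in SECT_COUNT[7:3]; for tag < 32 the (tag >> 5)
	 * term is zero, so this is effectively rq->tag << 3. */
	fis->sect_count = ((rq->tag << 3) | (rq->tag >> 5));

Using rq->tag directly works because the tag set is sized to the hardware's slot groups, so blk-mq tags map one-to-one onto NCQ slots.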
@@ -2668,81 +2430,17 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector, | |||
2668 | command->direction = dma_dir; | 2430 | command->direction = dma_dir; |
2669 | 2431 | ||
2670 | /* | 2432 | /* |
2671 | * Set the completion function and data for the command passed | ||
2672 | * from the upper layer. | ||
2673 | */ | ||
2674 | command->async_data = data; | ||
2675 | command->async_callback = callback; | ||
2676 | |||
2677 | /* | ||
2678 | * To prevent this command from being issued | 2433 | * To prevent this command from being issued |
2679 | * if an internal command is in progress or error handling is active. | 2434 | * if an internal command is in progress or error handling is active. |
2680 | */ | 2435 | */ |
2681 | if (port->flags & MTIP_PF_PAUSE_IO) { | 2436 | if (port->flags & MTIP_PF_PAUSE_IO) { |
2682 | set_bit(tag, port->cmds_to_issue); | 2437 | set_bit(rq->tag, port->cmds_to_issue); |
2683 | set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags); | 2438 | set_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags); |
2684 | return; | 2439 | return; |
2685 | } | 2440 | } |
2686 | 2441 | ||
2687 | /* Issue the command to the hardware */ | 2442 | /* Issue the command to the hardware */ |
2688 | mtip_issue_ncq_command(port, tag); | 2443 | mtip_issue_ncq_command(port, rq->tag); |
2689 | |||
2690 | return; | ||
2691 | } | ||
2692 | |||
2693 | /* | ||
2694 | * Release a command slot. | ||
2695 | * | ||
2696 | * @dd Pointer to the driver data structure. | ||
2697 | * @tag Slot tag | ||
2698 | * | ||
2699 | * return value | ||
2700 | * None | ||
2701 | */ | ||
2702 | static void mtip_hw_release_scatterlist(struct driver_data *dd, int tag, | ||
2703 | int unaligned) | ||
2704 | { | ||
2705 | struct semaphore *sem = unaligned ? &dd->port->cmd_slot_unal : | ||
2706 | &dd->port->cmd_slot; | ||
2707 | release_slot(dd->port, tag); | ||
2708 | up(sem); | ||
2709 | } | ||
2710 | |||
2711 | /* | ||
2712 | * Obtain a command slot and return its associated scatter list. | ||
2713 | * | ||
2714 | * @dd Pointer to the driver data structure. | ||
2715 | * @tag Pointer to an int that will receive the allocated command | ||
2716 | * slot tag. | ||
2717 | * | ||
2718 | * return value | ||
2719 | * Pointer to the scatter list for the allocated command slot | ||
2720 | * or NULL if no command slots are available. | ||
2721 | */ | ||
2722 | static struct scatterlist *mtip_hw_get_scatterlist(struct driver_data *dd, | ||
2723 | int *tag, int unaligned) | ||
2724 | { | ||
2725 | struct semaphore *sem = unaligned ? &dd->port->cmd_slot_unal : | ||
2726 | &dd->port->cmd_slot; | ||
2727 | |||
2728 | /* | ||
2729 | * It is possible that, even with this semaphore, a thread | ||
2730 | * may think that no command slots are available. Therefore, we | ||
2731 | * need to make an attempt to get_slot(). | ||
2732 | */ | ||
2733 | down(sem); | ||
2734 | *tag = get_slot(dd->port); | ||
2735 | |||
2736 | if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag))) { | ||
2737 | up(sem); | ||
2738 | return NULL; | ||
2739 | } | ||
2740 | if (unlikely(*tag < 0)) { | ||
2741 | up(sem); | ||
2742 | return NULL; | ||
2743 | } | ||
2744 | |||
2745 | return dd->port->commands[*tag].sg; | ||
2746 | } | 2444 | } |
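mtip_hw_submit_io() now takes the request and its PDU directly; sector, length, direction, and tag all come from the request, and the removed get/release scatterlist helpers have no equivalent because queue_rq hands the driver everything it needs. A sketch of the submission entry point this serves, in the shape of this era's blk_mq_ops->queue_rq (details assumed, name hypothetical):

	static int mtip_queue_rq_sketch(struct blk_mq_hw_ctx *hctx,
					struct request *rq)
	{
		struct driver_data *dd = hctx->queue->queuedata;
		struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq);
		int nents;

		/* Build cmd->sg from the request's bio segments. */
		nents = blk_rq_map_sg(hctx->queue, rq, cmd->sg);

		mtip_hw_submit_io(dd, rq, cmd, nents, hctx);
		return BLK_MQ_RQ_QUEUE_OK;	/* BUSY/ERROR on failure paths */
	}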
2747 | 2445 | ||
2748 | /* | 2446 | /* |
@@ -3113,6 +2811,7 @@ static int mtip_free_orphan(struct driver_data *dd) | |||
3113 | if (dd->queue) { | 2811 | if (dd->queue) { |
3114 | dd->queue->queuedata = NULL; | 2812 | dd->queue->queuedata = NULL; |
3115 | blk_cleanup_queue(dd->queue); | 2813 | blk_cleanup_queue(dd->queue); |
2814 | blk_mq_free_tag_set(&dd->tags); | ||
3116 | dd->queue = NULL; | 2815 | dd->queue = NULL; |
3117 | } | 2816 | } |
3118 | } | 2817 | } |
@@ -3270,6 +2969,11 @@ static int mtip_service_thread(void *data) | |||
3270 | int ret; | 2969 | int ret; |
3271 | 2970 | ||
3272 | while (1) { | 2971 | while (1) { |
2972 | if (kthread_should_stop() || | ||
2973 | test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) | ||
2974 | goto st_out; | ||
2975 | clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); | ||
2976 | |||
3273 | /* | 2977 | /* |
3274 | * the condition is to check that neither an internal command | 2978 | * the condition is to check that neither an internal command |
3275 | * is in progress nor error handling is active | 2979 | * is in progress nor error handling is active |
@@ -3277,11 +2981,12 @@ static int mtip_service_thread(void *data) | |||
3277 | wait_event_interruptible(port->svc_wait, (port->flags) && | 2981 | wait_event_interruptible(port->svc_wait, (port->flags) && |
3278 | !(port->flags & MTIP_PF_PAUSE_IO)); | 2982 | !(port->flags & MTIP_PF_PAUSE_IO)); |
3279 | 2983 | ||
3280 | if (kthread_should_stop()) | ||
3281 | goto st_out; | ||
3282 | |||
3283 | set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); | 2984 | set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); |
3284 | 2985 | ||
2986 | if (kthread_should_stop() || | ||
2987 | test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) | ||
2988 | goto st_out; | ||
2989 | |||
3285 | /* If I am an orphan, start self cleanup */ | 2990 | /* If I am an orphan, start self cleanup */ |
3286 | if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags)) | 2991 | if (test_bit(MTIP_PF_SR_CLEANUP_BIT, &port->flags)) |
3287 | break; | 2992 | break; |
@@ -3290,6 +2995,16 @@ static int mtip_service_thread(void *data) | |||
3290 | &dd->dd_flag))) | 2995 | &dd->dd_flag))) |
3291 | goto st_out; | 2996 | goto st_out; |
3292 | 2997 | ||
2998 | restart_eh: | ||
2999 | /* Demux bits: start with error handling */ | ||
3000 | if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) { | ||
3001 | mtip_handle_tfe(dd); | ||
3002 | clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); | ||
3003 | } | ||
3004 | |||
3005 | if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) | ||
3006 | goto restart_eh; | ||
3007 | |||
3293 | if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { | 3008 | if (test_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags)) { |
3294 | slot = 1; | 3009 | slot = 1; |
3295 | /* used to restrict the loop to one iteration */ | 3010 | /* used to restrict the loop to one iteration */ |
@@ -3319,16 +3034,14 @@ static int mtip_service_thread(void *data) | |||
3319 | } | 3034 | } |
3320 | 3035 | ||
3321 | clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags); | 3036 | clear_bit(MTIP_PF_ISSUE_CMDS_BIT, &port->flags); |
3322 | } else if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) { | 3037 | } |
3038 | |||
3039 | if (test_bit(MTIP_PF_REBUILD_BIT, &port->flags)) { | ||
3323 | if (mtip_ftl_rebuild_poll(dd) < 0) | 3040 | if (mtip_ftl_rebuild_poll(dd) < 0) |
3324 | set_bit(MTIP_DDF_REBUILD_FAILED_BIT, | 3041 | set_bit(MTIP_DDF_REBUILD_FAILED_BIT, |
3325 | &dd->dd_flag); | 3042 | &dd->dd_flag); |
3326 | clear_bit(MTIP_PF_REBUILD_BIT, &port->flags); | 3043 | clear_bit(MTIP_PF_REBUILD_BIT, &port->flags); |
3327 | } | 3044 | } |
3328 | clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags); | ||
3329 | |||
3330 | if (test_bit(MTIP_PF_SVC_THD_STOP_BIT, &port->flags)) | ||
3331 | goto st_out; | ||
3332 | } | 3045 | } |
3333 | 3046 | ||
3334 | /* wait for pci remove to exit */ | 3047 | /* wait for pci remove to exit */ |
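The service thread is now the funnel for everything that used to happen in timers and IRQ context: taskfile-error handling (restart_eh re-runs mtip_handle_tfe() if another TFE arrived while one was being handled), deferred command issue, and FTL rebuild polling, each demuxed from port->flags after the wait. The loop skeleton, with the flag ordering that matters (sketch only):

	while (1) {
		/* THD_ACTIVE is cleared before sleeping and set after
		 * waking, so quiesce can tell if the thread is mid-issue. */
		clear_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);
		wait_event_interruptible(port->svc_wait,
			port->flags && !(port->flags & MTIP_PF_PAUSE_IO));
		set_bit(MTIP_PF_SVC_THD_ACTIVE_BIT, &port->flags);

		if (test_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags)) {
			mtip_handle_tfe(dd);
			clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags);
		}
		/* ...then ISSUE_CMDS, then REBUILD, as above... */
	}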
@@ -3365,7 +3078,6 @@ st_out: | |||
3365 | */ | 3078 | */ |
3366 | static void mtip_dma_free(struct driver_data *dd) | 3079 | static void mtip_dma_free(struct driver_data *dd) |
3367 | { | 3080 | { |
3368 | int i; | ||
3369 | struct mtip_port *port = dd->port; | 3081 | struct mtip_port *port = dd->port; |
3370 | 3082 | ||
3371 | if (port->block1) | 3083 | if (port->block1) |
@@ -3376,13 +3088,6 @@ static void mtip_dma_free(struct driver_data *dd) | |||
3376 | dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ, | 3088 | dmam_free_coherent(&dd->pdev->dev, AHCI_CMD_TBL_SZ, |
3377 | port->command_list, port->command_list_dma); | 3089 | port->command_list, port->command_list_dma); |
3378 | } | 3090 | } |
3379 | |||
3380 | for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) { | ||
3381 | if (port->commands[i].command) | ||
3382 | dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, | ||
3383 | port->commands[i].command, | ||
3384 | port->commands[i].command_dma); | ||
3385 | } | ||
3386 | } | 3091 | } |
3387 | 3092 | ||
3388 | /* | 3093 | /* |
@@ -3396,8 +3101,6 @@ static void mtip_dma_free(struct driver_data *dd) | |||
3396 | static int mtip_dma_alloc(struct driver_data *dd) | 3101 | static int mtip_dma_alloc(struct driver_data *dd) |
3397 | { | 3102 | { |
3398 | struct mtip_port *port = dd->port; | 3103 | struct mtip_port *port = dd->port; |
3399 | int i, rv = 0; | ||
3400 | u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64; | ||
3401 | 3104 | ||
3402 | /* Allocate dma memory for RX FIS, Identify, and Sector Buffer */ | 3105 | /* Allocate dma memory for RX FIS, Identify, and Sector Buffer */ |
3403 | port->block1 = | 3106 | port->block1 = |
@@ -3430,41 +3133,63 @@ static int mtip_dma_alloc(struct driver_data *dd) | |||
3430 | port->smart_buf = port->block1 + AHCI_SMARTBUF_OFFSET; | 3133 | port->smart_buf = port->block1 + AHCI_SMARTBUF_OFFSET; |
3431 | port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET; | 3134 | port->smart_buf_dma = port->block1_dma + AHCI_SMARTBUF_OFFSET; |
3432 | 3135 | ||
3433 | /* Setup per command SGL DMA region */ | 3136 | return 0; |
3434 | | 3137 | } |
3435 | /* Point the command headers at the command tables */ | ||
3436 | for (i = 0; i < MTIP_MAX_COMMAND_SLOTS; i++) { | ||
3437 | port->commands[i].command = | ||
3438 | dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, | ||
3439 | &port->commands[i].command_dma, GFP_KERNEL); | ||
3440 | if (!port->commands[i].command) { | ||
3441 | rv = -ENOMEM; | ||
3442 | mtip_dma_free(dd); | ||
3443 | return rv; | ||
3444 | } | ||
3445 | memset(port->commands[i].command, 0, CMD_DMA_ALLOC_SZ); | ||
3446 | |||
3447 | port->commands[i].command_header = port->command_list + | ||
3448 | (sizeof(struct mtip_cmd_hdr) * i); | ||
3449 | port->commands[i].command_header_dma = | ||
3450 | dd->port->command_list_dma + | ||
3451 | (sizeof(struct mtip_cmd_hdr) * i); | ||
3452 | 3138 | ||
3453 | if (host_cap_64) | 3139 | static int mtip_hw_get_identify(struct driver_data *dd) |
3454 | port->commands[i].command_header->ctbau = | 3140 | { |
3455 | __force_bit2int cpu_to_le32( | 3141 | struct smart_attr attr242; |
3456 | (port->commands[i].command_dma >> 16) >> 16); | 3142 | unsigned char *buf; |
3143 | int rv; | ||
3457 | 3144 | ||
3458 | port->commands[i].command_header->ctba = | 3145 | if (mtip_get_identify(dd->port, NULL) < 0) |
3459 | __force_bit2int cpu_to_le32( | 3146 | return -EFAULT; |
3460 | port->commands[i].command_dma & 0xFFFFFFFF); | ||
3461 | 3147 | ||
3462 | sg_init_table(port->commands[i].sg, MTIP_MAX_SG); | 3148 | if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == |
3149 | MTIP_FTL_REBUILD_MAGIC) { | ||
3150 | set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags); | ||
3151 | return MTIP_FTL_REBUILD_MAGIC; | ||
3152 | } | ||
3153 | mtip_dump_identify(dd->port); | ||
3463 | 3154 | ||
3464 | /* Mark command as currently inactive */ | 3155 | /* check write protect, over temp and rebuild statuses */ |
3465 | atomic_set(&dd->port->commands[i].active, 0); | 3156 | rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ, |
3157 | dd->port->log_buf, | ||
3158 | dd->port->log_buf_dma, 1); | ||
3159 | if (rv) { | ||
3160 | dev_warn(&dd->pdev->dev, | ||
3161 | "Error in READ LOG EXT (10h) command\n"); | ||
3162 | /* non-critical error, don't fail the load */ | ||
3163 | } else { | ||
3164 | buf = (unsigned char *)dd->port->log_buf; | ||
3165 | if (buf[259] & 0x1) { | ||
3166 | dev_info(&dd->pdev->dev, | ||
3167 | "Write protect bit is set.\n"); | ||
3168 | set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag); | ||
3169 | } | ||
3170 | if (buf[288] == 0xF7) { | ||
3171 | dev_info(&dd->pdev->dev, | ||
3172 | "Exceeded Tmax, drive in thermal shutdown.\n"); | ||
3173 | set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag); | ||
3174 | } | ||
3175 | if (buf[288] == 0xBF) { | ||
3176 | dev_info(&dd->pdev->dev, | ||
3177 | "Drive indicates rebuild has failed.\n"); | ||
3178 | /* TODO */ | ||
3179 | } | ||
3466 | } | 3180 | } |
3467 | return 0; | 3181 | |
3182 | /* get write protect progress */ | ||
3183 | memset(&attr242, 0, sizeof(struct smart_attr)); | ||
3184 | if (mtip_get_smart_attr(dd->port, 242, &attr242)) | ||
3185 | dev_warn(&dd->pdev->dev, | ||
3186 | "Unable to check write protect progress\n"); | ||
3187 | else | ||
3188 | dev_info(&dd->pdev->dev, | ||
3189 | "Write protect progress: %u%% (%u blocks)\n", | ||
3190 | attr242.cur, le32_to_cpu(attr242.data)); | ||
3191 | |||
3192 | return rv; | ||
3468 | } | 3193 | } |
3469 | 3194 | ||
3470 | /* | 3195 | /* |
@@ -3481,8 +3206,6 @@ static int mtip_hw_init(struct driver_data *dd) | |||
3481 | int rv; | 3206 | int rv; |
3482 | unsigned int num_command_slots; | 3207 | unsigned int num_command_slots; |
3483 | unsigned long timeout, timetaken; | 3208 | unsigned long timeout, timetaken; |
3484 | unsigned char *buf; | ||
3485 | struct smart_attr attr242; | ||
3486 | 3209 | ||
3487 | dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR]; | 3210 | dd->mmio = pcim_iomap_table(dd->pdev)[MTIP_ABAR]; |
3488 | 3211 | ||
@@ -3513,8 +3236,6 @@ static int mtip_hw_init(struct driver_data *dd) | |||
3513 | else | 3236 | else |
3514 | dd->unal_qdepth = 0; | 3237 | dd->unal_qdepth = 0; |
3515 | 3238 | ||
3516 | /* Counting semaphore to track command slot usage */ | ||
3517 | sema_init(&dd->port->cmd_slot, num_command_slots - 1 - dd->unal_qdepth); | ||
3518 | sema_init(&dd->port->cmd_slot_unal, dd->unal_qdepth); | 3239 | sema_init(&dd->port->cmd_slot_unal, dd->unal_qdepth); |
3519 | 3240 | ||
3520 | /* Spinlock to prevent concurrent issue */ | 3241 | /* Spinlock to prevent concurrent issue */ |
@@ -3599,73 +3320,16 @@ static int mtip_hw_init(struct driver_data *dd) | |||
3599 | writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, | 3320 | writel(readl(dd->mmio + HOST_CTL) | HOST_IRQ_EN, |
3600 | dd->mmio + HOST_CTL); | 3321 | dd->mmio + HOST_CTL); |
3601 | 3322 | ||
3602 | init_timer(&dd->port->cmd_timer); | ||
3603 | init_waitqueue_head(&dd->port->svc_wait); | 3323 | init_waitqueue_head(&dd->port->svc_wait); |
3604 | 3324 | ||
3605 | dd->port->cmd_timer.data = (unsigned long int) dd->port; | ||
3606 | dd->port->cmd_timer.function = mtip_timeout_function; | ||
3607 | mod_timer(&dd->port->cmd_timer, | ||
3608 | jiffies + msecs_to_jiffies(MTIP_TIMEOUT_CHECK_PERIOD)); | ||
3609 | |||
3610 | |||
3611 | if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { | 3325 | if (test_bit(MTIP_DDF_REMOVE_PENDING_BIT, &dd->dd_flag)) { |
3612 | rv = -EFAULT; | 3326 | rv = -EFAULT; |
3613 | goto out3; | 3327 | goto out3; |
3614 | } | 3328 | } |
3615 | 3329 | ||
3616 | if (mtip_get_identify(dd->port, NULL) < 0) { | ||
3617 | rv = -EFAULT; | ||
3618 | goto out3; | ||
3619 | } | ||
3620 | mtip_dump_identify(dd->port); | ||
3621 | |||
3622 | if (*(dd->port->identify + MTIP_FTL_REBUILD_OFFSET) == | ||
3623 | MTIP_FTL_REBUILD_MAGIC) { | ||
3624 | set_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags); | ||
3625 | return MTIP_FTL_REBUILD_MAGIC; | ||
3626 | } | ||
3627 | |||
3628 | /* check write protect, over temp and rebuild statuses */ | ||
3629 | rv = mtip_read_log_page(dd->port, ATA_LOG_SATA_NCQ, | ||
3630 | dd->port->log_buf, | ||
3631 | dd->port->log_buf_dma, 1); | ||
3632 | if (rv) { | ||
3633 | dev_warn(&dd->pdev->dev, | ||
3634 | "Error in READ LOG EXT (10h) command\n"); | ||
3635 | /* non-critical error, don't fail the load */ | ||
3636 | } else { | ||
3637 | buf = (unsigned char *)dd->port->log_buf; | ||
3638 | if (buf[259] & 0x1) { | ||
3639 | dev_info(&dd->pdev->dev, | ||
3640 | "Write protect bit is set.\n"); | ||
3641 | set_bit(MTIP_DDF_WRITE_PROTECT_BIT, &dd->dd_flag); | ||
3642 | } | ||
3643 | if (buf[288] == 0xF7) { | ||
3644 | dev_info(&dd->pdev->dev, | ||
3645 | "Exceeded Tmax, drive in thermal shutdown.\n"); | ||
3646 | set_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag); | ||
3647 | } | ||
3648 | if (buf[288] == 0xBF) { | ||
3649 | dev_info(&dd->pdev->dev, | ||
3650 | "Drive is in security locked state.\n"); | ||
3651 | set_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag); | ||
3652 | } | ||
3653 | } | ||
3654 | |||
3655 | /* get write protect progress */ | ||
3656 | memset(&attr242, 0, sizeof(struct smart_attr)); | ||
3657 | if (mtip_get_smart_attr(dd->port, 242, &attr242)) | ||
3658 | dev_warn(&dd->pdev->dev, | ||
3659 | "Unable to check write protect progress\n"); | ||
3660 | else | ||
3661 | dev_info(&dd->pdev->dev, | ||
3662 | "Write protect progress: %u%% (%u blocks)\n", | ||
3663 | attr242.cur, le32_to_cpu(attr242.data)); | ||
3664 | return rv; | 3330 | return rv; |
3665 | 3331 | ||
3666 | out3: | 3332 | out3: |
3667 | del_timer_sync(&dd->port->cmd_timer); | ||
3668 | |||
3669 | /* Disable interrupts on the HBA. */ | 3333 | /* Disable interrupts on the HBA. */ |
3670 | writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, | 3334 | writel(readl(dd->mmio + HOST_CTL) & ~HOST_IRQ_EN, |
3671 | dd->mmio + HOST_CTL); | 3335 | dd->mmio + HOST_CTL); |
@@ -3685,6 +3349,22 @@ out1: | |||
3685 | return rv; | 3349 | return rv; |
3686 | } | 3350 | } |
3687 | 3351 | ||
3352 | static void mtip_standby_drive(struct driver_data *dd) | ||
3353 | { | ||
3354 | if (dd->sr) | ||
3355 | return; | ||
3356 | |||
3357 | /* | ||
3358 | * Send standby immediate (E0h) to the drive so that it | ||
3359 | * saves its state. | ||
3360 | */ | ||
3361 | if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) && | ||
3362 | !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) | ||
3363 | if (mtip_standby_immediate(dd->port)) | ||
3364 | dev_warn(&dd->pdev->dev, | ||
3365 | "STANDBY IMMEDIATE failed\n"); | ||
3366 | } | ||
3367 | |||
3688 | /* | 3368 | /* |
3689 | * Called to deinitialize an interface. | 3369 | * Called to deinitialize an interface. |
3690 | * | 3370 | * |
@@ -3700,12 +3380,6 @@ static int mtip_hw_exit(struct driver_data *dd) | |||
3700 | * saves its state. | 3380 | * saves its state. |
3701 | */ | 3381 | */ |
3702 | if (!dd->sr) { | 3382 | if (!dd->sr) { |
3703 | if (!test_bit(MTIP_PF_REBUILD_BIT, &dd->port->flags) && | ||
3704 | !test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag)) | ||
3705 | if (mtip_standby_immediate(dd->port)) | ||
3706 | dev_warn(&dd->pdev->dev, | ||
3707 | "STANDBY IMMEDIATE failed\n"); | ||
3708 | |||
3709 | /* de-initialize the port. */ | 3383 | /* de-initialize the port. */ |
3710 | mtip_deinit_port(dd->port); | 3384 | mtip_deinit_port(dd->port); |
3711 | 3385 | ||
@@ -3714,8 +3388,6 @@ static int mtip_hw_exit(struct driver_data *dd) | |||
3714 | dd->mmio + HOST_CTL); | 3388 | dd->mmio + HOST_CTL); |
3715 | } | 3389 | } |
3716 | 3390 | ||
3717 | del_timer_sync(&dd->port->cmd_timer); | ||
3718 | |||
3719 | /* Release the IRQ. */ | 3391 | /* Release the IRQ. */ |
3720 | irq_set_affinity_hint(dd->pdev->irq, NULL); | 3392 | irq_set_affinity_hint(dd->pdev->irq, NULL); |
3721 | devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); | 3393 | devm_free_irq(&dd->pdev->dev, dd->pdev->irq, dd); |
@@ -4032,100 +3704,138 @@ static const struct block_device_operations mtip_block_ops = { | |||
4032 | * | 3704 | * |
4033 | * @queue Pointer to the request queue. Unused other than to obtain | 3705 | * @queue Pointer to the request queue. Unused other than to obtain |
4034 | * the driver data structure. | 3706 | * the driver data structure. |
4035 | * @bio Pointer to the BIO. | 3707 | * @rq Pointer to the request. |
4036 | * | 3708 | * |
4037 | */ | 3709 | */ |
4038 | static void mtip_make_request(struct request_queue *queue, struct bio *bio) | 3710 | static int mtip_submit_request(struct blk_mq_hw_ctx *hctx, struct request *rq) |
4039 | { | 3711 | { |
4040 | struct driver_data *dd = queue->queuedata; | 3712 | struct driver_data *dd = hctx->queue->queuedata; |
4041 | struct scatterlist *sg; | 3713 | struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); |
4042 | struct bio_vec bvec; | 3714 | unsigned int nents; |
4043 | struct bvec_iter iter; | ||
4044 | int nents = 0; | ||
4045 | int tag = 0, unaligned = 0; | ||
4046 | 3715 | ||
4047 | if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { | 3716 | if (unlikely(dd->dd_flag & MTIP_DDF_STOP_IO)) { |
4048 | if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, | 3717 | if (unlikely(test_bit(MTIP_DDF_REMOVE_PENDING_BIT, |
4049 | &dd->dd_flag))) { | 3718 | &dd->dd_flag))) { |
4050 | bio_endio(bio, -ENXIO); | 3719 | return -ENXIO; |
4051 | return; | ||
4052 | } | 3720 | } |
4053 | if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) { | 3721 | if (unlikely(test_bit(MTIP_DDF_OVER_TEMP_BIT, &dd->dd_flag))) { |
4054 | bio_endio(bio, -ENODATA); | 3722 | return -ENODATA; |
4055 | return; | ||
4056 | } | 3723 | } |
4057 | if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT, | 3724 | if (unlikely(test_bit(MTIP_DDF_WRITE_PROTECT_BIT, |
4058 | &dd->dd_flag) && | 3725 | &dd->dd_flag) && |
4059 | bio_data_dir(bio))) { | 3726 | rq_data_dir(rq))) { |
4060 | bio_endio(bio, -ENODATA); | 3727 | return -ENODATA; |
4061 | return; | ||
4062 | } | ||
4063 | if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) { | ||
4064 | bio_endio(bio, -ENODATA); | ||
4065 | return; | ||
4066 | } | ||
4067 | if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) { | ||
4068 | bio_endio(bio, -ENXIO); | ||
4069 | return; | ||
4070 | } | 3728 | } |
3729 | if (unlikely(test_bit(MTIP_DDF_SEC_LOCK_BIT, &dd->dd_flag))) | ||
3730 | return -ENODATA; | ||
3731 | if (test_bit(MTIP_DDF_REBUILD_FAILED_BIT, &dd->dd_flag)) | ||
3732 | return -ENXIO; | ||
4071 | } | 3733 | } |
4072 | 3734 | ||
4073 | if (unlikely(bio->bi_rw & REQ_DISCARD)) { | 3735 | if (rq->cmd_flags & REQ_DISCARD) { |
4074 | bio_endio(bio, mtip_send_trim(dd, bio->bi_iter.bi_sector, | 3736 | int err; |
4075 | bio_sectors(bio))); | ||
4076 | return; | ||
4077 | } | ||
4078 | 3737 | ||
4079 | if (unlikely(!bio_has_data(bio))) { | 3738 | err = mtip_send_trim(dd, blk_rq_pos(rq), blk_rq_sectors(rq)); |
4080 | blk_queue_flush(queue, 0); | 3739 | blk_mq_end_io(rq, err); |
4081 | bio_endio(bio, 0); | 3740 | return 0; |
4082 | return; | ||
4083 | } | 3741 | } |
4084 | 3742 | ||
4085 | if (bio_data_dir(bio) == WRITE && bio_sectors(bio) <= 64 && | 3743 | /* Create the scatter list for this request. */ |
4086 | dd->unal_qdepth) { | 3744 | nents = blk_rq_map_sg(hctx->queue, rq, cmd->sg); |
4087 | if (bio->bi_iter.bi_sector % 8 != 0) | 3745 | |
4088 | /* Unaligned on 4k boundaries */ | 3746 | /* Issue the read/write. */ |
4089 | unaligned = 1; | 3747 | mtip_hw_submit_io(dd, rq, cmd, nents, hctx); |
4090 | else if (bio_sectors(bio) % 8 != 0) /* Aligned but not 4k/8k */ | 3748 | return 0; |
4091 | unaligned = 1; | 3749 | } |
3750 | |||
3751 | static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx, | ||
3752 | struct request *rq) | ||
3753 | { | ||
3754 | struct driver_data *dd = hctx->queue->queuedata; | ||
3755 | struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); | ||
3756 | |||
3757 | if (!dd->unal_qdepth || rq_data_dir(rq) == READ) | ||
3758 | return false; | ||
3759 | |||
3760 | /* | ||
3761 | * If unaligned depth must be limited on this controller, mark the | ||
3762 | * IO as unaligned if it isn't on a 4k boundary (start or length). | ||
3763 | */ | ||
3764 | if (blk_rq_sectors(rq) <= 64) { | ||
3765 | if ((blk_rq_pos(rq) & 7) || (blk_rq_sectors(rq) & 7)) | ||
3766 | cmd->unaligned = 1; | ||
4092 | } | 3767 | } |
4093 | 3768 | ||
4094 | sg = mtip_hw_get_scatterlist(dd, &tag, unaligned); | 3769 | if (cmd->unaligned && down_trylock(&dd->port->cmd_slot_unal)) |
4095 | if (likely(sg != NULL)) { | 3770 | return true; |
4096 | blk_queue_bounce(queue, &bio); | ||
4097 | 3771 | ||
4098 | if (unlikely((bio)->bi_vcnt > MTIP_MAX_SG)) { | 3772 | return false; |
4099 | dev_warn(&dd->pdev->dev, | 3773 | } |
4100 | "Maximum number of SGL entries exceeded\n"); | ||
4101 | bio_io_error(bio); | ||
4102 | mtip_hw_release_scatterlist(dd, tag, unaligned); | ||
4103 | return; | ||
4104 | } | ||
4105 | 3774 | ||
4106 | /* Create the scatter list for this bio. */ | 3775 | static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) |
4107 | bio_for_each_segment(bvec, bio, iter) { | 3776 | { |
4108 | sg_set_page(&sg[nents], | 3777 | int ret; |
4109 | bvec.bv_page, | ||
4110 | bvec.bv_len, | ||
4111 | bvec.bv_offset); | ||
4112 | nents++; | ||
4113 | } | ||
4114 | 3778 | ||
4115 | /* Issue the read/write. */ | 3779 | if (mtip_check_unal_depth(hctx, rq)) |
4116 | mtip_hw_submit_io(dd, | 3780 | return BLK_MQ_RQ_QUEUE_BUSY; |
4117 | bio->bi_iter.bi_sector, | 3781 | |
4118 | bio_sectors(bio), | 3782 | ret = mtip_submit_request(hctx, rq); |
4119 | nents, | 3783 | if (!ret) |
4120 | tag, | 3784 | return BLK_MQ_RQ_QUEUE_OK; |
4121 | bio_endio, | 3785 | |
4122 | bio, | 3786 | rq->errors = ret; |
4123 | bio_data_dir(bio), | 3787 | return BLK_MQ_RQ_QUEUE_ERROR; |
4124 | unaligned); | 3788 | } |
4125 | } else | 3789 | |
4126 | bio_io_error(bio); | 3790 | static void mtip_free_cmd(void *data, struct request *rq, |
3791 | unsigned int hctx_idx, unsigned int request_idx) | ||
3792 | { | ||
3793 | struct driver_data *dd = data; | ||
3794 | struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); | ||
3795 | |||
3796 | if (!cmd->command) | ||
3797 | return; | ||
3798 | |||
3799 | dmam_free_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, | ||
3800 | cmd->command, cmd->command_dma); | ||
3801 | } | ||
3802 | |||
3803 | static int mtip_init_cmd(void *data, struct request *rq, unsigned int hctx_idx, | ||
3804 | unsigned int request_idx, unsigned int numa_node) | ||
3805 | { | ||
3806 | struct driver_data *dd = data; | ||
3807 | struct mtip_cmd *cmd = blk_mq_rq_to_pdu(rq); | ||
3808 | u32 host_cap_64 = readl(dd->mmio + HOST_CAP) & HOST_CAP_64; | ||
3809 | |||
3810 | cmd->command = dmam_alloc_coherent(&dd->pdev->dev, CMD_DMA_ALLOC_SZ, | ||
3811 | &cmd->command_dma, GFP_KERNEL); | ||
3812 | if (!cmd->command) | ||
3813 | return -ENOMEM; | ||
3814 | |||
3815 | memset(cmd->command, 0, CMD_DMA_ALLOC_SZ); | ||
3816 | |||
3817 | /* Point the command headers at the command tables. */ | ||
3818 | cmd->command_header = dd->port->command_list + | ||
3819 | (sizeof(struct mtip_cmd_hdr) * request_idx); | ||
3820 | cmd->command_header_dma = dd->port->command_list_dma + | ||
3821 | (sizeof(struct mtip_cmd_hdr) * request_idx); | ||
3822 | |||
3823 | if (host_cap_64) | ||
3824 | cmd->command_header->ctbau = __force_bit2int cpu_to_le32((cmd->command_dma >> 16) >> 16); | ||
3825 | |||
3826 | cmd->command_header->ctba = __force_bit2int cpu_to_le32(cmd->command_dma & 0xFFFFFFFF); | ||
3827 | |||
3828 | sg_init_table(cmd->sg, MTIP_MAX_SG); | ||
3829 | return 0; | ||
4127 | } | 3830 | } |
4128 | 3831 | ||
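A note on the two-step shift used above in mtip_init_cmd(): on configurations where dma_addr_t is a 32-bit type, a direct `cmd->command_dma >> 32` would be undefined behavior (shift count equal to the type width), so the upper half is extracted as `(x >> 16) >> 16`, which safely evaluates to 0 there. A minimal sketch, with hypothetical helper names:

    /* Illustrative only: split a dma_addr_t that may be 32 or 64 bits wide. */
    static inline u32 dma_hi32(dma_addr_t addr)
    {
            return (addr >> 16) >> 16;      /* 0 when dma_addr_t is 32-bit */
    }

    static inline u32 dma_lo32(dma_addr_t addr)
    {
            return addr & 0xFFFFFFFF;
    }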
3832 | static struct blk_mq_ops mtip_mq_ops = { | ||
3833 | .queue_rq = mtip_queue_rq, | ||
3834 | .map_queue = blk_mq_map_queue, | ||
3835 | .init_request = mtip_init_cmd, | ||
3836 | .exit_request = mtip_free_cmd, | ||
3837 | }; | ||
3838 | |||
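Most of the mechanical changes in this conversion follow from blk-mq owning per-command state: tag_set.cmd_size asks the core to allocate sizeof(struct mtip_cmd) of driver payload behind every preallocated request, and blk_mq_rq_to_pdu() recovers it. A simplified sketch of the accessor, paraphrased from the blk-mq core rather than quoted:

    static inline void *blk_mq_rq_to_pdu_sketch(struct request *rq)
    {
            /* The driver PDU is laid out immediately after the request. */
            return rq + 1;
    }

Because the PDU lives for the lifetime of the tag set, mtip_init_cmd() and mtip_free_cmd() only have to manage the DMA-coherent command table, not struct mtip_cmd itself.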
4129 | /* | 3839 | /* |
4130 | * Block layer initialization function. | 3840 | * Block layer initialization function. |
4131 | * | 3841 | * |
@@ -4148,11 +3858,7 @@ static int mtip_block_initialize(struct driver_data *dd) | |||
4148 | if (dd->disk) | 3858 | if (dd->disk) |
4149 | goto skip_create_disk; /* hw init done, before rebuild */ | 3859 | goto skip_create_disk; /* hw init done, before rebuild */ |
4150 | 3860 | ||
4151 | /* Initialize the protocol layer. */ | 3861 | if (mtip_hw_init(dd)) { |
4152 | wait_for_rebuild = mtip_hw_init(dd); | ||
4153 | if (wait_for_rebuild < 0) { | ||
4154 | dev_err(&dd->pdev->dev, | ||
4155 | "Protocol layer initialization failed\n"); | ||
4156 | rv = -EINVAL; | 3862 | rv = -EINVAL; |
4157 | goto protocol_init_error; | 3863 | goto protocol_init_error; |
4158 | } | 3864 | } |
@@ -4194,29 +3900,53 @@ static int mtip_block_initialize(struct driver_data *dd) | |||
4194 | 3900 | ||
4195 | mtip_hw_debugfs_init(dd); | 3901 | mtip_hw_debugfs_init(dd); |
4196 | 3902 | ||
4197 | /* | ||
4198 | * if rebuild pending, start the service thread, and delay the block | ||
4199 | * queue creation and add_disk() | ||
4200 | */ | ||
4201 | if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) | ||
4202 | goto start_service_thread; | ||
4203 | |||
4204 | skip_create_disk: | 3903 | skip_create_disk: |
4205 | /* Allocate the request queue. */ | 3904 | memset(&dd->tags, 0, sizeof(dd->tags)); |
4206 | dd->queue = blk_alloc_queue_node(GFP_KERNEL, dd->numa_node); | 3905 | dd->tags.ops = &mtip_mq_ops; |
4207 | if (dd->queue == NULL) { | 3906 | dd->tags.nr_hw_queues = 1; |
3907 | dd->tags.queue_depth = MTIP_MAX_COMMAND_SLOTS; | ||
3908 | dd->tags.reserved_tags = 1; | ||
3909 | dd->tags.cmd_size = sizeof(struct mtip_cmd); | ||
3910 | dd->tags.numa_node = dd->numa_node; | ||
3911 | dd->tags.flags = BLK_MQ_F_SHOULD_MERGE; | ||
3912 | dd->tags.driver_data = dd; | ||
3913 | |||
3914 | rv = blk_mq_alloc_tag_set(&dd->tags); | ||
3915 | if (rv) { | ||
4208 | dev_err(&dd->pdev->dev, | 3916 | dev_err(&dd->pdev->dev, |
4209 | "Unable to allocate request queue\n"); | 3917 | "Unable to allocate request queue\n"); |
4210 | rv = -ENOMEM; | 3918 | rv = -ENOMEM; |
4211 | goto block_queue_alloc_init_error; | 3919 | goto block_queue_alloc_init_error; |
4212 | } | 3920 | } |
4213 | 3921 | ||
4214 | /* Attach our request function to the request queue. */ | 3922 | /* Allocate the request queue. */ |
4215 | blk_queue_make_request(dd->queue, mtip_make_request); | 3923 | dd->queue = blk_mq_init_queue(&dd->tags); |
3924 | if (IS_ERR(dd->queue)) { | ||
3925 | dev_err(&dd->pdev->dev, | ||
3926 | "Unable to allocate request queue\n"); | ||
3927 | rv = -ENOMEM; | ||
3928 | goto block_queue_alloc_init_error; | ||
3929 | } | ||
4216 | 3930 | ||
4217 | dd->disk->queue = dd->queue; | 3931 | dd->disk->queue = dd->queue; |
4218 | dd->queue->queuedata = dd; | 3932 | dd->queue->queuedata = dd; |
4219 | 3933 | ||
3934 | /* Initialize the protocol layer. */ | ||
3935 | wait_for_rebuild = mtip_hw_get_identify(dd); | ||
3936 | if (wait_for_rebuild < 0) { | ||
3937 | dev_err(&dd->pdev->dev, | ||
3938 | "Protocol layer initialization failed\n"); | ||
3939 | rv = -EINVAL; | ||
3940 | goto init_hw_cmds_error; | ||
3941 | } | ||
3942 | |||
3943 | /* | ||
3944 | * if rebuild pending, start the service thread, and delay the block | ||
3945 | * queue creation and add_disk() | ||
3946 | */ | ||
3947 | if (wait_for_rebuild == MTIP_FTL_REBUILD_MAGIC) | ||
3948 | goto start_service_thread; | ||
3949 | |||
4220 | /* Set device limits. */ | 3950 | /* Set device limits. */ |
4221 | set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); | 3951 | set_bit(QUEUE_FLAG_NONROT, &dd->queue->queue_flags); |
4222 | blk_queue_max_segments(dd->queue, MTIP_MAX_SG); | 3952 | blk_queue_max_segments(dd->queue, MTIP_MAX_SG); |
@@ -4295,8 +4025,9 @@ kthread_run_error: | |||
4295 | del_gendisk(dd->disk); | 4025 | del_gendisk(dd->disk); |
4296 | 4026 | ||
4297 | read_capacity_error: | 4027 | read_capacity_error: |
4028 | init_hw_cmds_error: | ||
4298 | blk_cleanup_queue(dd->queue); | 4029 | blk_cleanup_queue(dd->queue); |
4299 | 4030 | blk_mq_free_tag_set(&dd->tags); | |
4300 | block_queue_alloc_init_error: | 4031 | block_queue_alloc_init_error: |
4301 | mtip_hw_debugfs_exit(dd); | 4032 | mtip_hw_debugfs_exit(dd); |
4302 | disk_index_error: | 4033 | disk_index_error: |
@@ -4345,6 +4076,9 @@ static int mtip_block_remove(struct driver_data *dd) | |||
4345 | kobject_put(kobj); | 4076 | kobject_put(kobj); |
4346 | } | 4077 | } |
4347 | } | 4078 | } |
4079 | |||
4080 | mtip_standby_drive(dd); | ||
4081 | |||
4348 | /* | 4082 | /* |
4349 | * Delete our gendisk structure. This also removes the device | 4083 | * Delete our gendisk structure. This also removes the device |
4350 | * from /dev | 4084 | * from /dev |
@@ -4357,6 +4091,7 @@ static int mtip_block_remove(struct driver_data *dd) | |||
4357 | if (dd->disk->queue) { | 4091 | if (dd->disk->queue) { |
4358 | del_gendisk(dd->disk); | 4092 | del_gendisk(dd->disk); |
4359 | blk_cleanup_queue(dd->queue); | 4093 | blk_cleanup_queue(dd->queue); |
4094 | blk_mq_free_tag_set(&dd->tags); | ||
4360 | dd->queue = NULL; | 4095 | dd->queue = NULL; |
4361 | } else | 4096 | } else |
4362 | put_disk(dd->disk); | 4097 | put_disk(dd->disk); |
@@ -4391,6 +4126,8 @@ static int mtip_block_remove(struct driver_data *dd) | |||
4391 | */ | 4126 | */ |
4392 | static int mtip_block_shutdown(struct driver_data *dd) | 4127 | static int mtip_block_shutdown(struct driver_data *dd) |
4393 | { | 4128 | { |
4129 | mtip_hw_shutdown(dd); | ||
4130 | |||
4394 | /* Delete our gendisk structure, and cleanup the blk queue. */ | 4131 | /* Delete our gendisk structure, and cleanup the blk queue. */ |
4395 | if (dd->disk) { | 4132 | if (dd->disk) { |
4396 | dev_info(&dd->pdev->dev, | 4133 | dev_info(&dd->pdev->dev, |
@@ -4399,6 +4136,7 @@ static int mtip_block_shutdown(struct driver_data *dd) | |||
4399 | if (dd->disk->queue) { | 4136 | if (dd->disk->queue) { |
4400 | del_gendisk(dd->disk); | 4137 | del_gendisk(dd->disk); |
4401 | blk_cleanup_queue(dd->queue); | 4138 | blk_cleanup_queue(dd->queue); |
4139 | blk_mq_free_tag_set(&dd->tags); | ||
4402 | } else | 4140 | } else |
4403 | put_disk(dd->disk); | 4141 | put_disk(dd->disk); |
4404 | dd->disk = NULL; | 4142 | dd->disk = NULL; |
@@ -4408,8 +4146,6 @@ static int mtip_block_shutdown(struct driver_data *dd) | |||
4408 | spin_lock(&rssd_index_lock); | 4146 | spin_lock(&rssd_index_lock); |
4409 | ida_remove(&rssd_index_ida, dd->index); | 4147 | ida_remove(&rssd_index_ida, dd->index); |
4410 | spin_unlock(&rssd_index_lock); | 4148 | spin_unlock(&rssd_index_lock); |
4411 | |||
4412 | mtip_hw_shutdown(dd); | ||
4413 | return 0; | 4149 | return 0; |
4414 | } | 4150 | } |
4415 | 4151 | ||
@@ -4479,6 +4215,57 @@ static DEFINE_HANDLER(5); | |||
4479 | static DEFINE_HANDLER(6); | 4215 | static DEFINE_HANDLER(6); |
4480 | static DEFINE_HANDLER(7); | 4216 | static DEFINE_HANDLER(7); |
4481 | 4217 | ||
4218 | static void mtip_disable_link_opts(struct driver_data *dd, struct pci_dev *pdev) | ||
4219 | { | ||
4220 | int pos; | ||
4221 | unsigned short pcie_dev_ctrl; | ||
4222 | |||
4223 | pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); | ||
4224 | if (pos) { | ||
4225 | pci_read_config_word(pdev, | ||
4226 | pos + PCI_EXP_DEVCTL, | ||
4227 | &pcie_dev_ctrl); | ||
4228 | if (pcie_dev_ctrl & (1 << 11) || | ||
4229 | pcie_dev_ctrl & (1 << 4)) { | ||
4230 | dev_info(&dd->pdev->dev, | ||
4231 | "Disabling ERO/No-Snoop on bridge device %04x:%04x\n", | ||
4232 | pdev->vendor, pdev->device); | ||
4233 | pcie_dev_ctrl &= ~(PCI_EXP_DEVCTL_NOSNOOP_EN | | ||
4234 | PCI_EXP_DEVCTL_RELAX_EN); | ||
4235 | pci_write_config_word(pdev, | ||
4236 | pos + PCI_EXP_DEVCTL, | ||
4237 | pcie_dev_ctrl); | ||
4238 | } | ||
4239 | } | ||
4240 | } | ||
4241 | |||
4242 | static void mtip_fix_ero_nosnoop(struct driver_data *dd, struct pci_dev *pdev) | ||
4243 | { | ||
4244 | /* | ||
4245 | * This workaround is specific to AMD/ATI chipsets whose PCI upstream | ||
4246 | * bridge has a device id of 0x5aXX | ||
4247 | */ | ||
4248 | if (pdev->bus && pdev->bus->self) { | ||
4249 | if (pdev->bus->self->vendor == PCI_VENDOR_ID_ATI && | ||
4250 | ((pdev->bus->self->device & 0xff00) == 0x5a00)) { | ||
4251 | mtip_disable_link_opts(dd, pdev->bus->self); | ||
4252 | } else { | ||
4253 | /* Check further up the topology */ | ||
4254 | struct pci_dev *parent_dev = pdev->bus->self; | ||
4255 | if (parent_dev->bus && | ||
4256 | parent_dev->bus->parent && | ||
4257 | parent_dev->bus->parent->self && | ||
4258 | parent_dev->bus->parent->self->vendor == | ||
4259 | PCI_VENDOR_ID_ATI && | ||
4260 | (parent_dev->bus->parent->self->device & | ||
4261 | 0xff00) == 0x5a00) { | ||
4262 | mtip_disable_link_opts(dd, | ||
4263 | parent_dev->bus->parent->self); | ||
4264 | } | ||
4265 | } | ||
4266 | } | ||
4267 | } | ||
4268 | |||
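The two Device Control bits tested above are Relaxed Ordering Enable (bit 4, PCI_EXP_DEVCTL_RELAX_EN) and No Snoop Enable (bit 11, PCI_EXP_DEVCTL_NOSNOOP_EN). For comparison, a sketch of an alternative using the pcie_capability accessors (not what the patch does, but equivalent in effect):

    static void mtip_disable_link_opts_alt(struct pci_dev *pdev)
    {
            /* One read-modify-write; a no-op if pdev has no PCIe capability. */
            pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL,
                                       PCI_EXP_DEVCTL_NOSNOOP_EN |
                                       PCI_EXP_DEVCTL_RELAX_EN);
    }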
4482 | /* | 4269 | /* |
4483 | * Called for each supported PCI device detected. | 4270 | * Called for each supported PCI device detected. |
4484 | * | 4271 | * |
@@ -4630,6 +4417,8 @@ static int mtip_pci_probe(struct pci_dev *pdev, | |||
4630 | goto msi_initialize_err; | 4417 | goto msi_initialize_err; |
4631 | } | 4418 | } |
4632 | 4419 | ||
4420 | mtip_fix_ero_nosnoop(dd, pdev); | ||
4421 | |||
4633 | /* Initialize the block layer. */ | 4422 | /* Initialize the block layer. */ |
4634 | rv = mtip_block_initialize(dd); | 4423 | rv = mtip_block_initialize(dd); |
4635 | if (rv < 0) { | 4424 | if (rv < 0) { |
@@ -4710,8 +4499,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) | |||
4710 | dev_warn(&dd->pdev->dev, | 4499 | dev_warn(&dd->pdev->dev, |
4711 | "Completion workers still active!\n"); | 4500 | "Completion workers still active!\n"); |
4712 | } | 4501 | } |
4713 | /* Cleanup the outstanding commands */ | ||
4714 | mtip_command_cleanup(dd); | ||
4715 | 4502 | ||
4716 | /* Clean up the block layer. */ | 4503 | /* Clean up the block layer. */ |
4717 | mtip_block_remove(dd); | 4504 | mtip_block_remove(dd); |
@@ -4737,8 +4524,6 @@ static void mtip_pci_remove(struct pci_dev *pdev) | |||
4737 | 4524 | ||
4738 | pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); | 4525 | pcim_iounmap_regions(pdev, 1 << MTIP_ABAR); |
4739 | pci_set_drvdata(pdev, NULL); | 4526 | pci_set_drvdata(pdev, NULL); |
4740 | pci_dev_put(pdev); | ||
4741 | |||
4742 | } | 4527 | } |
4743 | 4528 | ||
4744 | /* | 4529 | /* |
@@ -4935,13 +4720,13 @@ static int __init mtip_init(void) | |||
4935 | */ | 4720 | */ |
4936 | static void __exit mtip_exit(void) | 4721 | static void __exit mtip_exit(void) |
4937 | { | 4722 | { |
4938 | debugfs_remove_recursive(dfs_parent); | ||
4939 | |||
4940 | /* Release the allocated major block device number. */ | 4723 | /* Release the allocated major block device number. */ |
4941 | unregister_blkdev(mtip_major, MTIP_DRV_NAME); | 4724 | unregister_blkdev(mtip_major, MTIP_DRV_NAME); |
4942 | 4725 | ||
4943 | /* Unregister the PCI driver. */ | 4726 | /* Unregister the PCI driver. */ |
4944 | pci_unregister_driver(&mtip_pci_driver); | 4727 | pci_unregister_driver(&mtip_pci_driver); |
4728 | |||
4729 | debugfs_remove_recursive(dfs_parent); | ||
4945 | } | 4730 | } |
4946 | 4731 | ||
4947 | MODULE_AUTHOR("Micron Technology, Inc"); | 4732 | MODULE_AUTHOR("Micron Technology, Inc"); |
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h index ffb955e7ccb9..4b9b554234bc 100644 --- a/drivers/block/mtip32xx/mtip32xx.h +++ b/drivers/block/mtip32xx/mtip32xx.h | |||
@@ -40,9 +40,11 @@ | |||
40 | #define MTIP_MAX_RETRIES 2 | 40 | #define MTIP_MAX_RETRIES 2 |
41 | 41 | ||
42 | /* Various timeout values in ms */ | 42 | /* Various timeout values in ms */ |
43 | #define MTIP_NCQ_COMMAND_TIMEOUT_MS 5000 | 43 | #define MTIP_NCQ_CMD_TIMEOUT_MS 15000 |
44 | #define MTIP_IOCTL_COMMAND_TIMEOUT_MS 5000 | 44 | #define MTIP_IOCTL_CMD_TIMEOUT_MS 5000 |
45 | #define MTIP_INTERNAL_COMMAND_TIMEOUT_MS 5000 | 45 | #define MTIP_INT_CMD_TIMEOUT_MS 5000 |
46 | #define MTIP_QUIESCE_IO_TIMEOUT_MS (MTIP_NCQ_CMD_TIMEOUT_MS * \ | ||
47 | (MTIP_MAX_RETRIES + 1)) | ||
46 | 48 | ||
47 | /* check for timeouts every 500ms */ | 49 | /* check for timeouts every 500ms */ |
48 | #define MTIP_TIMEOUT_CHECK_PERIOD 500 | 50 | #define MTIP_TIMEOUT_CHECK_PERIOD 500 |
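The quiesce timeout introduced here is derived, not independent: MTIP_QUIESCE_IO_TIMEOUT_MS = MTIP_NCQ_CMD_TIMEOUT_MS * (MTIP_MAX_RETRIES + 1) = 15000 * 3 = 45000 ms, so with the NCQ command timeout tripled to 15 s the driver now waits up to 45 seconds for in-flight I/O to drain before giving up.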
@@ -331,12 +333,8 @@ struct mtip_cmd { | |||
331 | */ | 333 | */ |
332 | void (*comp_func)(struct mtip_port *port, | 334 | void (*comp_func)(struct mtip_port *port, |
333 | int tag, | 335 | int tag, |
334 | void *data, | 336 | struct mtip_cmd *cmd, |
335 | int status); | 337 | int status); |
336 | /* Additional callback function that may be called by comp_func() */ | ||
337 | void (*async_callback)(void *data, int status); | ||
338 | |||
339 | void *async_data; /* Addl. data passed to async_callback() */ | ||
340 | 338 | ||
341 | int scatter_ents; /* Number of scatter list entries used */ | 339 | int scatter_ents; /* Number of scatter list entries used */ |
342 | 340 | ||
@@ -347,10 +345,6 @@ struct mtip_cmd { | |||
347 | int retries; /* The number of retries left for this command. */ | 345 | int retries; /* The number of retries left for this command. */ |
348 | 346 | ||
349 | int direction; /* Data transfer direction */ | 347 | int direction; /* Data transfer direction */ |
350 | |||
351 | unsigned long comp_time; /* command completion time, in jiffies */ | ||
352 | |||
353 | atomic_t active; /* declares if this command sent to the drive. */ | ||
354 | }; | 348 | }; |
355 | 349 | ||
356 | /* Structure used to describe a port. */ | 350 | /* Structure used to describe a port. */ |
@@ -436,12 +430,6 @@ struct mtip_port { | |||
436 | * or error handling is active | 430 | * or error handling is active |
437 | */ | 431 | */ |
438 | unsigned long cmds_to_issue[SLOTBITS_IN_LONGS]; | 432 | unsigned long cmds_to_issue[SLOTBITS_IN_LONGS]; |
439 | /* | ||
440 | * Array of command slots. Structure includes pointers to the | ||
441 | * command header and command table, and completion function and data | ||
442 | * pointers. | ||
443 | */ | ||
444 | struct mtip_cmd commands[MTIP_MAX_COMMAND_SLOTS]; | ||
445 | /* Used by mtip_service_thread to wait for an event */ | 433 | /* Used by mtip_service_thread to wait for an event */ |
446 | wait_queue_head_t svc_wait; | 434 | wait_queue_head_t svc_wait; |
447 | /* | 435 | /* |
@@ -452,13 +440,7 @@ struct mtip_port { | |||
452 | /* | 440 | /* |
453 | * Timer used to complete commands that have been active for too long. | 441 | * Timer used to complete commands that have been active for too long. |
454 | */ | 442 | */ |
455 | struct timer_list cmd_timer; | ||
456 | unsigned long ic_pause_timer; | 443 | unsigned long ic_pause_timer; |
457 | /* | ||
458 | * Semaphore used to block threads if there are no | ||
459 | * command slots available. | ||
460 | */ | ||
461 | struct semaphore cmd_slot; | ||
462 | 444 | ||
463 | /* Semaphore to control queue depth of unaligned IOs */ | 445 | /* Semaphore to control queue depth of unaligned IOs */ |
464 | struct semaphore cmd_slot_unal; | 446 | struct semaphore cmd_slot_unal; |
@@ -485,6 +467,8 @@ struct driver_data { | |||
485 | 467 | ||
486 | struct request_queue *queue; /* Our request queue. */ | 468 | struct request_queue *queue; /* Our request queue. */ |
487 | 469 | ||
470 | struct blk_mq_tag_set tags; /* blk_mq tags */ | ||
471 | |||
488 | struct mtip_port *port; /* Pointer to the port data structure. */ | 472 | struct mtip_port *port; /* Pointer to the port data structure. */ |
489 | 473 | ||
490 | unsigned product_type; /* magic value declaring the product type */ | 474 | unsigned product_type; /* magic value declaring the product type */ |
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 091b9ea14feb..77087a29b127 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c | |||
@@ -32,6 +32,7 @@ struct nullb { | |||
32 | unsigned int index; | 32 | unsigned int index; |
33 | struct request_queue *q; | 33 | struct request_queue *q; |
34 | struct gendisk *disk; | 34 | struct gendisk *disk; |
35 | struct blk_mq_tag_set tag_set; | ||
35 | struct hrtimer timer; | 36 | struct hrtimer timer; |
36 | unsigned int queue_depth; | 37 | unsigned int queue_depth; |
37 | spinlock_t lock; | 38 | spinlock_t lock; |
@@ -202,8 +203,8 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) | |||
202 | entry = llist_reverse_order(entry); | 203 | entry = llist_reverse_order(entry); |
203 | do { | 204 | do { |
204 | cmd = container_of(entry, struct nullb_cmd, ll_list); | 205 | cmd = container_of(entry, struct nullb_cmd, ll_list); |
205 | end_cmd(cmd); | ||
206 | entry = entry->next; | 206 | entry = entry->next; |
207 | end_cmd(cmd); | ||
207 | } while (entry); | 208 | } while (entry); |
208 | } | 209 | } |
209 | 210 | ||
@@ -226,7 +227,7 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd) | |||
226 | 227 | ||
227 | static void null_softirq_done_fn(struct request *rq) | 228 | static void null_softirq_done_fn(struct request *rq) |
228 | { | 229 | { |
229 | end_cmd(rq->special); | 230 | end_cmd(blk_mq_rq_to_pdu(rq)); |
230 | } | 231 | } |
231 | 232 | ||
232 | static inline void null_handle_cmd(struct nullb_cmd *cmd) | 233 | static inline void null_handle_cmd(struct nullb_cmd *cmd) |
@@ -311,7 +312,7 @@ static void null_request_fn(struct request_queue *q) | |||
311 | 312 | ||
312 | static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) | 313 | static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) |
313 | { | 314 | { |
314 | struct nullb_cmd *cmd = rq->special; | 315 | struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); |
315 | 316 | ||
316 | cmd->rq = rq; | 317 | cmd->rq = rq; |
317 | cmd->nq = hctx->driver_data; | 318 | cmd->nq = hctx->driver_data; |
@@ -320,46 +321,6 @@ static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) | |||
320 | return BLK_MQ_RQ_QUEUE_OK; | 321 | return BLK_MQ_RQ_QUEUE_OK; |
321 | } | 322 | } |
322 | 323 | ||
323 | static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index) | ||
324 | { | ||
325 | int b_size = DIV_ROUND_UP(reg->nr_hw_queues, nr_online_nodes); | ||
326 | int tip = (reg->nr_hw_queues % nr_online_nodes); | ||
327 | int node = 0, i, n; | ||
328 | |||
329 | /* | ||
330 | * Split submit queues evenly wrt the number of nodes. If uneven, | ||
331 | * fill the first buckets with one extra, until the rest is filled with | ||
332 | * no extra. | ||
333 | */ | ||
334 | for (i = 0, n = 1; i < hctx_index; i++, n++) { | ||
335 | if (n % b_size == 0) { | ||
336 | n = 0; | ||
337 | node++; | ||
338 | |||
339 | tip--; | ||
340 | if (!tip) | ||
341 | b_size = reg->nr_hw_queues / nr_online_nodes; | ||
342 | } | ||
343 | } | ||
344 | |||
345 | /* | ||
346 | * A node might not be online, therefore map the relative node id to the | ||
347 | * real node id. | ||
348 | */ | ||
349 | for_each_online_node(n) { | ||
350 | if (!node) | ||
351 | break; | ||
352 | node--; | ||
353 | } | ||
354 | |||
355 | return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, n); | ||
356 | } | ||
357 | |||
358 | static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) | ||
359 | { | ||
360 | kfree(hctx); | ||
361 | } | ||
362 | |||
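For the record, a worked example of the spreading logic being removed: with nr_hw_queues = 8 over 3 online nodes, b_size starts at DIV_ROUND_UP(8, 3) = 3 and tip = 8 % 3 = 2, so hardware contexts land 3 on the first node, 3 on the second, and 2 on the third. The tag-set interface below lets the blk-mq core place queues itself (seeded by tag_set.numa_node), which is why this hand-rolled copy can go.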
363 | static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) | 324 | static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) |
364 | { | 325 | { |
365 | BUG_ON(!nullb); | 326 | BUG_ON(!nullb); |
@@ -389,19 +350,14 @@ static struct blk_mq_ops null_mq_ops = { | |||
389 | .complete = null_softirq_done_fn, | 350 | .complete = null_softirq_done_fn, |
390 | }; | 351 | }; |
391 | 352 | ||
392 | static struct blk_mq_reg null_mq_reg = { | ||
393 | .ops = &null_mq_ops, | ||
394 | .queue_depth = 64, | ||
395 | .cmd_size = sizeof(struct nullb_cmd), | ||
396 | .flags = BLK_MQ_F_SHOULD_MERGE, | ||
397 | }; | ||
398 | |||
399 | static void null_del_dev(struct nullb *nullb) | 353 | static void null_del_dev(struct nullb *nullb) |
400 | { | 354 | { |
401 | list_del_init(&nullb->list); | 355 | list_del_init(&nullb->list); |
402 | 356 | ||
403 | del_gendisk(nullb->disk); | 357 | del_gendisk(nullb->disk); |
404 | blk_cleanup_queue(nullb->q); | 358 | blk_cleanup_queue(nullb->q); |
359 | if (queue_mode == NULL_Q_MQ) | ||
360 | blk_mq_free_tag_set(&nullb->tag_set); | ||
405 | put_disk(nullb->disk); | 361 | put_disk(nullb->disk); |
406 | kfree(nullb); | 362 | kfree(nullb); |
407 | } | 363 | } |
@@ -506,7 +462,7 @@ static int null_add_dev(void) | |||
506 | 462 | ||
507 | nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); | 463 | nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); |
508 | if (!nullb) | 464 | if (!nullb) |
509 | return -ENOMEM; | 465 | goto out; |
510 | 466 | ||
511 | spin_lock_init(&nullb->lock); | 467 | spin_lock_init(&nullb->lock); |
512 | 468 | ||
@@ -514,49 +470,44 @@ static int null_add_dev(void) | |||
514 | submit_queues = nr_online_nodes; | 470 | submit_queues = nr_online_nodes; |
515 | 471 | ||
516 | if (setup_queues(nullb)) | 472 | if (setup_queues(nullb)) |
517 | goto err; | 473 | goto out_free_nullb; |
518 | 474 | ||
519 | if (queue_mode == NULL_Q_MQ) { | 475 | if (queue_mode == NULL_Q_MQ) { |
520 | null_mq_reg.numa_node = home_node; | 476 | nullb->tag_set.ops = &null_mq_ops; |
521 | null_mq_reg.queue_depth = hw_queue_depth; | 477 | nullb->tag_set.nr_hw_queues = submit_queues; |
522 | null_mq_reg.nr_hw_queues = submit_queues; | 478 | nullb->tag_set.queue_depth = hw_queue_depth; |
523 | 479 | nullb->tag_set.numa_node = home_node; | |
524 | if (use_per_node_hctx) { | 480 | nullb->tag_set.cmd_size = sizeof(struct nullb_cmd); |
525 | null_mq_reg.ops->alloc_hctx = null_alloc_hctx; | 481 | nullb->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; |
526 | null_mq_reg.ops->free_hctx = null_free_hctx; | 482 | nullb->tag_set.driver_data = nullb; |
527 | } else { | 483 | |
528 | null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue; | 484 | if (blk_mq_alloc_tag_set(&nullb->tag_set)) |
529 | null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue; | 485 | goto out_cleanup_queues; |
530 | } | 486 | |
531 | 487 | nullb->q = blk_mq_init_queue(&nullb->tag_set); | |
532 | nullb->q = blk_mq_init_queue(&null_mq_reg, nullb); | 488 | if (!nullb->q) |
489 | goto out_cleanup_tags; | ||
533 | } else if (queue_mode == NULL_Q_BIO) { | 490 | } else if (queue_mode == NULL_Q_BIO) { |
534 | nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); | 491 | nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); |
492 | if (!nullb->q) | ||
493 | goto out_cleanup_queues; | ||
535 | blk_queue_make_request(nullb->q, null_queue_bio); | 494 | blk_queue_make_request(nullb->q, null_queue_bio); |
536 | init_driver_queues(nullb); | 495 | init_driver_queues(nullb); |
537 | } else { | 496 | } else { |
538 | nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); | 497 | nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); |
498 | if (!nullb->q) | ||
499 | goto out_cleanup_queues; | ||
539 | blk_queue_prep_rq(nullb->q, null_rq_prep_fn); | 500 | blk_queue_prep_rq(nullb->q, null_rq_prep_fn); |
540 | if (nullb->q) | 501 | blk_queue_softirq_done(nullb->q, null_softirq_done_fn); |
541 | blk_queue_softirq_done(nullb->q, null_softirq_done_fn); | ||
542 | init_driver_queues(nullb); | 502 | init_driver_queues(nullb); |
543 | } | 503 | } |
544 | 504 | ||
545 | if (!nullb->q) | ||
546 | goto queue_fail; | ||
547 | |||
548 | nullb->q->queuedata = nullb; | 505 | nullb->q->queuedata = nullb; |
549 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); | 506 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); |
550 | 507 | ||
551 | disk = nullb->disk = alloc_disk_node(1, home_node); | 508 | disk = nullb->disk = alloc_disk_node(1, home_node); |
552 | if (!disk) { | 509 | if (!disk) |
553 | queue_fail: | 510 | goto out_cleanup_blk_queue; |
554 | blk_cleanup_queue(nullb->q); | ||
555 | cleanup_queues(nullb); | ||
556 | err: | ||
557 | kfree(nullb); | ||
558 | return -ENOMEM; | ||
559 | } | ||
560 | 511 | ||
561 | mutex_lock(&lock); | 512 | mutex_lock(&lock); |
562 | list_add_tail(&nullb->list, &nullb_list); | 513 | list_add_tail(&nullb->list, &nullb_list); |
@@ -579,6 +530,18 @@ err: | |||
579 | sprintf(disk->disk_name, "nullb%d", nullb->index); | 530 | sprintf(disk->disk_name, "nullb%d", nullb->index); |
580 | add_disk(disk); | 531 | add_disk(disk); |
581 | return 0; | 532 | return 0; |
533 | |||
534 | out_cleanup_blk_queue: | ||
535 | blk_cleanup_queue(nullb->q); | ||
536 | out_cleanup_tags: | ||
537 | if (queue_mode == NULL_Q_MQ) | ||
538 | blk_mq_free_tag_set(&nullb->tag_set); | ||
539 | out_cleanup_queues: | ||
540 | cleanup_queues(nullb); | ||
541 | out_free_nullb: | ||
542 | kfree(nullb); | ||
543 | out: | ||
544 | return -ENOMEM; | ||
582 | } | 545 | } |
583 | 546 | ||
584 | static int __init null_init(void) | 547 | static int __init null_init(void) |
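The reworked failure handling above follows the standard kernel unwind ladder: each acquisition gets a label that releases everything obtained before it, and each failure jumps to the deepest still-safe label. A skeletal sketch with placeholder names (alloc_a/free_a are hypothetical):

    a = alloc_a();
    if (!a)
            goto out;
    b = alloc_b();
    if (!b)
            goto out_free_a;
    return 0;

    out_free_a:
            free_a(a);
    out:
            return -ENOMEM;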
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index e76bdc074dbe..719cb1bc1640 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c | |||
@@ -747,7 +747,7 @@ static void do_pcd_request(struct request_queue * q) | |||
747 | pcd_current = cd; | 747 | pcd_current = cd; |
748 | pcd_sector = blk_rq_pos(pcd_req); | 748 | pcd_sector = blk_rq_pos(pcd_req); |
749 | pcd_count = blk_rq_cur_sectors(pcd_req); | 749 | pcd_count = blk_rq_cur_sectors(pcd_req); |
750 | pcd_buf = pcd_req->buffer; | 750 | pcd_buf = bio_data(pcd_req->bio); |
751 | pcd_busy = 1; | 751 | pcd_busy = 1; |
752 | ps_set_intr(do_pcd_read, NULL, 0, nice); | 752 | ps_set_intr(do_pcd_read, NULL, 0, nice); |
753 | return; | 753 | return; |
diff --git a/drivers/block/paride/pd.c b/drivers/block/paride/pd.c index 19ad8f0c83ef..fea7e76a00de 100644 --- a/drivers/block/paride/pd.c +++ b/drivers/block/paride/pd.c | |||
@@ -454,7 +454,7 @@ static enum action do_pd_io_start(void) | |||
454 | if (pd_block + pd_count > get_capacity(pd_req->rq_disk)) | 454 | if (pd_block + pd_count > get_capacity(pd_req->rq_disk)) |
455 | return Fail; | 455 | return Fail; |
456 | pd_run = blk_rq_sectors(pd_req); | 456 | pd_run = blk_rq_sectors(pd_req); |
457 | pd_buf = pd_req->buffer; | 457 | pd_buf = bio_data(pd_req->bio); |
458 | pd_retries = 0; | 458 | pd_retries = 0; |
459 | if (pd_cmd == READ) | 459 | if (pd_cmd == READ) |
460 | return do_pd_read_start(); | 460 | return do_pd_read_start(); |
@@ -485,7 +485,7 @@ static int pd_next_buf(void) | |||
485 | spin_lock_irqsave(&pd_lock, saved_flags); | 485 | spin_lock_irqsave(&pd_lock, saved_flags); |
486 | __blk_end_request_cur(pd_req, 0); | 486 | __blk_end_request_cur(pd_req, 0); |
487 | pd_count = blk_rq_cur_sectors(pd_req); | 487 | pd_count = blk_rq_cur_sectors(pd_req); |
488 | pd_buf = pd_req->buffer; | 488 | pd_buf = bio_data(pd_req->bio); |
489 | spin_unlock_irqrestore(&pd_lock, saved_flags); | 489 | spin_unlock_irqrestore(&pd_lock, saved_flags); |
490 | return 0; | 490 | return 0; |
491 | } | 491 | } |
diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index f5c86d523ba0..9a15fd3c9349 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c | |||
@@ -795,7 +795,7 @@ repeat: | |||
795 | } | 795 | } |
796 | 796 | ||
797 | pf_cmd = rq_data_dir(pf_req); | 797 | pf_cmd = rq_data_dir(pf_req); |
798 | pf_buf = pf_req->buffer; | 798 | pf_buf = bio_data(pf_req->bio); |
799 | pf_retries = 0; | 799 | pf_retries = 0; |
800 | 800 | ||
801 | pf_busy = 1; | 801 | pf_busy = 1; |
@@ -827,7 +827,7 @@ static int pf_next_buf(void) | |||
827 | if (!pf_req) | 827 | if (!pf_req) |
828 | return 1; | 828 | return 1; |
829 | pf_count = blk_rq_cur_sectors(pf_req); | 829 | pf_count = blk_rq_cur_sectors(pf_req); |
830 | pf_buf = pf_req->buffer; | 830 | pf_buf = bio_data(pf_req->bio); |
831 | } | 831 | } |
832 | return 0; | 832 | return 0; |
833 | } | 833 | } |
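These paride conversions (and the swim/xsysace ones further down) all replace the soon-to-be-removed req->buffer shorthand with bio_data() on the request's first bio. A sketch of what bio_data() resolves to, paraphrasing include/linux/bio.h from memory:

    static inline void *bio_data_sketch(struct bio *bio)
    {
            if (bio_has_data(bio))
                    return page_address(bio_page(bio)) + bio_offset(bio);

            return NULL;
    }

This is valid only when the page is kernel-mapped, which holds for the legacy request-based drivers being touched here.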
diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c index a69dd93d1bd5..608532d3f8c9 100644 --- a/drivers/block/skd_main.c +++ b/drivers/block/skd_main.c | |||
@@ -563,7 +563,6 @@ skd_prep_discard_cdb(struct skd_scsi_request *scsi_req, | |||
563 | 563 | ||
564 | req = skreq->req; | 564 | req = skreq->req; |
565 | blk_add_request_payload(req, page, len); | 565 | blk_add_request_payload(req, page, len); |
566 | req->buffer = buf; | ||
567 | } | 566 | } |
568 | 567 | ||
569 | static void skd_request_fn_not_online(struct request_queue *q); | 568 | static void skd_request_fn_not_online(struct request_queue *q); |
@@ -744,6 +743,7 @@ static void skd_request_fn(struct request_queue *q) | |||
744 | break; | 743 | break; |
745 | } | 744 | } |
746 | skreq->discard_page = 1; | 745 | skreq->discard_page = 1; |
746 | req->completion_data = page; | ||
747 | skd_prep_discard_cdb(scsi_req, skreq, page, lba, count); | 747 | skd_prep_discard_cdb(scsi_req, skreq, page, lba, count); |
748 | 748 | ||
749 | } else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) { | 749 | } else if (flush == SKD_FLUSH_ZERO_SIZE_FIRST) { |
@@ -858,8 +858,7 @@ static void skd_end_request(struct skd_device *skdev, | |||
858 | (skreq->discard_page == 1)) { | 858 | (skreq->discard_page == 1)) { |
859 | pr_debug("%s:%s:%d, free the page!", | 859 | pr_debug("%s:%s:%d, free the page!", |
860 | skdev->name, __func__, __LINE__); | 860 | skdev->name, __func__, __LINE__); |
861 | free_page((unsigned long)req->buffer); | 861 | __free_page(req->completion_data); |
862 | req->buffer = NULL; | ||
863 | } | 862 | } |
864 | 863 | ||
865 | if (unlikely(error)) { | 864 | if (unlikely(error)) { |
@@ -3945,15 +3944,14 @@ static int skd_acquire_msix(struct skd_device *skdev) | |||
3945 | for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) | 3944 | for (i = 0; i < SKD_MAX_MSIX_COUNT; i++) |
3946 | entries[i].entry = i; | 3945 | entries[i].entry = i; |
3947 | 3946 | ||
3948 | rc = pci_enable_msix_range(pdev, entries, | 3947 | rc = pci_enable_msix_exact(pdev, entries, SKD_MAX_MSIX_COUNT); |
3949 | SKD_MIN_MSIX_COUNT, SKD_MAX_MSIX_COUNT); | 3948 | if (rc) { |
3950 | if (rc < 0) { | ||
3951 | pr_err("(%s): failed to enable MSI-X %d\n", | 3949 | pr_err("(%s): failed to enable MSI-X %d\n", |
3952 | skd_name(skdev), rc); | 3950 | skd_name(skdev), rc); |
3953 | goto msix_out; | 3951 | goto msix_out; |
3954 | } | 3952 | } |
3955 | 3953 | ||
3956 | skdev->msix_count = rc; | 3954 | skdev->msix_count = SKD_MAX_MSIX_COUNT; |
3957 | skdev->msix_entries = kzalloc(sizeof(struct skd_msix_entry) * | 3955 | skdev->msix_entries = kzalloc(sizeof(struct skd_msix_entry) * |
3958 | skdev->msix_count, GFP_KERNEL); | 3956 | skdev->msix_count, GFP_KERNEL); |
3959 | if (!skdev->msix_entries) { | 3957 | if (!skdev->msix_entries) { |
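pci_enable_msix_exact() is the all-or-nothing variant of pci_enable_msix_range(): it returns 0 only when every requested vector was allocated and a negative errno otherwise, while the range variant returns the (possibly smaller) count it managed within [minvec, maxvec]. That is why skdev->msix_count can now be set to the constant SKD_MAX_MSIX_COUNT instead of the return value. Shape of the two calls, for comparison:

    /* range: rc is the granted count in [min, max], or a negative errno */
    rc = pci_enable_msix_range(pdev, entries,
                               SKD_MIN_MSIX_COUNT, SKD_MAX_MSIX_COUNT);

    /* exact: rc is 0 (all nvec vectors granted) or a negative errno */
    rc = pci_enable_msix_exact(pdev, entries, SKD_MAX_MSIX_COUNT);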
diff --git a/drivers/block/swim.c b/drivers/block/swim.c index b02d53a399f3..6b44bbe528b7 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c | |||
@@ -549,7 +549,7 @@ static void redo_fd_request(struct request_queue *q) | |||
549 | case READ: | 549 | case READ: |
550 | err = floppy_read_sectors(fs, blk_rq_pos(req), | 550 | err = floppy_read_sectors(fs, blk_rq_pos(req), |
551 | blk_rq_cur_sectors(req), | 551 | blk_rq_cur_sectors(req), |
552 | req->buffer); | 552 | bio_data(req->bio)); |
553 | break; | 553 | break; |
554 | } | 554 | } |
555 | done: | 555 | done: |
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index c74f7b56e7c4..523ee8fd4c15 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c | |||
@@ -342,7 +342,7 @@ static void start_request(struct floppy_state *fs) | |||
342 | swim3_dbg("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n", | 342 | swim3_dbg("do_fd_req: dev=%s cmd=%d sec=%ld nr_sec=%u buf=%p\n", |
343 | req->rq_disk->disk_name, req->cmd, | 343 | req->rq_disk->disk_name, req->cmd, |
344 | (long)blk_rq_pos(req), blk_rq_sectors(req), | 344 | (long)blk_rq_pos(req), blk_rq_sectors(req), |
345 | req->buffer); | 345 | bio_data(req->bio)); |
346 | swim3_dbg(" errors=%d current_nr_sectors=%u\n", | 346 | swim3_dbg(" errors=%d current_nr_sectors=%u\n", |
347 | req->errors, blk_rq_cur_sectors(req)); | 347 | req->errors, blk_rq_cur_sectors(req)); |
348 | #endif | 348 | #endif |
@@ -479,11 +479,11 @@ static inline void setup_transfer(struct floppy_state *fs) | |||
479 | /* Set up 3 dma commands: write preamble, data, postamble */ | 479 | /* Set up 3 dma commands: write preamble, data, postamble */ |
480 | init_dma(cp, OUTPUT_MORE, write_preamble, sizeof(write_preamble)); | 480 | init_dma(cp, OUTPUT_MORE, write_preamble, sizeof(write_preamble)); |
481 | ++cp; | 481 | ++cp; |
482 | init_dma(cp, OUTPUT_MORE, req->buffer, 512); | 482 | init_dma(cp, OUTPUT_MORE, bio_data(req->bio), 512); |
483 | ++cp; | 483 | ++cp; |
484 | init_dma(cp, OUTPUT_LAST, write_postamble, sizeof(write_postamble)); | 484 | init_dma(cp, OUTPUT_LAST, write_postamble, sizeof(write_postamble)); |
485 | } else { | 485 | } else { |
486 | init_dma(cp, INPUT_LAST, req->buffer, n * 512); | 486 | init_dma(cp, INPUT_LAST, bio_data(req->bio), n * 512); |
487 | } | 487 | } |
488 | ++cp; | 488 | ++cp; |
489 | out_le16(&cp->command, DBDMA_STOP); | 489 | out_le16(&cp->command, DBDMA_STOP); |
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6d8a87f252de..c8f286e8d80f 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c | |||
@@ -30,6 +30,9 @@ struct virtio_blk | |||
30 | /* The disk structure for the kernel. */ | 30 | /* The disk structure for the kernel. */ |
31 | struct gendisk *disk; | 31 | struct gendisk *disk; |
32 | 32 | ||
33 | /* Block layer tags. */ | ||
34 | struct blk_mq_tag_set tag_set; | ||
35 | |||
33 | /* Process context for config space updates */ | 36 | /* Process context for config space updates */ |
34 | struct work_struct config_work; | 37 | struct work_struct config_work; |
35 | 38 | ||
@@ -112,7 +115,7 @@ static int __virtblk_add_req(struct virtqueue *vq, | |||
112 | 115 | ||
113 | static inline void virtblk_request_done(struct request *req) | 116 | static inline void virtblk_request_done(struct request *req) |
114 | { | 117 | { |
115 | struct virtblk_req *vbr = req->special; | 118 | struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); |
116 | int error = virtblk_result(vbr); | 119 | int error = virtblk_result(vbr); |
117 | 120 | ||
118 | if (req->cmd_type == REQ_TYPE_BLOCK_PC) { | 121 | if (req->cmd_type == REQ_TYPE_BLOCK_PC) { |
@@ -144,17 +147,17 @@ static void virtblk_done(struct virtqueue *vq) | |||
144 | if (unlikely(virtqueue_is_broken(vq))) | 147 | if (unlikely(virtqueue_is_broken(vq))) |
145 | break; | 148 | break; |
146 | } while (!virtqueue_enable_cb(vq)); | 149 | } while (!virtqueue_enable_cb(vq)); |
147 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | ||
148 | 150 | ||
149 | /* In case queue is stopped waiting for more buffers. */ | 151 | /* In case queue is stopped waiting for more buffers. */ |
150 | if (req_done) | 152 | if (req_done) |
151 | blk_mq_start_stopped_hw_queues(vblk->disk->queue); | 153 | blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); |
154 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | ||
152 | } | 155 | } |
153 | 156 | ||
154 | static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | 157 | static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) |
155 | { | 158 | { |
156 | struct virtio_blk *vblk = hctx->queue->queuedata; | 159 | struct virtio_blk *vblk = hctx->queue->queuedata; |
157 | struct virtblk_req *vbr = req->special; | 160 | struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); |
158 | unsigned long flags; | 161 | unsigned long flags; |
159 | unsigned int num; | 162 | unsigned int num; |
160 | const bool last = (req->cmd_flags & REQ_END) != 0; | 163 | const bool last = (req->cmd_flags & REQ_END) != 0; |
@@ -202,8 +205,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req) | |||
202 | err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num); | 205 | err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num); |
203 | if (err) { | 206 | if (err) { |
204 | virtqueue_kick(vblk->vq); | 207 | virtqueue_kick(vblk->vq); |
205 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | ||
206 | blk_mq_stop_hw_queue(hctx); | 208 | blk_mq_stop_hw_queue(hctx); |
209 | spin_unlock_irqrestore(&vblk->vq_lock, flags); | ||
207 | /* Out of mem doesn't actually happen, since we fall back | 210 | /* Out of mem doesn't actually happen, since we fall back |
208 | * to direct descriptors */ | 211 | * to direct descriptors */ |
209 | if (err == -ENOMEM || err == -ENOSPC) | 212 | if (err == -ENOMEM || err == -ENOSPC) |
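Both hunks in this file move the spin_unlock so that stopping the hardware queue (here) and restarting it (in virtblk_done()) happen under vq_lock. Read as a lost-wakeup fix, sketched as an interleaving (a hedged reconstruction, not quoted from the changelog):

    /* CPU0: virtio_queue_rq             CPU1: virtblk_done
     *   ring full, drop vq_lock
     *                                     take vq_lock, reap the ring,
     *                                     restart stopped queues, unlock
     *   blk_mq_stop_hw_queue()   <- stops after the only restart; the
     *                                queue stays stopped until the next
     *                                interrupt, which may never come
     */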
@@ -480,33 +483,27 @@ static const struct device_attribute dev_attr_cache_type_rw = | |||
480 | __ATTR(cache_type, S_IRUGO|S_IWUSR, | 483 | __ATTR(cache_type, S_IRUGO|S_IWUSR, |
481 | virtblk_cache_type_show, virtblk_cache_type_store); | 484 | virtblk_cache_type_show, virtblk_cache_type_store); |
482 | 485 | ||
483 | static struct blk_mq_ops virtio_mq_ops = { | 486 | static int virtblk_init_request(void *data, struct request *rq, |
484 | .queue_rq = virtio_queue_rq, | 487 | unsigned int hctx_idx, unsigned int request_idx, |
485 | .map_queue = blk_mq_map_queue, | 488 | unsigned int numa_node) |
486 | .alloc_hctx = blk_mq_alloc_single_hw_queue, | ||
487 | .free_hctx = blk_mq_free_single_hw_queue, | ||
488 | .complete = virtblk_request_done, | ||
489 | }; | ||
490 | |||
491 | static struct blk_mq_reg virtio_mq_reg = { | ||
492 | .ops = &virtio_mq_ops, | ||
493 | .nr_hw_queues = 1, | ||
494 | .queue_depth = 0, /* Set in virtblk_probe */ | ||
495 | .numa_node = NUMA_NO_NODE, | ||
496 | .flags = BLK_MQ_F_SHOULD_MERGE, | ||
497 | }; | ||
498 | module_param_named(queue_depth, virtio_mq_reg.queue_depth, uint, 0444); | ||
499 | |||
500 | static int virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx, | ||
501 | struct request *rq, unsigned int nr) | ||
502 | { | 489 | { |
503 | struct virtio_blk *vblk = data; | 490 | struct virtio_blk *vblk = data; |
504 | struct virtblk_req *vbr = rq->special; | 491 | struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); |
505 | 492 | ||
506 | sg_init_table(vbr->sg, vblk->sg_elems); | 493 | sg_init_table(vbr->sg, vblk->sg_elems); |
507 | return 0; | 494 | return 0; |
508 | } | 495 | } |
509 | 496 | ||
497 | static struct blk_mq_ops virtio_mq_ops = { | ||
498 | .queue_rq = virtio_queue_rq, | ||
499 | .map_queue = blk_mq_map_queue, | ||
500 | .complete = virtblk_request_done, | ||
501 | .init_request = virtblk_init_request, | ||
502 | }; | ||
503 | |||
504 | static unsigned int virtblk_queue_depth; | ||
505 | module_param_named(queue_depth, virtblk_queue_depth, uint, 0444); | ||
506 | |||
510 | static int virtblk_probe(struct virtio_device *vdev) | 507 | static int virtblk_probe(struct virtio_device *vdev) |
511 | { | 508 | { |
512 | struct virtio_blk *vblk; | 509 | struct virtio_blk *vblk; |
@@ -561,24 +558,34 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
561 | } | 558 | } |
562 | 559 | ||
563 | /* Default queue sizing is to fill the ring. */ | 560 | /* Default queue sizing is to fill the ring. */ |
564 | if (!virtio_mq_reg.queue_depth) { | 561 | if (!virtblk_queue_depth) { |
565 | virtio_mq_reg.queue_depth = vblk->vq->num_free; | 562 | virtblk_queue_depth = vblk->vq->num_free; |
566 | /* ... but without indirect descs, we use 2 descs per req */ | 563 | /* ... but without indirect descs, we use 2 descs per req */ |
567 | if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) | 564 | if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC)) |
568 | virtio_mq_reg.queue_depth /= 2; | 565 | virtblk_queue_depth /= 2; |
569 | } | 566 | } |
570 | virtio_mq_reg.cmd_size = | 567 | |
568 | memset(&vblk->tag_set, 0, sizeof(vblk->tag_set)); | ||
569 | vblk->tag_set.ops = &virtio_mq_ops; | ||
570 | vblk->tag_set.nr_hw_queues = 1; | ||
571 | vblk->tag_set.queue_depth = virtblk_queue_depth; | ||
572 | vblk->tag_set.numa_node = NUMA_NO_NODE; | ||
573 | vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; | ||
574 | vblk->tag_set.cmd_size = | ||
571 | sizeof(struct virtblk_req) + | 575 | sizeof(struct virtblk_req) + |
572 | sizeof(struct scatterlist) * sg_elems; | 576 | sizeof(struct scatterlist) * sg_elems; |
577 | vblk->tag_set.driver_data = vblk; | ||
573 | 578 | ||
574 | q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk); | 579 | err = blk_mq_alloc_tag_set(&vblk->tag_set); |
580 | if (err) | ||
581 | goto out_put_disk; | ||
582 | |||
583 | q = vblk->disk->queue = blk_mq_init_queue(&vblk->tag_set); | ||
575 | if (!q) { | 584 | if (!q) { |
576 | err = -ENOMEM; | 585 | err = -ENOMEM; |
577 | goto out_put_disk; | 586 | goto out_free_tags; |
578 | } | 587 | } |
579 | 588 | ||
580 | blk_mq_init_commands(q, virtblk_init_vbr, vblk); | ||
581 | |||
582 | q->queuedata = vblk; | 589 | q->queuedata = vblk; |
583 | 590 | ||
584 | virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); | 591 | virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN); |
@@ -679,6 +686,8 @@ static int virtblk_probe(struct virtio_device *vdev) | |||
679 | out_del_disk: | 686 | out_del_disk: |
680 | del_gendisk(vblk->disk); | 687 | del_gendisk(vblk->disk); |
681 | blk_cleanup_queue(vblk->disk->queue); | 688 | blk_cleanup_queue(vblk->disk->queue); |
689 | out_free_tags: | ||
690 | blk_mq_free_tag_set(&vblk->tag_set); | ||
682 | out_put_disk: | 691 | out_put_disk: |
683 | put_disk(vblk->disk); | 692 | put_disk(vblk->disk); |
684 | out_free_vq: | 693 | out_free_vq: |
@@ -705,6 +714,8 @@ static void virtblk_remove(struct virtio_device *vdev) | |||
705 | del_gendisk(vblk->disk); | 714 | del_gendisk(vblk->disk); |
706 | blk_cleanup_queue(vblk->disk->queue); | 715 | blk_cleanup_queue(vblk->disk->queue); |
707 | 716 | ||
717 | blk_mq_free_tag_set(&vblk->tag_set); | ||
718 | |||
708 | /* Stop all the virtqueues. */ | 719 | /* Stop all the virtqueues. */ |
709 | vdev->config->reset(vdev); | 720 | vdev->config->reset(vdev); |
710 | 721 | ||
@@ -749,7 +760,7 @@ static int virtblk_restore(struct virtio_device *vdev) | |||
749 | vblk->config_enable = true; | 760 | vblk->config_enable = true; |
750 | ret = init_vq(vdev->priv); | 761 | ret = init_vq(vdev->priv); |
751 | if (!ret) | 762 | if (!ret) |
752 | blk_mq_start_stopped_hw_queues(vblk->disk->queue); | 763 | blk_mq_start_stopped_hw_queues(vblk->disk->queue, true); |
753 | 764 | ||
754 | return ret; | 765 | return ret; |
755 | } | 766 | } |
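The virtio-blk hunks above are the clearest example of this series' blk-mq tag-set conversion: the old blk_mq_reg/blk_mq_init_commands pairing gives way to a driver-owned blk_mq_tag_set that is allocated before the queue and freed on every error and teardown path. A minimal sketch of the resulting pattern, with hypothetical foo_* names standing in for the driver specifics:

	static struct blk_mq_ops foo_mq_ops = {
		.queue_rq	= foo_queue_rq,
		.map_queue	= blk_mq_map_queue,
		.complete	= foo_request_done,
		.init_request	= foo_init_request,
	};

	static int foo_probe(struct foo_dev *fd)
	{
		struct request_queue *q;
		int err;

		memset(&fd->tag_set, 0, sizeof(fd->tag_set));
		fd->tag_set.ops = &foo_mq_ops;
		fd->tag_set.nr_hw_queues = 1;
		fd->tag_set.queue_depth = 64;
		fd->tag_set.numa_node = NUMA_NO_NODE;
		fd->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
		fd->tag_set.cmd_size = sizeof(struct foo_req); /* per-request pdu */
		fd->tag_set.driver_data = fd;

		err = blk_mq_alloc_tag_set(&fd->tag_set);	/* before the queue */
		if (err)
			return err;

		q = blk_mq_init_queue(&fd->tag_set);
		if (!q) {
			blk_mq_free_tag_set(&fd->tag_set);	/* unwind, as above */
			return -ENOMEM;
		}
		q->queuedata = fd;
		return 0;
	}

Per-request driver state now lives in the cmd_size payload and is reached with blk_mq_rq_to_pdu(rq), which is why virtblk_init_request above no longer needs rq->special.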
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 25c11ad34184..5deb235bd18f 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
@@ -612,10 +612,10 @@ static void do_blkif_request(struct request_queue *rq) | |||
612 | } | 612 | } |
613 | 613 | ||
614 | pr_debug("do_blk_req %p: cmd %p, sec %lx, " | 614 | pr_debug("do_blk_req %p: cmd %p, sec %lx, " |
615 | "(%u/%u) buffer:%p [%s]\n", | 615 | "(%u/%u) [%s]\n", |
616 | req, req->cmd, (unsigned long)blk_rq_pos(req), | 616 | req, req->cmd, (unsigned long)blk_rq_pos(req), |
617 | blk_rq_cur_sectors(req), blk_rq_sectors(req), | 617 | blk_rq_cur_sectors(req), blk_rq_sectors(req), |
618 | req->buffer, rq_data_dir(req) ? "write" : "read"); | 618 | rq_data_dir(req) ? "write" : "read"); |
619 | 619 | ||
620 | if (blkif_queue_request(req)) { | 620 | if (blkif_queue_request(req)) { |
621 | blk_requeue_request(rq, req); | 621 | blk_requeue_request(rq, req); |
diff --git a/drivers/block/xsysace.c b/drivers/block/xsysace.c index 1393b8871a28..ab3ea62e5dfc 100644 --- a/drivers/block/xsysace.c +++ b/drivers/block/xsysace.c | |||
@@ -661,7 +661,7 @@ static void ace_fsm_dostate(struct ace_device *ace) | |||
661 | rq_data_dir(req)); | 661 | rq_data_dir(req)); |
662 | 662 | ||
663 | ace->req = req; | 663 | ace->req = req; |
664 | ace->data_ptr = req->buffer; | 664 | ace->data_ptr = bio_data(req->bio); |
665 | ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR; | 665 | ace->data_count = blk_rq_cur_sectors(req) * ACE_BUF_PER_SECTOR; |
666 | ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF); | 666 | ace_out32(ace, ACE_MPULBA, blk_rq_pos(req) & 0x0FFFFFFF); |
667 | 667 | ||
@@ -733,7 +733,7 @@ static void ace_fsm_dostate(struct ace_device *ace) | |||
733 | * blk_rq_sectors(ace->req), | 733 | * blk_rq_sectors(ace->req), |
734 | * blk_rq_cur_sectors(ace->req)); | 734 | * blk_rq_cur_sectors(ace->req)); |
735 | */ | 735 | */ |
736 | ace->data_ptr = ace->req->buffer; | 736 | ace->data_ptr = bio_data(ace->req->bio); |
737 | ace->data_count = blk_rq_cur_sectors(ace->req) * 16; | 737 | ace->data_count = blk_rq_cur_sectors(ace->req) * 16; |
738 | ace_fsm_yieldirq(ace); | 738 | ace_fsm_yieldirq(ace); |
739 | break; | 739 | break; |
diff --git a/drivers/block/z2ram.c b/drivers/block/z2ram.c index 27de5046708a..968f9e52effa 100644 --- a/drivers/block/z2ram.c +++ b/drivers/block/z2ram.c | |||
@@ -87,13 +87,15 @@ static void do_z2_request(struct request_queue *q) | |||
87 | while (len) { | 87 | while (len) { |
88 | unsigned long addr = start & Z2RAM_CHUNKMASK; | 88 | unsigned long addr = start & Z2RAM_CHUNKMASK; |
89 | unsigned long size = Z2RAM_CHUNKSIZE - addr; | 89 | unsigned long size = Z2RAM_CHUNKSIZE - addr; |
90 | void *buffer = bio_data(req->bio); | ||
91 | |||
90 | if (len < size) | 92 | if (len < size) |
91 | size = len; | 93 | size = len; |
92 | addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; | 94 | addr += z2ram_map[ start >> Z2RAM_CHUNKSHIFT ]; |
93 | if (rq_data_dir(req) == READ) | 95 | if (rq_data_dir(req) == READ) |
94 | memcpy(req->buffer, (char *)addr, size); | 96 | memcpy(buffer, (char *)addr, size); |
95 | else | 97 | else |
96 | memcpy((char *)addr, req->buffer, size); | 98 | memcpy((char *)addr, buffer, size); |
97 | start += size; | 99 | start += size; |
98 | len -= size; | 100 | len -= size; |
99 | } | 101 | } |
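The xsysace and z2ram hunks show the mechanical half of removing request->buffer: single-segment, request_fn-style drivers now derive the kernel-virtual data pointer from the request's first bio. A sketch of the idiom, assuming a hypothetical foo driver with a trivial request function:

	static void foo_do_request(struct request_queue *q)
	{
		struct request *req;

		while ((req = blk_fetch_request(q)) != NULL) {
			/* was: void *buf = req->buffer; */
			void *buf = bio_data(req->bio);
			unsigned long len = blk_rq_cur_sectors(req) << 9;

			if (rq_data_dir(req) == WRITE)
				foo_write_blocks(blk_rq_pos(req), buf, len);
			else
				foo_read_blocks(blk_rq_pos(req), buf, len);

			__blk_end_request_cur(req, 0);
		}
	}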
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index 8a3aff724d98..49ac5662585b 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c | |||
@@ -312,36 +312,24 @@ static const char *mrw_format_status[] = { | |||
312 | 312 | ||
313 | static const char *mrw_address_space[] = { "DMA", "GAA" }; | 313 | static const char *mrw_address_space[] = { "DMA", "GAA" }; |
314 | 314 | ||
315 | #if (ERRLOGMASK!=CD_NOTHING) | 315 | #if (ERRLOGMASK != CD_NOTHING) |
316 | #define cdinfo(type, fmt, args...) \ | 316 | #define cd_dbg(type, fmt, ...) \ |
317 | do { \ | 317 | do { \ |
318 | if ((ERRLOGMASK & type) || debug == 1) \ | 318 | if ((ERRLOGMASK & type) || debug == 1) \ |
319 | pr_info(fmt, ##args); \ | 319 | pr_debug(fmt, ##__VA_ARGS__); \ |
320 | } while (0) | 320 | } while (0) |
321 | #else | 321 | #else |
322 | #define cdinfo(type, fmt, args...) \ | 322 | #define cd_dbg(type, fmt, ...) \ |
323 | do { \ | 323 | do { \ |
324 | if (0 && (ERRLOGMASK & type) || debug == 1) \ | 324 | if (0 && (ERRLOGMASK & type) || debug == 1) \ |
325 | pr_info(fmt, ##args); \ | 325 | pr_debug(fmt, ##__VA_ARGS__); \ |
326 | } while (0) | 326 | } while (0) |
327 | #endif | 327 | #endif |
328 | 328 | ||
329 | /* These are used to simplify getting data in from and back to user land */ | ||
330 | #define IOCTL_IN(arg, type, in) \ | ||
331 | if (copy_from_user(&(in), (type __user *) (arg), sizeof (in))) \ | ||
332 | return -EFAULT; | ||
333 | |||
334 | #define IOCTL_OUT(arg, type, out) \ | ||
335 | if (copy_to_user((type __user *) (arg), &(out), sizeof (out))) \ | ||
336 | return -EFAULT; | ||
337 | |||
338 | /* The (cdo->capability & ~cdi->mask & CDC_XXX) construct was used in | 329 | /* The (cdo->capability & ~cdi->mask & CDC_XXX) construct was used in |
339 | a lot of places. This macro makes the code more clear. */ | 330 | a lot of places. This macro makes the code more clear. */ |
340 | #define CDROM_CAN(type) (cdi->ops->capability & ~cdi->mask & (type)) | 331 | #define CDROM_CAN(type) (cdi->ops->capability & ~cdi->mask & (type)) |
341 | 332 | ||
342 | /* used in the audio ioctls */ | ||
343 | #define CHECKAUDIO if ((ret=check_for_audio_disc(cdi, cdo))) return ret | ||
344 | |||
345 | /* | 333 | /* |
346 | * Another popular OS uses 7 seconds as the hard timeout for default | 334 | * Another popular OS uses 7 seconds as the hard timeout for default |
347 | * commands, so it is a good choice for us as well. | 335 | * commands, so it is a good choice for us as well. |
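The cdinfo() to cd_dbg() conversion is more than a rename: the body switches from pr_info() to pr_debug(), so with CONFIG_DYNAMIC_DEBUG these messages can be toggled per-file at runtime instead of always landing in the log. Roughly, for a hypothetical subsystem mask FOO_DBG_MASK:

	#define foo_dbg(type, fmt, ...)				\
	do {							\
		if (FOO_DBG_MASK & (type))			\
			pr_debug(fmt, ##__VA_ARGS__);		\
	} while (0)

With dynamic debug enabled, the call sites are then switched on with, e.g.:

	echo 'file cdrom.c +p' > /sys/kernel/debug/dynamic_debug/control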
@@ -349,21 +337,6 @@ do { \ | |||
349 | #define CDROM_DEF_TIMEOUT (7 * HZ) | 337 | #define CDROM_DEF_TIMEOUT (7 * HZ) |
350 | 338 | ||
351 | /* Not-exported routines. */ | 339 | /* Not-exported routines. */ |
352 | static int open_for_data(struct cdrom_device_info * cdi); | ||
353 | static int check_for_audio_disc(struct cdrom_device_info * cdi, | ||
354 | struct cdrom_device_ops * cdo); | ||
355 | static void sanitize_format(union cdrom_addr *addr, | ||
356 | u_char * curr, u_char requested); | ||
357 | static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, | ||
358 | unsigned long arg); | ||
359 | |||
360 | int cdrom_get_last_written(struct cdrom_device_info *, long *); | ||
361 | static int cdrom_get_next_writable(struct cdrom_device_info *, long *); | ||
362 | static void cdrom_count_tracks(struct cdrom_device_info *, tracktype*); | ||
363 | |||
364 | static int cdrom_mrw_exit(struct cdrom_device_info *cdi); | ||
365 | |||
366 | static int cdrom_get_disc_info(struct cdrom_device_info *cdi, disc_information *di); | ||
367 | 340 | ||
368 | static void cdrom_sysctl_register(void); | 341 | static void cdrom_sysctl_register(void); |
369 | 342 | ||
@@ -382,113 +355,65 @@ static int cdrom_dummy_generic_packet(struct cdrom_device_info *cdi, | |||
382 | return -EIO; | 355 | return -EIO; |
383 | } | 356 | } |
384 | 357 | ||
385 | /* This macro makes sure we don't have to check on cdrom_device_ops | 358 | static int cdrom_flush_cache(struct cdrom_device_info *cdi) |
386 | * existence in the run-time routines below. Change_capability is a | ||
387 | * hack to have the capability flags defined const, while we can still | ||
388 | * change it here without gcc complaining at every line. | ||
389 | */ | ||
390 | #define ENSURE(call, bits) if (cdo->call == NULL) *change_capability &= ~(bits) | ||
391 | |||
392 | int register_cdrom(struct cdrom_device_info *cdi) | ||
393 | { | ||
394 | static char banner_printed; | ||
395 | struct cdrom_device_ops *cdo = cdi->ops; | ||
396 | int *change_capability = (int *)&cdo->capability; /* hack */ | ||
397 | |||
398 | cdinfo(CD_OPEN, "entering register_cdrom\n"); | ||
399 | |||
400 | if (cdo->open == NULL || cdo->release == NULL) | ||
401 | return -EINVAL; | ||
402 | if (!banner_printed) { | ||
403 | pr_info("Uniform CD-ROM driver " REVISION "\n"); | ||
404 | banner_printed = 1; | ||
405 | cdrom_sysctl_register(); | ||
406 | } | ||
407 | |||
408 | ENSURE(drive_status, CDC_DRIVE_STATUS ); | ||
409 | if (cdo->check_events == NULL && cdo->media_changed == NULL) | ||
410 | *change_capability = ~(CDC_MEDIA_CHANGED | CDC_SELECT_DISC); | ||
411 | ENSURE(tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY); | ||
412 | ENSURE(lock_door, CDC_LOCK); | ||
413 | ENSURE(select_speed, CDC_SELECT_SPEED); | ||
414 | ENSURE(get_last_session, CDC_MULTI_SESSION); | ||
415 | ENSURE(get_mcn, CDC_MCN); | ||
416 | ENSURE(reset, CDC_RESET); | ||
417 | ENSURE(generic_packet, CDC_GENERIC_PACKET); | ||
418 | cdi->mc_flags = 0; | ||
419 | cdo->n_minors = 0; | ||
420 | cdi->options = CDO_USE_FFLAGS; | ||
421 | |||
422 | if (autoclose==1 && CDROM_CAN(CDC_CLOSE_TRAY)) | ||
423 | cdi->options |= (int) CDO_AUTO_CLOSE; | ||
424 | if (autoeject==1 && CDROM_CAN(CDC_OPEN_TRAY)) | ||
425 | cdi->options |= (int) CDO_AUTO_EJECT; | ||
426 | if (lockdoor==1) | ||
427 | cdi->options |= (int) CDO_LOCK; | ||
428 | if (check_media_type==1) | ||
429 | cdi->options |= (int) CDO_CHECK_TYPE; | ||
430 | |||
431 | if (CDROM_CAN(CDC_MRW_W)) | ||
432 | cdi->exit = cdrom_mrw_exit; | ||
433 | |||
434 | if (cdi->disk) | ||
435 | cdi->cdda_method = CDDA_BPC_FULL; | ||
436 | else | ||
437 | cdi->cdda_method = CDDA_OLD; | ||
438 | |||
439 | if (!cdo->generic_packet) | ||
440 | cdo->generic_packet = cdrom_dummy_generic_packet; | ||
441 | |||
442 | cdinfo(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name); | ||
443 | mutex_lock(&cdrom_mutex); | ||
444 | list_add(&cdi->list, &cdrom_list); | ||
445 | mutex_unlock(&cdrom_mutex); | ||
446 | return 0; | ||
447 | } | ||
448 | #undef ENSURE | ||
449 | |||
450 | void unregister_cdrom(struct cdrom_device_info *cdi) | ||
451 | { | 359 | { |
452 | cdinfo(CD_OPEN, "entering unregister_cdrom\n"); | 360 | struct packet_command cgc; |
453 | 361 | ||
454 | mutex_lock(&cdrom_mutex); | 362 | init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); |
455 | list_del(&cdi->list); | 363 | cgc.cmd[0] = GPCMD_FLUSH_CACHE; |
456 | mutex_unlock(&cdrom_mutex); | ||
457 | 364 | ||
458 | if (cdi->exit) | 365 | cgc.timeout = 5 * 60 * HZ; |
459 | cdi->exit(cdi); | ||
460 | 366 | ||
461 | cdi->ops->n_minors--; | 367 | return cdi->ops->generic_packet(cdi, &cgc); |
462 | cdinfo(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name); | ||
463 | } | 368 | } |
464 | 369 | ||
465 | int cdrom_get_media_event(struct cdrom_device_info *cdi, | 370 | /* requires CD R/RW */ |
466 | struct media_event_desc *med) | 371 | static int cdrom_get_disc_info(struct cdrom_device_info *cdi, |
372 | disc_information *di) | ||
467 | { | 373 | { |
374 | struct cdrom_device_ops *cdo = cdi->ops; | ||
468 | struct packet_command cgc; | 375 | struct packet_command cgc; |
469 | unsigned char buffer[8]; | 376 | int ret, buflen; |
470 | struct event_header *eh = (struct event_header *) buffer; | ||
471 | 377 | ||
472 | init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ); | 378 | /* set up command and get the disc info */ |
473 | cgc.cmd[0] = GPCMD_GET_EVENT_STATUS_NOTIFICATION; | 379 | init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ); |
474 | cgc.cmd[1] = 1; /* IMMED */ | 380 | cgc.cmd[0] = GPCMD_READ_DISC_INFO; |
475 | cgc.cmd[4] = 1 << 4; /* media event */ | 381 | cgc.cmd[8] = cgc.buflen = 2; |
476 | cgc.cmd[8] = sizeof(buffer); | ||
477 | cgc.quiet = 1; | 382 | cgc.quiet = 1; |
478 | 383 | ||
479 | if (cdi->ops->generic_packet(cdi, &cgc)) | 384 | ret = cdo->generic_packet(cdi, &cgc); |
480 | return 1; | 385 | if (ret) |
386 | return ret; | ||
481 | 387 | ||
482 | if (be16_to_cpu(eh->data_len) < sizeof(*med)) | 388 | /* not all drives have the same disc_info length, so requeue |
483 | return 1; | 389 | * packet with the length the drive tells us it can supply |
390 | */ | ||
391 | buflen = be16_to_cpu(di->disc_information_length) + | ||
392 | sizeof(di->disc_information_length); | ||
484 | 393 | ||
485 | if (eh->nea || eh->notification_class != 0x4) | 394 | if (buflen > sizeof(disc_information)) |
486 | return 1; | 395 | buflen = sizeof(disc_information); |
487 | 396 | ||
488 | memcpy(med, &buffer[sizeof(*eh)], sizeof(*med)); | 397 | cgc.cmd[8] = cgc.buflen = buflen; |
489 | return 0; | 398 | ret = cdo->generic_packet(cdi, &cgc); |
399 | if (ret) | ||
400 | return ret; | ||
401 | |||
402 | /* return actual fill size */ | ||
403 | return buflen; | ||
490 | } | 404 | } |
491 | 405 | ||
406 | /* This macro makes sure we don't have to check on cdrom_device_ops | ||
407 | * existence in the run-time routines below. Change_capability is a | ||
408 | * hack to have the capability flags defined const, while we can still | ||
409 | * change it here without gcc complaining at every line. | ||
410 | */ | ||
411 | #define ENSURE(call, bits) \ | ||
412 | do { \ | ||
413 | if (cdo->call == NULL) \ | ||
414 | *change_capability &= ~(bits); \ | ||
415 | } while (0) | ||
416 | |||
492 | /* | 417 | /* |
493 | * the first prototypes used 0x2c as the page code for the mrw mode page, | 418 | * the first prototypes used 0x2c as the page code for the mrw mode page, |
494 | * subsequently this was changed to 0x03. probe the one used by this drive | 419 | * subsequently this was changed to 0x03. probe the one used by this drive |
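Note that the rewritten ENSURE() now wraps its conditional in do { ... } while (0). The old single-statement form expanded to a bare if, which mis-binds when the macro is used directly before an else; the wrapper makes the expansion behave as exactly one statement. A sketch of the hazard being avoided (the condition here is made up for illustration):

	/* If ENSURE() expanded to a bare "if (...) ...;", this else
	 * would silently bind to the macro's if, not the outer one: */
	if (need_capability_check)
		ENSURE(reset, CDC_RESET);
	else
		pr_info("skipping capability fixup\n");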
@@ -605,18 +530,6 @@ static int cdrom_mrw_bgformat_susp(struct cdrom_device_info *cdi, int immed) | |||
605 | return cdi->ops->generic_packet(cdi, &cgc); | 530 | return cdi->ops->generic_packet(cdi, &cgc); |
606 | } | 531 | } |
607 | 532 | ||
608 | static int cdrom_flush_cache(struct cdrom_device_info *cdi) | ||
609 | { | ||
610 | struct packet_command cgc; | ||
611 | |||
612 | init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); | ||
613 | cgc.cmd[0] = GPCMD_FLUSH_CACHE; | ||
614 | |||
615 | cgc.timeout = 5 * 60 * HZ; | ||
616 | |||
617 | return cdi->ops->generic_packet(cdi, &cgc); | ||
618 | } | ||
619 | |||
620 | static int cdrom_mrw_exit(struct cdrom_device_info *cdi) | 533 | static int cdrom_mrw_exit(struct cdrom_device_info *cdi) |
621 | { | 534 | { |
622 | disc_information di; | 535 | disc_information di; |
@@ -650,17 +563,19 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space) | |||
650 | cgc.buffer = buffer; | 563 | cgc.buffer = buffer; |
651 | cgc.buflen = sizeof(buffer); | 564 | cgc.buflen = sizeof(buffer); |
652 | 565 | ||
653 | if ((ret = cdrom_mode_sense(cdi, &cgc, cdi->mrw_mode_page, 0))) | 566 | ret = cdrom_mode_sense(cdi, &cgc, cdi->mrw_mode_page, 0); |
567 | if (ret) | ||
654 | return ret; | 568 | return ret; |
655 | 569 | ||
656 | mph = (struct mode_page_header *) buffer; | 570 | mph = (struct mode_page_header *)buffer; |
657 | offset = be16_to_cpu(mph->desc_length); | 571 | offset = be16_to_cpu(mph->desc_length); |
658 | size = be16_to_cpu(mph->mode_data_length) + 2; | 572 | size = be16_to_cpu(mph->mode_data_length) + 2; |
659 | 573 | ||
660 | buffer[offset + 3] = space; | 574 | buffer[offset + 3] = space; |
661 | cgc.buflen = size; | 575 | cgc.buflen = size; |
662 | 576 | ||
663 | if ((ret = cdrom_mode_select(cdi, &cgc))) | 577 | ret = cdrom_mode_select(cdi, &cgc); |
578 | if (ret) | ||
664 | return ret; | 579 | return ret; |
665 | 580 | ||
666 | pr_info("%s: mrw address space %s selected\n", | 581 | pr_info("%s: mrw address space %s selected\n", |
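This hunk also applies the standard checkpatch cleanup of hoisting assignments out of conditionals, a transformation that recurs throughout the file. In isolation:

	/* before (assignment buried in the condition): */
	if ((ret = cdrom_mode_sense(cdi, &cgc, cdi->mrw_mode_page, 0)))
		return ret;

	/* after (this patch): */
	ret = cdrom_mode_sense(cdi, &cgc, cdi->mrw_mode_page, 0);
	if (ret)
		return ret;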
@@ -668,6 +583,106 @@ static int cdrom_mrw_set_lba_space(struct cdrom_device_info *cdi, int space) | |||
668 | return 0; | 583 | return 0; |
669 | } | 584 | } |
670 | 585 | ||
586 | int register_cdrom(struct cdrom_device_info *cdi) | ||
587 | { | ||
588 | static char banner_printed; | ||
589 | struct cdrom_device_ops *cdo = cdi->ops; | ||
590 | int *change_capability = (int *)&cdo->capability; /* hack */ | ||
591 | |||
592 | cd_dbg(CD_OPEN, "entering register_cdrom\n"); | ||
593 | |||
594 | if (cdo->open == NULL || cdo->release == NULL) | ||
595 | return -EINVAL; | ||
596 | if (!banner_printed) { | ||
597 | pr_info("Uniform CD-ROM driver " REVISION "\n"); | ||
598 | banner_printed = 1; | ||
599 | cdrom_sysctl_register(); | ||
600 | } | ||
601 | |||
602 | ENSURE(drive_status, CDC_DRIVE_STATUS); | ||
603 | if (cdo->check_events == NULL && cdo->media_changed == NULL) | ||
604 | *change_capability = ~(CDC_MEDIA_CHANGED | CDC_SELECT_DISC); | ||
605 | ENSURE(tray_move, CDC_CLOSE_TRAY | CDC_OPEN_TRAY); | ||
606 | ENSURE(lock_door, CDC_LOCK); | ||
607 | ENSURE(select_speed, CDC_SELECT_SPEED); | ||
608 | ENSURE(get_last_session, CDC_MULTI_SESSION); | ||
609 | ENSURE(get_mcn, CDC_MCN); | ||
610 | ENSURE(reset, CDC_RESET); | ||
611 | ENSURE(generic_packet, CDC_GENERIC_PACKET); | ||
612 | cdi->mc_flags = 0; | ||
613 | cdo->n_minors = 0; | ||
614 | cdi->options = CDO_USE_FFLAGS; | ||
615 | |||
616 | if (autoclose == 1 && CDROM_CAN(CDC_CLOSE_TRAY)) | ||
617 | cdi->options |= (int) CDO_AUTO_CLOSE; | ||
618 | if (autoeject == 1 && CDROM_CAN(CDC_OPEN_TRAY)) | ||
619 | cdi->options |= (int) CDO_AUTO_EJECT; | ||
620 | if (lockdoor == 1) | ||
621 | cdi->options |= (int) CDO_LOCK; | ||
622 | if (check_media_type == 1) | ||
623 | cdi->options |= (int) CDO_CHECK_TYPE; | ||
624 | |||
625 | if (CDROM_CAN(CDC_MRW_W)) | ||
626 | cdi->exit = cdrom_mrw_exit; | ||
627 | |||
628 | if (cdi->disk) | ||
629 | cdi->cdda_method = CDDA_BPC_FULL; | ||
630 | else | ||
631 | cdi->cdda_method = CDDA_OLD; | ||
632 | |||
633 | if (!cdo->generic_packet) | ||
634 | cdo->generic_packet = cdrom_dummy_generic_packet; | ||
635 | |||
636 | cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" registered\n", cdi->name); | ||
637 | mutex_lock(&cdrom_mutex); | ||
638 | list_add(&cdi->list, &cdrom_list); | ||
639 | mutex_unlock(&cdrom_mutex); | ||
640 | return 0; | ||
641 | } | ||
642 | #undef ENSURE | ||
643 | |||
644 | void unregister_cdrom(struct cdrom_device_info *cdi) | ||
645 | { | ||
646 | cd_dbg(CD_OPEN, "entering unregister_cdrom\n"); | ||
647 | |||
648 | mutex_lock(&cdrom_mutex); | ||
649 | list_del(&cdi->list); | ||
650 | mutex_unlock(&cdrom_mutex); | ||
651 | |||
652 | if (cdi->exit) | ||
653 | cdi->exit(cdi); | ||
654 | |||
655 | cdi->ops->n_minors--; | ||
656 | cd_dbg(CD_REG_UNREG, "drive \"/dev/%s\" unregistered\n", cdi->name); | ||
657 | } | ||
658 | |||
659 | int cdrom_get_media_event(struct cdrom_device_info *cdi, | ||
660 | struct media_event_desc *med) | ||
661 | { | ||
662 | struct packet_command cgc; | ||
663 | unsigned char buffer[8]; | ||
664 | struct event_header *eh = (struct event_header *)buffer; | ||
665 | |||
666 | init_cdrom_command(&cgc, buffer, sizeof(buffer), CGC_DATA_READ); | ||
667 | cgc.cmd[0] = GPCMD_GET_EVENT_STATUS_NOTIFICATION; | ||
668 | cgc.cmd[1] = 1; /* IMMED */ | ||
669 | cgc.cmd[4] = 1 << 4; /* media event */ | ||
670 | cgc.cmd[8] = sizeof(buffer); | ||
671 | cgc.quiet = 1; | ||
672 | |||
673 | if (cdi->ops->generic_packet(cdi, &cgc)) | ||
674 | return 1; | ||
675 | |||
676 | if (be16_to_cpu(eh->data_len) < sizeof(*med)) | ||
677 | return 1; | ||
678 | |||
679 | if (eh->nea || eh->notification_class != 0x4) | ||
680 | return 1; | ||
681 | |||
682 | memcpy(med, &buffer[sizeof(*eh)], sizeof(*med)); | ||
683 | return 0; | ||
684 | } | ||
685 | |||
671 | static int cdrom_get_random_writable(struct cdrom_device_info *cdi, | 686 | static int cdrom_get_random_writable(struct cdrom_device_info *cdi, |
672 | struct rwrt_feature_desc *rfd) | 687 | struct rwrt_feature_desc *rfd) |
673 | { | 688 | { |
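With register_cdrom() now placed after its helpers, the registration contract is easier to read: open and release are mandatory, and ENSURE() strips the capability bit for every optional op the driver left NULL. A hedged sketch of what a caller provides (the foo_* names are hypothetical):

	static struct cdrom_device_ops foo_cd_ops = {
		.open		= foo_cd_open,		/* mandatory */
		.release	= foo_cd_release,	/* mandatory */
		.drive_status	= foo_cd_drive_status,
		.capability	= CDC_OPEN_TRAY | CDC_CLOSE_TRAY |
				  CDC_DRIVE_STATUS,
	};

	static struct cdrom_device_info foo_cd_info = {
		.ops	= &foo_cd_ops,
		.name	= "foocd",
	};

	/* any CDC_* bit whose op is NULL is masked off by ENSURE() */
	err = register_cdrom(&foo_cd_info);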
@@ -839,7 +854,7 @@ static int cdrom_ram_open_write(struct cdrom_device_info *cdi) | |||
839 | else if (CDF_RWRT == be16_to_cpu(rfd.feature_code)) | 854 | else if (CDF_RWRT == be16_to_cpu(rfd.feature_code)) |
840 | ret = !rfd.curr; | 855 | ret = !rfd.curr; |
841 | 856 | ||
842 | cdinfo(CD_OPEN, "can open for random write\n"); | 857 | cd_dbg(CD_OPEN, "can open for random write\n"); |
843 | return ret; | 858 | return ret; |
844 | } | 859 | } |
845 | 860 | ||
@@ -928,12 +943,12 @@ static void cdrom_dvd_rw_close_write(struct cdrom_device_info *cdi) | |||
928 | struct packet_command cgc; | 943 | struct packet_command cgc; |
929 | 944 | ||
930 | if (cdi->mmc3_profile != 0x1a) { | 945 | if (cdi->mmc3_profile != 0x1a) { |
931 | cdinfo(CD_CLOSE, "%s: No DVD+RW\n", cdi->name); | 946 | cd_dbg(CD_CLOSE, "%s: No DVD+RW\n", cdi->name); |
932 | return; | 947 | return; |
933 | } | 948 | } |
934 | 949 | ||
935 | if (!cdi->media_written) { | 950 | if (!cdi->media_written) { |
936 | cdinfo(CD_CLOSE, "%s: DVD+RW media clean\n", cdi->name); | 951 | cd_dbg(CD_CLOSE, "%s: DVD+RW media clean\n", cdi->name); |
937 | return; | 952 | return; |
938 | } | 953 | } |
939 | 954 | ||
@@ -969,82 +984,74 @@ static int cdrom_close_write(struct cdrom_device_info *cdi) | |||
969 | #endif | 984 | #endif |
970 | } | 985 | } |
971 | 986 | ||
972 | /* We use the open-option O_NONBLOCK to indicate that the | 987 | /* badly broken, I know. Is due for a fixup anytime. */ |
973 | * purpose of opening is only for subsequent ioctl() calls; no device | 988 | static void cdrom_count_tracks(struct cdrom_device_info *cdi, tracktype *tracks) |
974 | * integrity checks are performed. | ||
975 | * | ||
976 | * We hope that all cd-player programs will adopt this convention. It | ||
977 | * is in their own interest: device control becomes a lot easier | ||
978 | * this way. | ||
979 | */ | ||
980 | int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, fmode_t mode) | ||
981 | { | 989 | { |
982 | int ret; | 990 | struct cdrom_tochdr header; |
983 | 991 | struct cdrom_tocentry entry; | |
984 | cdinfo(CD_OPEN, "entering cdrom_open\n"); | 992 | int ret, i; |
985 | 993 | tracks->data = 0; | |
986 | /* open is event synchronization point, check events first */ | 994 | tracks->audio = 0; |
987 | check_disk_change(bdev); | 995 | tracks->cdi = 0; |
988 | 996 | tracks->xa = 0; | |
989 | /* if this was a O_NONBLOCK open and we should honor the flags, | 997 | tracks->error = 0; |
990 | * do a quick open without drive/disc integrity checks. */ | 998 | cd_dbg(CD_COUNT_TRACKS, "entering cdrom_count_tracks\n"); |
991 | cdi->use_count++; | 999 | /* Grab the TOC header so we can see how many tracks there are */ |
992 | if ((mode & FMODE_NDELAY) && (cdi->options & CDO_USE_FFLAGS)) { | 1000 | ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCHDR, &header); |
993 | ret = cdi->ops->open(cdi, 1); | 1001 | if (ret) { |
994 | } else { | 1002 | if (ret == -ENOMEDIUM) |
995 | ret = open_for_data(cdi); | 1003 | tracks->error = CDS_NO_DISC; |
996 | if (ret) | 1004 | else |
997 | goto err; | 1005 | tracks->error = CDS_NO_INFO; |
998 | cdrom_mmc3_profile(cdi); | 1006 | return; |
999 | if (mode & FMODE_WRITE) { | ||
1000 | ret = -EROFS; | ||
1001 | if (cdrom_open_write(cdi)) | ||
1002 | goto err_release; | ||
1003 | if (!CDROM_CAN(CDC_RAM)) | ||
1004 | goto err_release; | ||
1005 | ret = 0; | ||
1006 | cdi->media_written = 0; | ||
1007 | } | ||
1008 | } | 1007 | } |
1009 | 1008 | /* check what type of tracks are on this disc */ | |
1010 | if (ret) | 1009 | entry.cdte_format = CDROM_MSF; |
1011 | goto err; | 1010 | for (i = header.cdth_trk0; i <= header.cdth_trk1; i++) { |
1012 | 1011 | entry.cdte_track = i; | |
1013 | cdinfo(CD_OPEN, "Use count for \"/dev/%s\" now %d\n", | 1012 | if (cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &entry)) { |
1014 | cdi->name, cdi->use_count); | 1013 | tracks->error = CDS_NO_INFO; |
1015 | return 0; | 1014 | return; |
1016 | err_release: | 1015 | } |
1017 | if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) { | 1016 | if (entry.cdte_ctrl & CDROM_DATA_TRACK) { |
1018 | cdi->ops->lock_door(cdi, 0); | 1017 | if (entry.cdte_format == 0x10) |
1019 | cdinfo(CD_OPEN, "door unlocked.\n"); | 1018 | tracks->cdi++; |
1019 | else if (entry.cdte_format == 0x20) | ||
1020 | tracks->xa++; | ||
1021 | else | ||
1022 | tracks->data++; | ||
1023 | } else { | ||
1024 | tracks->audio++; | ||
1025 | } | ||
1026 | cd_dbg(CD_COUNT_TRACKS, "track %d: format=%d, ctrl=%d\n", | ||
1027 | i, entry.cdte_format, entry.cdte_ctrl); | ||
1020 | } | 1028 | } |
1021 | cdi->ops->release(cdi); | 1029 | cd_dbg(CD_COUNT_TRACKS, "disc has %d tracks: %d=audio %d=data %d=Cd-I %d=XA\n", |
1022 | err: | 1030 | header.cdth_trk1, tracks->audio, tracks->data, |
1023 | cdi->use_count--; | 1031 | tracks->cdi, tracks->xa); |
1024 | return ret; | ||
1025 | } | 1032 | } |
1026 | 1033 | ||
1027 | static | 1034 | static |
1028 | int open_for_data(struct cdrom_device_info * cdi) | 1035 | int open_for_data(struct cdrom_device_info *cdi) |
1029 | { | 1036 | { |
1030 | int ret; | 1037 | int ret; |
1031 | struct cdrom_device_ops *cdo = cdi->ops; | 1038 | struct cdrom_device_ops *cdo = cdi->ops; |
1032 | tracktype tracks; | 1039 | tracktype tracks; |
1033 | cdinfo(CD_OPEN, "entering open_for_data\n"); | 1040 | cd_dbg(CD_OPEN, "entering open_for_data\n"); |
1034 | /* Check if the driver can report drive status. If it can, we | 1041 | /* Check if the driver can report drive status. If it can, we |
1035 | can do clever things. If it can't, well, we at least tried! */ | 1042 | can do clever things. If it can't, well, we at least tried! */ |
1036 | if (cdo->drive_status != NULL) { | 1043 | if (cdo->drive_status != NULL) { |
1037 | ret = cdo->drive_status(cdi, CDSL_CURRENT); | 1044 | ret = cdo->drive_status(cdi, CDSL_CURRENT); |
1038 | cdinfo(CD_OPEN, "drive_status=%d\n", ret); | 1045 | cd_dbg(CD_OPEN, "drive_status=%d\n", ret); |
1039 | if (ret == CDS_TRAY_OPEN) { | 1046 | if (ret == CDS_TRAY_OPEN) { |
1040 | cdinfo(CD_OPEN, "the tray is open...\n"); | 1047 | cd_dbg(CD_OPEN, "the tray is open...\n"); |
1041 | /* can/may i close it? */ | 1048 | /* can/may i close it? */ |
1042 | if (CDROM_CAN(CDC_CLOSE_TRAY) && | 1049 | if (CDROM_CAN(CDC_CLOSE_TRAY) && |
1043 | cdi->options & CDO_AUTO_CLOSE) { | 1050 | cdi->options & CDO_AUTO_CLOSE) { |
1044 | cdinfo(CD_OPEN, "trying to close the tray.\n"); | 1051 | cd_dbg(CD_OPEN, "trying to close the tray\n"); |
1045 | ret=cdo->tray_move(cdi,0); | 1052 | ret=cdo->tray_move(cdi,0); |
1046 | if (ret) { | 1053 | if (ret) { |
1047 | cdinfo(CD_OPEN, "bummer. tried to close the tray but failed.\n"); | 1054 | cd_dbg(CD_OPEN, "bummer. tried to close the tray but failed.\n"); |
1048 | /* Ignore the error from the low | 1055 | /* Ignore the error from the low |
1049 | level driver. We don't care why it | 1056 | level driver. We don't care why it |
1050 | couldn't close the tray. We only care | 1057 | couldn't close the tray. We only care |
@@ -1054,19 +1061,19 @@ int open_for_data(struct cdrom_device_info * cdi) | |||
1054 | goto clean_up_and_return; | 1061 | goto clean_up_and_return; |
1055 | } | 1062 | } |
1056 | } else { | 1063 | } else { |
1057 | cdinfo(CD_OPEN, "bummer. this drive can't close the tray.\n"); | 1064 | cd_dbg(CD_OPEN, "bummer. this drive can't close the tray.\n"); |
1058 | ret=-ENOMEDIUM; | 1065 | ret=-ENOMEDIUM; |
1059 | goto clean_up_and_return; | 1066 | goto clean_up_and_return; |
1060 | } | 1067 | } |
1061 | /* Ok, the door should be closed now.. Check again */ | 1068 | /* Ok, the door should be closed now.. Check again */ |
1062 | ret = cdo->drive_status(cdi, CDSL_CURRENT); | 1069 | ret = cdo->drive_status(cdi, CDSL_CURRENT); |
1063 | if ((ret == CDS_NO_DISC) || (ret==CDS_TRAY_OPEN)) { | 1070 | if ((ret == CDS_NO_DISC) || (ret==CDS_TRAY_OPEN)) { |
1064 | cdinfo(CD_OPEN, "bummer. the tray is still not closed.\n"); | 1071 | cd_dbg(CD_OPEN, "bummer. the tray is still not closed.\n"); |
1065 | cdinfo(CD_OPEN, "tray might not contain a medium.\n"); | 1072 | cd_dbg(CD_OPEN, "tray might not contain a medium\n"); |
1066 | ret=-ENOMEDIUM; | 1073 | ret=-ENOMEDIUM; |
1067 | goto clean_up_and_return; | 1074 | goto clean_up_and_return; |
1068 | } | 1075 | } |
1069 | cdinfo(CD_OPEN, "the tray is now closed.\n"); | 1076 | cd_dbg(CD_OPEN, "the tray is now closed\n"); |
1070 | } | 1077 | } |
1071 | /* the door should be closed now, check for the disc */ | 1078 | /* the door should be closed now, check for the disc */ |
1072 | ret = cdo->drive_status(cdi, CDSL_CURRENT); | 1079 | ret = cdo->drive_status(cdi, CDSL_CURRENT); |
@@ -1077,7 +1084,7 @@ int open_for_data(struct cdrom_device_info * cdi) | |||
1077 | } | 1084 | } |
1078 | cdrom_count_tracks(cdi, &tracks); | 1085 | cdrom_count_tracks(cdi, &tracks); |
1079 | if (tracks.error == CDS_NO_DISC) { | 1086 | if (tracks.error == CDS_NO_DISC) { |
1080 | cdinfo(CD_OPEN, "bummer. no disc.\n"); | 1087 | cd_dbg(CD_OPEN, "bummer. no disc.\n"); |
1081 | ret=-ENOMEDIUM; | 1088 | ret=-ENOMEDIUM; |
1082 | goto clean_up_and_return; | 1089 | goto clean_up_and_return; |
1083 | } | 1090 | } |
@@ -1087,34 +1094,34 @@ int open_for_data(struct cdrom_device_info * cdi) | |||
1087 | if (cdi->options & CDO_CHECK_TYPE) { | 1094 | if (cdi->options & CDO_CHECK_TYPE) { |
1088 | /* give people a warning shot, now that CDO_CHECK_TYPE | 1095 | /* give people a warning shot, now that CDO_CHECK_TYPE |
1089 | is the default case! */ | 1096 | is the default case! */ |
1090 | cdinfo(CD_OPEN, "bummer. wrong media type.\n"); | 1097 | cd_dbg(CD_OPEN, "bummer. wrong media type.\n"); |
1091 | cdinfo(CD_WARNING, "pid %d must open device O_NONBLOCK!\n", | 1098 | cd_dbg(CD_WARNING, "pid %d must open device O_NONBLOCK!\n", |
1092 | (unsigned int)task_pid_nr(current)); | 1099 | (unsigned int)task_pid_nr(current)); |
1093 | ret=-EMEDIUMTYPE; | 1100 | ret=-EMEDIUMTYPE; |
1094 | goto clean_up_and_return; | 1101 | goto clean_up_and_return; |
1095 | } | 1102 | } |
1096 | else { | 1103 | else { |
1097 | cdinfo(CD_OPEN, "wrong media type, but CDO_CHECK_TYPE not set.\n"); | 1104 | cd_dbg(CD_OPEN, "wrong media type, but CDO_CHECK_TYPE not set\n"); |
1098 | } | 1105 | } |
1099 | } | 1106 | } |
1100 | 1107 | ||
1101 | cdinfo(CD_OPEN, "all seems well, opening the device.\n"); | 1108 | cd_dbg(CD_OPEN, "all seems well, opening the device\n"); |
1102 | 1109 | ||
1103 | /* all seems well, we can open the device */ | 1110 | /* all seems well, we can open the device */ |
1104 | ret = cdo->open(cdi, 0); /* open for data */ | 1111 | ret = cdo->open(cdi, 0); /* open for data */ |
1105 | cdinfo(CD_OPEN, "opening the device gave me %d.\n", ret); | 1112 | cd_dbg(CD_OPEN, "opening the device gave me %d\n", ret); |
1106 | /* After all this careful checking, we shouldn't have problems | 1113 | /* After all this careful checking, we shouldn't have problems |
1107 | opening the device, but we don't want the device locked if | 1114 | opening the device, but we don't want the device locked if |
1108 | this somehow fails... */ | 1115 | this somehow fails... */ |
1109 | if (ret) { | 1116 | if (ret) { |
1110 | cdinfo(CD_OPEN, "open device failed.\n"); | 1117 | cd_dbg(CD_OPEN, "open device failed\n"); |
1111 | goto clean_up_and_return; | 1118 | goto clean_up_and_return; |
1112 | } | 1119 | } |
1113 | if (CDROM_CAN(CDC_LOCK) && (cdi->options & CDO_LOCK)) { | 1120 | if (CDROM_CAN(CDC_LOCK) && (cdi->options & CDO_LOCK)) { |
1114 | cdo->lock_door(cdi, 1); | 1121 | cdo->lock_door(cdi, 1); |
1115 | cdinfo(CD_OPEN, "door locked.\n"); | 1122 | cd_dbg(CD_OPEN, "door locked\n"); |
1116 | } | 1123 | } |
1117 | cdinfo(CD_OPEN, "device opened successfully.\n"); | 1124 | cd_dbg(CD_OPEN, "device opened successfully\n"); |
1118 | return ret; | 1125 | return ret; |
1119 | 1126 | ||
1120 | /* Something failed. Try to unlock the drive, because some drivers | 1127 | /* Something failed. Try to unlock the drive, because some drivers |
@@ -1123,14 +1130,70 @@ int open_for_data(struct cdrom_device_info * cdi) | |||
1123 | This ensures that the drive gets unlocked after a mount fails. This | 1130 | This ensures that the drive gets unlocked after a mount fails. This |
1124 | is a goto to avoid bloating the driver with redundant code. */ | 1131 | is a goto to avoid bloating the driver with redundant code. */ |
1125 | clean_up_and_return: | 1132 | clean_up_and_return: |
1126 | cdinfo(CD_OPEN, "open failed.\n"); | 1133 | cd_dbg(CD_OPEN, "open failed\n"); |
1127 | if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) { | 1134 | if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) { |
1128 | cdo->lock_door(cdi, 0); | 1135 | cdo->lock_door(cdi, 0); |
1129 | cdinfo(CD_OPEN, "door unlocked.\n"); | 1136 | cd_dbg(CD_OPEN, "door unlocked\n"); |
1130 | } | 1137 | } |
1131 | return ret; | 1138 | return ret; |
1132 | } | 1139 | } |
1133 | 1140 | ||
1141 | /* We use the open-option O_NONBLOCK to indicate that the | ||
1142 | * purpose of opening is only for subsequent ioctl() calls; no device | ||
1143 | * integrity checks are performed. | ||
1144 | * | ||
1145 | * We hope that all cd-player programs will adopt this convention. It | ||
1146 | * is in their own interest: device control becomes a lot easier | ||
1147 | * this way. | ||
1148 | */ | ||
1149 | int cdrom_open(struct cdrom_device_info *cdi, struct block_device *bdev, | ||
1150 | fmode_t mode) | ||
1151 | { | ||
1152 | int ret; | ||
1153 | |||
1154 | cd_dbg(CD_OPEN, "entering cdrom_open\n"); | ||
1155 | |||
1156 | /* open is event synchronization point, check events first */ | ||
1157 | check_disk_change(bdev); | ||
1158 | |||
1159 | /* if this was a O_NONBLOCK open and we should honor the flags, | ||
1160 | * do a quick open without drive/disc integrity checks. */ | ||
1161 | cdi->use_count++; | ||
1162 | if ((mode & FMODE_NDELAY) && (cdi->options & CDO_USE_FFLAGS)) { | ||
1163 | ret = cdi->ops->open(cdi, 1); | ||
1164 | } else { | ||
1165 | ret = open_for_data(cdi); | ||
1166 | if (ret) | ||
1167 | goto err; | ||
1168 | cdrom_mmc3_profile(cdi); | ||
1169 | if (mode & FMODE_WRITE) { | ||
1170 | ret = -EROFS; | ||
1171 | if (cdrom_open_write(cdi)) | ||
1172 | goto err_release; | ||
1173 | if (!CDROM_CAN(CDC_RAM)) | ||
1174 | goto err_release; | ||
1175 | ret = 0; | ||
1176 | cdi->media_written = 0; | ||
1177 | } | ||
1178 | } | ||
1179 | |||
1180 | if (ret) | ||
1181 | goto err; | ||
1182 | |||
1183 | cd_dbg(CD_OPEN, "Use count for \"/dev/%s\" now %d\n", | ||
1184 | cdi->name, cdi->use_count); | ||
1185 | return 0; | ||
1186 | err_release: | ||
1187 | if (CDROM_CAN(CDC_LOCK) && cdi->options & CDO_LOCK) { | ||
1188 | cdi->ops->lock_door(cdi, 0); | ||
1189 | cd_dbg(CD_OPEN, "door unlocked\n"); | ||
1190 | } | ||
1191 | cdi->ops->release(cdi); | ||
1192 | err: | ||
1193 | cdi->use_count--; | ||
1194 | return ret; | ||
1195 | } | ||
1196 | |||
1134 | /* This code is similar to that in open_for_data. The routine is called | 1197 | /* This code is similar to that in open_for_data. The routine is called |
1135 | whenever an audio play operation is requested. | 1198 | whenever an audio play operation is requested. |
1136 | */ | 1199 | */ |
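The relocated comment above cdrom_open() describes a userspace convention worth spelling out: an O_NONBLOCK open deliberately skips the disc integrity checks, so status ioctls work even on an empty or open drive. From the application side, roughly:

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/cdrom.h>

	int fd = open("/dev/cdrom", O_RDONLY | O_NONBLOCK);
	if (fd >= 0) {
		/* works with no medium present; a plain open() would fail */
		int status = ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT);
		/* status: CDS_NO_DISC, CDS_TRAY_OPEN, CDS_DISC_OK, ... */
		close(fd);
	}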
@@ -1139,21 +1202,21 @@ static int check_for_audio_disc(struct cdrom_device_info * cdi, | |||
1139 | { | 1202 | { |
1140 | int ret; | 1203 | int ret; |
1141 | tracktype tracks; | 1204 | tracktype tracks; |
1142 | cdinfo(CD_OPEN, "entering check_for_audio_disc\n"); | 1205 | cd_dbg(CD_OPEN, "entering check_for_audio_disc\n"); |
1143 | if (!(cdi->options & CDO_CHECK_TYPE)) | 1206 | if (!(cdi->options & CDO_CHECK_TYPE)) |
1144 | return 0; | 1207 | return 0; |
1145 | if (cdo->drive_status != NULL) { | 1208 | if (cdo->drive_status != NULL) { |
1146 | ret = cdo->drive_status(cdi, CDSL_CURRENT); | 1209 | ret = cdo->drive_status(cdi, CDSL_CURRENT); |
1147 | cdinfo(CD_OPEN, "drive_status=%d\n", ret); | 1210 | cd_dbg(CD_OPEN, "drive_status=%d\n", ret); |
1148 | if (ret == CDS_TRAY_OPEN) { | 1211 | if (ret == CDS_TRAY_OPEN) { |
1149 | cdinfo(CD_OPEN, "the tray is open...\n"); | 1212 | cd_dbg(CD_OPEN, "the tray is open...\n"); |
1150 | /* can/may i close it? */ | 1213 | /* can/may i close it? */ |
1151 | if (CDROM_CAN(CDC_CLOSE_TRAY) && | 1214 | if (CDROM_CAN(CDC_CLOSE_TRAY) && |
1152 | cdi->options & CDO_AUTO_CLOSE) { | 1215 | cdi->options & CDO_AUTO_CLOSE) { |
1153 | cdinfo(CD_OPEN, "trying to close the tray.\n"); | 1216 | cd_dbg(CD_OPEN, "trying to close the tray\n"); |
1154 | ret=cdo->tray_move(cdi,0); | 1217 | ret=cdo->tray_move(cdi,0); |
1155 | if (ret) { | 1218 | if (ret) { |
1156 | cdinfo(CD_OPEN, "bummer. tried to close tray but failed.\n"); | 1219 | cd_dbg(CD_OPEN, "bummer. tried to close tray but failed.\n"); |
1157 | /* Ignore the error from the low | 1220 | /* Ignore the error from the low |
1158 | level driver. We don't care why it | 1221 | level driver. We don't care why it |
1159 | couldn't close the tray. We only care | 1222 | couldn't close the tray. We only care |
@@ -1162,20 +1225,20 @@ static int check_for_audio_disc(struct cdrom_device_info * cdi, | |||
1162 | return -ENOMEDIUM; | 1225 | return -ENOMEDIUM; |
1163 | } | 1226 | } |
1164 | } else { | 1227 | } else { |
1165 | cdinfo(CD_OPEN, "bummer. this driver can't close the tray.\n"); | 1228 | cd_dbg(CD_OPEN, "bummer. this driver can't close the tray.\n"); |
1166 | return -ENOMEDIUM; | 1229 | return -ENOMEDIUM; |
1167 | } | 1230 | } |
1168 | /* Ok, the door should be closed now.. Check again */ | 1231 | /* Ok, the door should be closed now.. Check again */ |
1169 | ret = cdo->drive_status(cdi, CDSL_CURRENT); | 1232 | ret = cdo->drive_status(cdi, CDSL_CURRENT); |
1170 | if ((ret == CDS_NO_DISC) || (ret==CDS_TRAY_OPEN)) { | 1233 | if ((ret == CDS_NO_DISC) || (ret==CDS_TRAY_OPEN)) { |
1171 | cdinfo(CD_OPEN, "bummer. the tray is still not closed.\n"); | 1234 | cd_dbg(CD_OPEN, "bummer. the tray is still not closed.\n"); |
1172 | return -ENOMEDIUM; | 1235 | return -ENOMEDIUM; |
1173 | } | 1236 | } |
1174 | if (ret!=CDS_DISC_OK) { | 1237 | if (ret!=CDS_DISC_OK) { |
1175 | cdinfo(CD_OPEN, "bummer. disc isn't ready.\n"); | 1238 | cd_dbg(CD_OPEN, "bummer. disc isn't ready.\n"); |
1176 | return -EIO; | 1239 | return -EIO; |
1177 | } | 1240 | } |
1178 | cdinfo(CD_OPEN, "the tray is now closed.\n"); | 1241 | cd_dbg(CD_OPEN, "the tray is now closed\n"); |
1179 | } | 1242 | } |
1180 | } | 1243 | } |
1181 | cdrom_count_tracks(cdi, &tracks); | 1244 | cdrom_count_tracks(cdi, &tracks); |
@@ -1193,17 +1256,18 @@ void cdrom_release(struct cdrom_device_info *cdi, fmode_t mode) | |||
1193 | struct cdrom_device_ops *cdo = cdi->ops; | 1256 | struct cdrom_device_ops *cdo = cdi->ops; |
1194 | int opened_for_data; | 1257 | int opened_for_data; |
1195 | 1258 | ||
1196 | cdinfo(CD_CLOSE, "entering cdrom_release\n"); | 1259 | cd_dbg(CD_CLOSE, "entering cdrom_release\n"); |
1197 | 1260 | ||
1198 | if (cdi->use_count > 0) | 1261 | if (cdi->use_count > 0) |
1199 | cdi->use_count--; | 1262 | cdi->use_count--; |
1200 | 1263 | ||
1201 | if (cdi->use_count == 0) { | 1264 | if (cdi->use_count == 0) { |
1202 | cdinfo(CD_CLOSE, "Use count for \"/dev/%s\" now zero\n", cdi->name); | 1265 | cd_dbg(CD_CLOSE, "Use count for \"/dev/%s\" now zero\n", |
1266 | cdi->name); | ||
1203 | cdrom_dvd_rw_close_write(cdi); | 1267 | cdrom_dvd_rw_close_write(cdi); |
1204 | 1268 | ||
1205 | if ((cdo->capability & CDC_LOCK) && !cdi->keeplocked) { | 1269 | if ((cdo->capability & CDC_LOCK) && !cdi->keeplocked) { |
1206 | cdinfo(CD_CLOSE, "Unlocking door!\n"); | 1270 | cd_dbg(CD_CLOSE, "Unlocking door!\n"); |
1207 | cdo->lock_door(cdi, 0); | 1271 | cdo->lock_door(cdi, 0); |
1208 | } | 1272 | } |
1209 | } | 1273 | } |
@@ -1262,7 +1326,7 @@ static int cdrom_slot_status(struct cdrom_device_info *cdi, int slot) | |||
1262 | struct cdrom_changer_info *info; | 1326 | struct cdrom_changer_info *info; |
1263 | int ret; | 1327 | int ret; |
1264 | 1328 | ||
1265 | cdinfo(CD_CHANGER, "entering cdrom_slot_status()\n"); | 1329 | cd_dbg(CD_CHANGER, "entering cdrom_slot_status()\n"); |
1266 | if (cdi->sanyo_slot) | 1330 | if (cdi->sanyo_slot) |
1267 | return CDS_NO_INFO; | 1331 | return CDS_NO_INFO; |
1268 | 1332 | ||
@@ -1292,7 +1356,7 @@ int cdrom_number_of_slots(struct cdrom_device_info *cdi) | |||
1292 | int nslots = 1; | 1356 | int nslots = 1; |
1293 | struct cdrom_changer_info *info; | 1357 | struct cdrom_changer_info *info; |
1294 | 1358 | ||
1295 | cdinfo(CD_CHANGER, "entering cdrom_number_of_slots()\n"); | 1359 | cd_dbg(CD_CHANGER, "entering cdrom_number_of_slots()\n"); |
1296 | /* cdrom_read_mech_status requires a valid value for capacity: */ | 1360 | /* cdrom_read_mech_status requires a valid value for capacity: */ |
1297 | cdi->capacity = 0; | 1361 | cdi->capacity = 0; |
1298 | 1362 | ||
@@ -1313,7 +1377,7 @@ static int cdrom_load_unload(struct cdrom_device_info *cdi, int slot) | |||
1313 | { | 1377 | { |
1314 | struct packet_command cgc; | 1378 | struct packet_command cgc; |
1315 | 1379 | ||
1316 | cdinfo(CD_CHANGER, "entering cdrom_load_unload()\n"); | 1380 | cd_dbg(CD_CHANGER, "entering cdrom_load_unload()\n"); |
1317 | if (cdi->sanyo_slot && slot < 0) | 1381 | if (cdi->sanyo_slot && slot < 0) |
1318 | return 0; | 1382 | return 0; |
1319 | 1383 | ||
@@ -1342,7 +1406,7 @@ static int cdrom_select_disc(struct cdrom_device_info *cdi, int slot) | |||
1342 | int curslot; | 1406 | int curslot; |
1343 | int ret; | 1407 | int ret; |
1344 | 1408 | ||
1345 | cdinfo(CD_CHANGER, "entering cdrom_select_disc()\n"); | 1409 | cd_dbg(CD_CHANGER, "entering cdrom_select_disc()\n"); |
1346 | if (!CDROM_CAN(CDC_SELECT_DISC)) | 1410 | if (!CDROM_CAN(CDC_SELECT_DISC)) |
1347 | return -EDRIVE_CANT_DO_THIS; | 1411 | return -EDRIVE_CANT_DO_THIS; |
1348 | 1412 | ||
@@ -1476,51 +1540,6 @@ int cdrom_media_changed(struct cdrom_device_info *cdi) | |||
1476 | return media_changed(cdi, 0); | 1540 | return media_changed(cdi, 0); |
1477 | } | 1541 | } |
1478 | 1542 | ||
1479 | /* badly broken, I know. Is due for a fixup anytime. */ | ||
1480 | static void cdrom_count_tracks(struct cdrom_device_info *cdi, tracktype* tracks) | ||
1481 | { | ||
1482 | struct cdrom_tochdr header; | ||
1483 | struct cdrom_tocentry entry; | ||
1484 | int ret, i; | ||
1485 | tracks->data=0; | ||
1486 | tracks->audio=0; | ||
1487 | tracks->cdi=0; | ||
1488 | tracks->xa=0; | ||
1489 | tracks->error=0; | ||
1490 | cdinfo(CD_COUNT_TRACKS, "entering cdrom_count_tracks\n"); | ||
1491 | /* Grab the TOC header so we can see how many tracks there are */ | ||
1492 | if ((ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCHDR, &header))) { | ||
1493 | if (ret == -ENOMEDIUM) | ||
1494 | tracks->error = CDS_NO_DISC; | ||
1495 | else | ||
1496 | tracks->error = CDS_NO_INFO; | ||
1497 | return; | ||
1498 | } | ||
1499 | /* check what type of tracks are on this disc */ | ||
1500 | entry.cdte_format = CDROM_MSF; | ||
1501 | for (i = header.cdth_trk0; i <= header.cdth_trk1; i++) { | ||
1502 | entry.cdte_track = i; | ||
1503 | if (cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &entry)) { | ||
1504 | tracks->error=CDS_NO_INFO; | ||
1505 | return; | ||
1506 | } | ||
1507 | if (entry.cdte_ctrl & CDROM_DATA_TRACK) { | ||
1508 | if (entry.cdte_format == 0x10) | ||
1509 | tracks->cdi++; | ||
1510 | else if (entry.cdte_format == 0x20) | ||
1511 | tracks->xa++; | ||
1512 | else | ||
1513 | tracks->data++; | ||
1514 | } else | ||
1515 | tracks->audio++; | ||
1516 | cdinfo(CD_COUNT_TRACKS, "track %d: format=%d, ctrl=%d\n", | ||
1517 | i, entry.cdte_format, entry.cdte_ctrl); | ||
1518 | } | ||
1519 | cdinfo(CD_COUNT_TRACKS, "disc has %d tracks: %d=audio %d=data %d=Cd-I %d=XA\n", | ||
1520 | header.cdth_trk1, tracks->audio, tracks->data, | ||
1521 | tracks->cdi, tracks->xa); | ||
1522 | } | ||
1523 | |||
1524 | /* Requests to the low-level drivers will /always/ be done in the | 1543 | /* Requests to the low-level drivers will /always/ be done in the |
1525 | following format convention: | 1544 | following format convention: |
1526 | 1545 | ||
@@ -1632,7 +1651,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1632 | switch (ai->type) { | 1651 | switch (ai->type) { |
1633 | /* LU data send */ | 1652 | /* LU data send */ |
1634 | case DVD_LU_SEND_AGID: | 1653 | case DVD_LU_SEND_AGID: |
1635 | cdinfo(CD_DVD, "entering DVD_LU_SEND_AGID\n"); | 1654 | cd_dbg(CD_DVD, "entering DVD_LU_SEND_AGID\n"); |
1636 | cgc.quiet = 1; | 1655 | cgc.quiet = 1; |
1637 | setup_report_key(&cgc, ai->lsa.agid, 0); | 1656 | setup_report_key(&cgc, ai->lsa.agid, 0); |
1638 | 1657 | ||
@@ -1644,7 +1663,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1644 | break; | 1663 | break; |
1645 | 1664 | ||
1646 | case DVD_LU_SEND_KEY1: | 1665 | case DVD_LU_SEND_KEY1: |
1647 | cdinfo(CD_DVD, "entering DVD_LU_SEND_KEY1\n"); | 1666 | cd_dbg(CD_DVD, "entering DVD_LU_SEND_KEY1\n"); |
1648 | setup_report_key(&cgc, ai->lsk.agid, 2); | 1667 | setup_report_key(&cgc, ai->lsk.agid, 2); |
1649 | 1668 | ||
1650 | if ((ret = cdo->generic_packet(cdi, &cgc))) | 1669 | if ((ret = cdo->generic_packet(cdi, &cgc))) |
@@ -1655,7 +1674,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1655 | break; | 1674 | break; |
1656 | 1675 | ||
1657 | case DVD_LU_SEND_CHALLENGE: | 1676 | case DVD_LU_SEND_CHALLENGE: |
1658 | cdinfo(CD_DVD, "entering DVD_LU_SEND_CHALLENGE\n"); | 1677 | cd_dbg(CD_DVD, "entering DVD_LU_SEND_CHALLENGE\n"); |
1659 | setup_report_key(&cgc, ai->lsc.agid, 1); | 1678 | setup_report_key(&cgc, ai->lsc.agid, 1); |
1660 | 1679 | ||
1661 | if ((ret = cdo->generic_packet(cdi, &cgc))) | 1680 | if ((ret = cdo->generic_packet(cdi, &cgc))) |
@@ -1667,7 +1686,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1667 | 1686 | ||
1668 | /* Post-auth key */ | 1687 | /* Post-auth key */ |
1669 | case DVD_LU_SEND_TITLE_KEY: | 1688 | case DVD_LU_SEND_TITLE_KEY: |
1670 | cdinfo(CD_DVD, "entering DVD_LU_SEND_TITLE_KEY\n"); | 1689 | cd_dbg(CD_DVD, "entering DVD_LU_SEND_TITLE_KEY\n"); |
1671 | cgc.quiet = 1; | 1690 | cgc.quiet = 1; |
1672 | setup_report_key(&cgc, ai->lstk.agid, 4); | 1691 | setup_report_key(&cgc, ai->lstk.agid, 4); |
1673 | cgc.cmd[5] = ai->lstk.lba; | 1692 | cgc.cmd[5] = ai->lstk.lba; |
@@ -1686,7 +1705,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1686 | break; | 1705 | break; |
1687 | 1706 | ||
1688 | case DVD_LU_SEND_ASF: | 1707 | case DVD_LU_SEND_ASF: |
1689 | cdinfo(CD_DVD, "entering DVD_LU_SEND_ASF\n"); | 1708 | cd_dbg(CD_DVD, "entering DVD_LU_SEND_ASF\n"); |
1690 | setup_report_key(&cgc, ai->lsasf.agid, 5); | 1709 | setup_report_key(&cgc, ai->lsasf.agid, 5); |
1691 | 1710 | ||
1692 | if ((ret = cdo->generic_packet(cdi, &cgc))) | 1711 | if ((ret = cdo->generic_packet(cdi, &cgc))) |
@@ -1697,7 +1716,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1697 | 1716 | ||
1698 | /* LU data receive (LU changes state) */ | 1717 | /* LU data receive (LU changes state) */ |
1699 | case DVD_HOST_SEND_CHALLENGE: | 1718 | case DVD_HOST_SEND_CHALLENGE: |
1700 | cdinfo(CD_DVD, "entering DVD_HOST_SEND_CHALLENGE\n"); | 1719 | cd_dbg(CD_DVD, "entering DVD_HOST_SEND_CHALLENGE\n"); |
1701 | setup_send_key(&cgc, ai->hsc.agid, 1); | 1720 | setup_send_key(&cgc, ai->hsc.agid, 1); |
1702 | buf[1] = 0xe; | 1721 | buf[1] = 0xe; |
1703 | copy_chal(&buf[4], ai->hsc.chal); | 1722 | copy_chal(&buf[4], ai->hsc.chal); |
@@ -1709,7 +1728,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1709 | break; | 1728 | break; |
1710 | 1729 | ||
1711 | case DVD_HOST_SEND_KEY2: | 1730 | case DVD_HOST_SEND_KEY2: |
1712 | cdinfo(CD_DVD, "entering DVD_HOST_SEND_KEY2\n"); | 1731 | cd_dbg(CD_DVD, "entering DVD_HOST_SEND_KEY2\n"); |
1713 | setup_send_key(&cgc, ai->hsk.agid, 3); | 1732 | setup_send_key(&cgc, ai->hsk.agid, 3); |
1714 | buf[1] = 0xa; | 1733 | buf[1] = 0xa; |
1715 | copy_key(&buf[4], ai->hsk.key); | 1734 | copy_key(&buf[4], ai->hsk.key); |
@@ -1724,7 +1743,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1724 | /* Misc */ | 1743 | /* Misc */ |
1725 | case DVD_INVALIDATE_AGID: | 1744 | case DVD_INVALIDATE_AGID: |
1726 | cgc.quiet = 1; | 1745 | cgc.quiet = 1; |
1727 | cdinfo(CD_DVD, "entering DVD_INVALIDATE_AGID\n"); | 1746 | cd_dbg(CD_DVD, "entering DVD_INVALIDATE_AGID\n"); |
1728 | setup_report_key(&cgc, ai->lsa.agid, 0x3f); | 1747 | setup_report_key(&cgc, ai->lsa.agid, 0x3f); |
1729 | if ((ret = cdo->generic_packet(cdi, &cgc))) | 1748 | if ((ret = cdo->generic_packet(cdi, &cgc))) |
1730 | return ret; | 1749 | return ret; |
@@ -1732,7 +1751,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1732 | 1751 | ||
1733 | /* Get region settings */ | 1752 | /* Get region settings */ |
1734 | case DVD_LU_SEND_RPC_STATE: | 1753 | case DVD_LU_SEND_RPC_STATE: |
1735 | cdinfo(CD_DVD, "entering DVD_LU_SEND_RPC_STATE\n"); | 1754 | cd_dbg(CD_DVD, "entering DVD_LU_SEND_RPC_STATE\n"); |
1736 | setup_report_key(&cgc, 0, 8); | 1755 | setup_report_key(&cgc, 0, 8); |
1737 | memset(&rpc_state, 0, sizeof(rpc_state_t)); | 1756 | memset(&rpc_state, 0, sizeof(rpc_state_t)); |
1738 | cgc.buffer = (char *) &rpc_state; | 1757 | cgc.buffer = (char *) &rpc_state; |
@@ -1749,7 +1768,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1749 | 1768 | ||
1750 | /* Set region settings */ | 1769 | /* Set region settings */ |
1751 | case DVD_HOST_SEND_RPC_STATE: | 1770 | case DVD_HOST_SEND_RPC_STATE: |
1752 | cdinfo(CD_DVD, "entering DVD_HOST_SEND_RPC_STATE\n"); | 1771 | cd_dbg(CD_DVD, "entering DVD_HOST_SEND_RPC_STATE\n"); |
1753 | setup_send_key(&cgc, 0, 6); | 1772 | setup_send_key(&cgc, 0, 6); |
1754 | buf[1] = 6; | 1773 | buf[1] = 6; |
1755 | buf[4] = ai->hrpcs.pdrc; | 1774 | buf[4] = ai->hrpcs.pdrc; |
@@ -1759,7 +1778,7 @@ static int dvd_do_auth(struct cdrom_device_info *cdi, dvd_authinfo *ai) | |||
1759 | break; | 1778 | break; |
1760 | 1779 | ||
1761 | default: | 1780 | default: |
1762 | cdinfo(CD_WARNING, "Invalid DVD key ioctl (%d)\n", ai->type); | 1781 | cd_dbg(CD_WARNING, "Invalid DVD key ioctl (%d)\n", ai->type); |
1763 | return -ENOTTY; | 1782 | return -ENOTTY; |
1764 | } | 1783 | } |
1765 | 1784 | ||
@@ -1891,7 +1910,8 @@ static int dvd_read_bca(struct cdrom_device_info *cdi, dvd_struct *s, | |||
1891 | 1910 | ||
1892 | s->bca.len = buf[0] << 8 | buf[1]; | 1911 | s->bca.len = buf[0] << 8 | buf[1]; |
1893 | if (s->bca.len < 12 || s->bca.len > 188) { | 1912 | if (s->bca.len < 12 || s->bca.len > 188) { |
1894 | cdinfo(CD_WARNING, "Received invalid BCA length (%d)\n", s->bca.len); | 1913 | cd_dbg(CD_WARNING, "Received invalid BCA length (%d)\n", |
1914 | s->bca.len); | ||
1895 | ret = -EIO; | 1915 | ret = -EIO; |
1896 | goto out; | 1916 | goto out; |
1897 | } | 1917 | } |
@@ -1927,14 +1947,13 @@ static int dvd_read_manufact(struct cdrom_device_info *cdi, dvd_struct *s, | |||
1927 | 1947 | ||
1928 | s->manufact.len = buf[0] << 8 | buf[1]; | 1948 | s->manufact.len = buf[0] << 8 | buf[1]; |
1929 | if (s->manufact.len < 0) { | 1949 | if (s->manufact.len < 0) { |
1930 | cdinfo(CD_WARNING, "Received invalid manufacture info length" | 1950 | cd_dbg(CD_WARNING, "Received invalid manufacture info length (%d)\n", |
1931 | " (%d)\n", s->manufact.len); | 1951 | s->manufact.len); |
1932 | ret = -EIO; | 1952 | ret = -EIO; |
1933 | } else { | 1953 | } else { |
1934 | if (s->manufact.len > 2048) { | 1954 | if (s->manufact.len > 2048) { |
1935 | cdinfo(CD_WARNING, "Received invalid manufacture info " | 1955 | cd_dbg(CD_WARNING, "Received invalid manufacture info length (%d): truncating to 2048\n", |
1936 | "length (%d): truncating to 2048\n", | 1956 | s->manufact.len); |
1937 | s->manufact.len); | ||
1938 | s->manufact.len = 2048; | 1957 | s->manufact.len = 2048; |
1939 | } | 1958 | } |
1940 | memcpy(s->manufact.value, &buf[4], s->manufact.len); | 1959 | memcpy(s->manufact.value, &buf[4], s->manufact.len); |
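The message fixups in this hunk follow the kernel rule that user-visible strings stay on one line even past 80 columns, so they remain greppable; only the arguments wrap. In isolation:

	/* before: the string is split, so grepping for the message fails */
	cd_dbg(CD_WARNING, "Received invalid manufacture info length"
	       " (%d)\n", s->manufact.len);

	/* after: one searchable format string, arguments wrapped instead */
	cd_dbg(CD_WARNING, "Received invalid manufacture info length (%d)\n",
	       s->manufact.len);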
@@ -1965,8 +1984,8 @@ static int dvd_read_struct(struct cdrom_device_info *cdi, dvd_struct *s, | |||
1965 | return dvd_read_manufact(cdi, s, cgc); | 1984 | return dvd_read_manufact(cdi, s, cgc); |
1966 | 1985 | ||
1967 | default: | 1986 | default: |
1968 | cdinfo(CD_WARNING, ": Invalid DVD structure read requested (%d)\n", | 1987 | cd_dbg(CD_WARNING, ": Invalid DVD structure read requested (%d)\n", |
1969 | s->type); | 1988 | s->type); |
1970 | return -EINVAL; | 1989 | return -EINVAL; |
1971 | } | 1990 | } |
1972 | } | 1991 | } |
@@ -2255,7 +2274,7 @@ static int cdrom_ioctl_multisession(struct cdrom_device_info *cdi, | |||
2255 | u8 requested_format; | 2274 | u8 requested_format; |
2256 | int ret; | 2275 | int ret; |
2257 | 2276 | ||
2258 | cdinfo(CD_DO_IOCTL, "entering CDROMMULTISESSION\n"); | 2277 | cd_dbg(CD_DO_IOCTL, "entering CDROMMULTISESSION\n"); |
2259 | 2278 | ||
2260 | if (!(cdi->ops->capability & CDC_MULTI_SESSION)) | 2279 | if (!(cdi->ops->capability & CDC_MULTI_SESSION)) |
2261 | return -ENOSYS; | 2280 | return -ENOSYS; |
@@ -2277,13 +2296,13 @@ static int cdrom_ioctl_multisession(struct cdrom_device_info *cdi, | |||
2277 | if (copy_to_user(argp, &ms_info, sizeof(ms_info))) | 2296 | if (copy_to_user(argp, &ms_info, sizeof(ms_info))) |
2278 | return -EFAULT; | 2297 | return -EFAULT; |
2279 | 2298 | ||
2280 | cdinfo(CD_DO_IOCTL, "CDROMMULTISESSION successful\n"); | 2299 | cd_dbg(CD_DO_IOCTL, "CDROMMULTISESSION successful\n"); |
2281 | return 0; | 2300 | return 0; |
2282 | } | 2301 | } |
2283 | 2302 | ||
2284 | static int cdrom_ioctl_eject(struct cdrom_device_info *cdi) | 2303 | static int cdrom_ioctl_eject(struct cdrom_device_info *cdi) |
2285 | { | 2304 | { |
2286 | cdinfo(CD_DO_IOCTL, "entering CDROMEJECT\n"); | 2305 | cd_dbg(CD_DO_IOCTL, "entering CDROMEJECT\n"); |
2287 | 2306 | ||
2288 | if (!CDROM_CAN(CDC_OPEN_TRAY)) | 2307 | if (!CDROM_CAN(CDC_OPEN_TRAY)) |
2289 | return -ENOSYS; | 2308 | return -ENOSYS; |
@@ -2300,7 +2319,7 @@ static int cdrom_ioctl_eject(struct cdrom_device_info *cdi) | |||
2300 | 2319 | ||
2301 | static int cdrom_ioctl_closetray(struct cdrom_device_info *cdi) | 2320 | static int cdrom_ioctl_closetray(struct cdrom_device_info *cdi) |
2302 | { | 2321 | { |
2303 | cdinfo(CD_DO_IOCTL, "entering CDROMCLOSETRAY\n"); | 2322 | cd_dbg(CD_DO_IOCTL, "entering CDROMCLOSETRAY\n"); |
2304 | 2323 | ||
2305 | if (!CDROM_CAN(CDC_CLOSE_TRAY)) | 2324 | if (!CDROM_CAN(CDC_CLOSE_TRAY)) |
2306 | return -ENOSYS; | 2325 | return -ENOSYS; |
@@ -2310,7 +2329,7 @@ static int cdrom_ioctl_closetray(struct cdrom_device_info *cdi) | |||
2310 | static int cdrom_ioctl_eject_sw(struct cdrom_device_info *cdi, | 2329 | static int cdrom_ioctl_eject_sw(struct cdrom_device_info *cdi, |
2311 | unsigned long arg) | 2330 | unsigned long arg) |
2312 | { | 2331 | { |
2313 | cdinfo(CD_DO_IOCTL, "entering CDROMEJECT_SW\n"); | 2332 | cd_dbg(CD_DO_IOCTL, "entering CDROMEJECT_SW\n"); |
2314 | 2333 | ||
2315 | if (!CDROM_CAN(CDC_OPEN_TRAY)) | 2334 | if (!CDROM_CAN(CDC_OPEN_TRAY)) |
2316 | return -ENOSYS; | 2335 | return -ENOSYS; |
@@ -2329,7 +2348,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi, | |||
2329 | struct cdrom_changer_info *info; | 2348 | struct cdrom_changer_info *info; |
2330 | int ret; | 2349 | int ret; |
2331 | 2350 | ||
2332 | cdinfo(CD_DO_IOCTL, "entering CDROM_MEDIA_CHANGED\n"); | 2351 | cd_dbg(CD_DO_IOCTL, "entering CDROM_MEDIA_CHANGED\n"); |
2333 | 2352 | ||
2334 | if (!CDROM_CAN(CDC_MEDIA_CHANGED)) | 2353 | if (!CDROM_CAN(CDC_MEDIA_CHANGED)) |
2335 | return -ENOSYS; | 2354 | return -ENOSYS; |
@@ -2355,7 +2374,7 @@ static int cdrom_ioctl_media_changed(struct cdrom_device_info *cdi, | |||
2355 | static int cdrom_ioctl_set_options(struct cdrom_device_info *cdi, | 2374 | static int cdrom_ioctl_set_options(struct cdrom_device_info *cdi, |
2356 | unsigned long arg) | 2375 | unsigned long arg) |
2357 | { | 2376 | { |
2358 | cdinfo(CD_DO_IOCTL, "entering CDROM_SET_OPTIONS\n"); | 2377 | cd_dbg(CD_DO_IOCTL, "entering CDROM_SET_OPTIONS\n"); |
2359 | 2378 | ||
2360 | /* | 2379 | /* |
2361 | * Options need to be in sync with capability. | 2380 | * Options need to be in sync with capability. |
@@ -2383,7 +2402,7 @@ static int cdrom_ioctl_set_options(struct cdrom_device_info *cdi, | |||
2383 | static int cdrom_ioctl_clear_options(struct cdrom_device_info *cdi, | 2402 | static int cdrom_ioctl_clear_options(struct cdrom_device_info *cdi, |
2384 | unsigned long arg) | 2403 | unsigned long arg) |
2385 | { | 2404 | { |
2386 | cdinfo(CD_DO_IOCTL, "entering CDROM_CLEAR_OPTIONS\n"); | 2405 | cd_dbg(CD_DO_IOCTL, "entering CDROM_CLEAR_OPTIONS\n"); |
2387 | 2406 | ||
2388 | cdi->options &= ~(int) arg; | 2407 | cdi->options &= ~(int) arg; |
2389 | return cdi->options; | 2408 | return cdi->options; |
@@ -2392,7 +2411,7 @@ static int cdrom_ioctl_clear_options(struct cdrom_device_info *cdi, | |||
2392 | static int cdrom_ioctl_select_speed(struct cdrom_device_info *cdi, | 2411 | static int cdrom_ioctl_select_speed(struct cdrom_device_info *cdi, |
2393 | unsigned long arg) | 2412 | unsigned long arg) |
2394 | { | 2413 | { |
2395 | cdinfo(CD_DO_IOCTL, "entering CDROM_SELECT_SPEED\n"); | 2414 | cd_dbg(CD_DO_IOCTL, "entering CDROM_SELECT_SPEED\n"); |
2396 | 2415 | ||
2397 | if (!CDROM_CAN(CDC_SELECT_SPEED)) | 2416 | if (!CDROM_CAN(CDC_SELECT_SPEED)) |
2398 | return -ENOSYS; | 2417 | return -ENOSYS; |
@@ -2402,7 +2421,7 @@ static int cdrom_ioctl_select_speed(struct cdrom_device_info *cdi, | |||
2402 | static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi, | 2421 | static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi, |
2403 | unsigned long arg) | 2422 | unsigned long arg) |
2404 | { | 2423 | { |
2405 | cdinfo(CD_DO_IOCTL, "entering CDROM_SELECT_DISC\n"); | 2424 | cd_dbg(CD_DO_IOCTL, "entering CDROM_SELECT_DISC\n"); |
2406 | 2425 | ||
2407 | if (!CDROM_CAN(CDC_SELECT_DISC)) | 2426 | if (!CDROM_CAN(CDC_SELECT_DISC)) |
2408 | return -ENOSYS; | 2427 | return -ENOSYS; |
@@ -2420,14 +2439,14 @@ static int cdrom_ioctl_select_disc(struct cdrom_device_info *cdi, | |||
2420 | if (cdi->ops->select_disc) | 2439 | if (cdi->ops->select_disc) |
2421 | return cdi->ops->select_disc(cdi, arg); | 2440 | return cdi->ops->select_disc(cdi, arg); |
2422 | 2441 | ||
2423 | cdinfo(CD_CHANGER, "Using generic cdrom_select_disc()\n"); | 2442 | cd_dbg(CD_CHANGER, "Using generic cdrom_select_disc()\n"); |
2424 | return cdrom_select_disc(cdi, arg); | 2443 | return cdrom_select_disc(cdi, arg); |
2425 | } | 2444 | } |
2426 | 2445 | ||
2427 | static int cdrom_ioctl_reset(struct cdrom_device_info *cdi, | 2446 | static int cdrom_ioctl_reset(struct cdrom_device_info *cdi, |
2428 | struct block_device *bdev) | 2447 | struct block_device *bdev) |
2429 | { | 2448 | { |
2430 | cdinfo(CD_DO_IOCTL, "entering CDROM_RESET\n"); | 2449 | cd_dbg(CD_DO_IOCTL, "entering CDROM_RESET\n"); |
2431 | 2450 | ||
2432 | if (!capable(CAP_SYS_ADMIN)) | 2451 | if (!capable(CAP_SYS_ADMIN)) |
2433 | return -EACCES; | 2452 | return -EACCES; |
@@ -2440,7 +2459,7 @@ static int cdrom_ioctl_reset(struct cdrom_device_info *cdi, | |||
2440 | static int cdrom_ioctl_lock_door(struct cdrom_device_info *cdi, | 2459 | static int cdrom_ioctl_lock_door(struct cdrom_device_info *cdi, |
2441 | unsigned long arg) | 2460 | unsigned long arg) |
2442 | { | 2461 | { |
2443 | cdinfo(CD_DO_IOCTL, "%socking door.\n", arg ? "L" : "Unl"); | 2462 | cd_dbg(CD_DO_IOCTL, "%socking door\n", arg ? "L" : "Unl"); |
2444 | 2463 | ||
2445 | if (!CDROM_CAN(CDC_LOCK)) | 2464 | if (!CDROM_CAN(CDC_LOCK)) |
2446 | return -EDRIVE_CANT_DO_THIS; | 2465 | return -EDRIVE_CANT_DO_THIS; |
@@ -2459,7 +2478,7 @@ static int cdrom_ioctl_lock_door(struct cdrom_device_info *cdi, | |||
2459 | static int cdrom_ioctl_debug(struct cdrom_device_info *cdi, | 2478 | static int cdrom_ioctl_debug(struct cdrom_device_info *cdi, |
2460 | unsigned long arg) | 2479 | unsigned long arg) |
2461 | { | 2480 | { |
2462 | cdinfo(CD_DO_IOCTL, "%sabling debug.\n", arg ? "En" : "Dis"); | 2481 | cd_dbg(CD_DO_IOCTL, "%sabling debug\n", arg ? "En" : "Dis"); |
2463 | 2482 | ||
2464 | if (!capable(CAP_SYS_ADMIN)) | 2483 | if (!capable(CAP_SYS_ADMIN)) |
2465 | return -EACCES; | 2484 | return -EACCES; |
@@ -2469,7 +2488,7 @@ static int cdrom_ioctl_debug(struct cdrom_device_info *cdi, | |||
2469 | 2488 | ||
2470 | static int cdrom_ioctl_get_capability(struct cdrom_device_info *cdi) | 2489 | static int cdrom_ioctl_get_capability(struct cdrom_device_info *cdi) |
2471 | { | 2490 | { |
2472 | cdinfo(CD_DO_IOCTL, "entering CDROM_GET_CAPABILITY\n"); | 2491 | cd_dbg(CD_DO_IOCTL, "entering CDROM_GET_CAPABILITY\n"); |
2473 | return (cdi->ops->capability & ~cdi->mask); | 2492 | return (cdi->ops->capability & ~cdi->mask); |
2474 | } | 2493 | } |
2475 | 2494 | ||
@@ -2485,7 +2504,7 @@ static int cdrom_ioctl_get_mcn(struct cdrom_device_info *cdi, | |||
2485 | struct cdrom_mcn mcn; | 2504 | struct cdrom_mcn mcn; |
2486 | int ret; | 2505 | int ret; |
2487 | 2506 | ||
2488 | cdinfo(CD_DO_IOCTL, "entering CDROM_GET_MCN\n"); | 2507 | cd_dbg(CD_DO_IOCTL, "entering CDROM_GET_MCN\n"); |
2489 | 2508 | ||
2490 | if (!(cdi->ops->capability & CDC_MCN)) | 2509 | if (!(cdi->ops->capability & CDC_MCN)) |
2491 | return -ENOSYS; | 2510 | return -ENOSYS; |
@@ -2495,14 +2514,14 @@ static int cdrom_ioctl_get_mcn(struct cdrom_device_info *cdi, | |||
2495 | 2514 | ||
2496 | if (copy_to_user(argp, &mcn, sizeof(mcn))) | 2515 | if (copy_to_user(argp, &mcn, sizeof(mcn))) |
2497 | return -EFAULT; | 2516 | return -EFAULT; |
2498 | cdinfo(CD_DO_IOCTL, "CDROM_GET_MCN successful\n"); | 2517 | cd_dbg(CD_DO_IOCTL, "CDROM_GET_MCN successful\n"); |
2499 | return 0; | 2518 | return 0; |
2500 | } | 2519 | } |
2501 | 2520 | ||
2502 | static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi, | 2521 | static int cdrom_ioctl_drive_status(struct cdrom_device_info *cdi, |
2503 | unsigned long arg) | 2522 | unsigned long arg) |
2504 | { | 2523 | { |
2505 | cdinfo(CD_DO_IOCTL, "entering CDROM_DRIVE_STATUS\n"); | 2524 | cd_dbg(CD_DO_IOCTL, "entering CDROM_DRIVE_STATUS\n"); |
2506 | 2525 | ||
2507 | if (!(cdi->ops->capability & CDC_DRIVE_STATUS)) | 2526 | if (!(cdi->ops->capability & CDC_DRIVE_STATUS)) |
2508 | return -ENOSYS; | 2527 | return -ENOSYS; |
@@ -2535,7 +2554,7 @@ static int cdrom_ioctl_disc_status(struct cdrom_device_info *cdi) | |||
2535 | { | 2554 | { |
2536 | tracktype tracks; | 2555 | tracktype tracks; |
2537 | 2556 | ||
2538 | cdinfo(CD_DO_IOCTL, "entering CDROM_DISC_STATUS\n"); | 2557 | cd_dbg(CD_DO_IOCTL, "entering CDROM_DISC_STATUS\n"); |
2539 | 2558 | ||
2540 | cdrom_count_tracks(cdi, &tracks); | 2559 | cdrom_count_tracks(cdi, &tracks); |
2541 | if (tracks.error) | 2560 | if (tracks.error) |
@@ -2557,13 +2576,13 @@ static int cdrom_ioctl_disc_status(struct cdrom_device_info *cdi) | |||
2557 | return CDS_DATA_1; | 2576 | return CDS_DATA_1; |
2558 | /* Policy mode off */ | 2577 | /* Policy mode off */ |
2559 | 2578 | ||
2560 | cdinfo(CD_WARNING,"This disc doesn't have any tracks I recognize!\n"); | 2579 | cd_dbg(CD_WARNING, "This disc doesn't have any tracks I recognize!\n"); |
2561 | return CDS_NO_INFO; | 2580 | return CDS_NO_INFO; |
2562 | } | 2581 | } |
2563 | 2582 | ||
2564 | static int cdrom_ioctl_changer_nslots(struct cdrom_device_info *cdi) | 2583 | static int cdrom_ioctl_changer_nslots(struct cdrom_device_info *cdi) |
2565 | { | 2584 | { |
2566 | cdinfo(CD_DO_IOCTL, "entering CDROM_CHANGER_NSLOTS\n"); | 2585 | cd_dbg(CD_DO_IOCTL, "entering CDROM_CHANGER_NSLOTS\n"); |
2567 | return cdi->capacity; | 2586 | return cdi->capacity; |
2568 | } | 2587 | } |
2569 | 2588 | ||
@@ -2574,7 +2593,7 @@ static int cdrom_ioctl_get_subchnl(struct cdrom_device_info *cdi, | |||
2574 | u8 requested, back; | 2593 | u8 requested, back; |
2575 | int ret; | 2594 | int ret; |
2576 | 2595 | ||
2577 | /* cdinfo(CD_DO_IOCTL,"entering CDROMSUBCHNL\n");*/ | 2596 | /* cd_dbg(CD_DO_IOCTL,"entering CDROMSUBCHNL\n");*/ |
2578 | 2597 | ||
2579 | if (copy_from_user(&q, argp, sizeof(q))) | 2598 | if (copy_from_user(&q, argp, sizeof(q))) |
2580 | return -EFAULT; | 2599 | return -EFAULT; |
@@ -2594,7 +2613,7 @@ static int cdrom_ioctl_get_subchnl(struct cdrom_device_info *cdi, | |||
2594 | 2613 | ||
2595 | if (copy_to_user(argp, &q, sizeof(q))) | 2614 | if (copy_to_user(argp, &q, sizeof(q))) |
2596 | return -EFAULT; | 2615 | return -EFAULT; |
2597 | /* cdinfo(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */ | 2616 | /* cd_dbg(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */ |
2598 | return 0; | 2617 | return 0; |
2599 | } | 2618 | } |
2600 | 2619 | ||
@@ -2604,7 +2623,7 @@ static int cdrom_ioctl_read_tochdr(struct cdrom_device_info *cdi, | |||
2604 | struct cdrom_tochdr header; | 2623 | struct cdrom_tochdr header; |
2605 | int ret; | 2624 | int ret; |
2606 | 2625 | ||
2607 | /* cdinfo(CD_DO_IOCTL, "entering CDROMREADTOCHDR\n"); */ | 2626 | /* cd_dbg(CD_DO_IOCTL, "entering CDROMREADTOCHDR\n"); */ |
2608 | 2627 | ||
2609 | if (copy_from_user(&header, argp, sizeof(header))) | 2628 | if (copy_from_user(&header, argp, sizeof(header))) |
2610 | return -EFAULT; | 2629 | return -EFAULT; |
@@ -2615,7 +2634,7 @@ static int cdrom_ioctl_read_tochdr(struct cdrom_device_info *cdi, | |||
2615 | 2634 | ||
2616 | if (copy_to_user(argp, &header, sizeof(header))) | 2635 | if (copy_to_user(argp, &header, sizeof(header))) |
2617 | return -EFAULT; | 2636 | return -EFAULT; |
2618 | /* cdinfo(CD_DO_IOCTL, "CDROMREADTOCHDR successful\n"); */ | 2637 | /* cd_dbg(CD_DO_IOCTL, "CDROMREADTOCHDR successful\n"); */ |
2619 | return 0; | 2638 | return 0; |
2620 | } | 2639 | } |
2621 | 2640 | ||
@@ -2626,7 +2645,7 @@ static int cdrom_ioctl_read_tocentry(struct cdrom_device_info *cdi, | |||
2626 | u8 requested_format; | 2645 | u8 requested_format; |
2627 | int ret; | 2646 | int ret; |
2628 | 2647 | ||
2629 | /* cdinfo(CD_DO_IOCTL, "entering CDROMREADTOCENTRY\n"); */ | 2648 | /* cd_dbg(CD_DO_IOCTL, "entering CDROMREADTOCENTRY\n"); */ |
2630 | 2649 | ||
2631 | if (copy_from_user(&entry, argp, sizeof(entry))) | 2650 | if (copy_from_user(&entry, argp, sizeof(entry))) |
2632 | return -EFAULT; | 2651 | return -EFAULT; |
@@ -2643,7 +2662,7 @@ static int cdrom_ioctl_read_tocentry(struct cdrom_device_info *cdi, | |||
2643 | 2662 | ||
2644 | if (copy_to_user(argp, &entry, sizeof(entry))) | 2663 | if (copy_to_user(argp, &entry, sizeof(entry))) |
2645 | return -EFAULT; | 2664 | return -EFAULT; |
2646 | /* cdinfo(CD_DO_IOCTL, "CDROMREADTOCENTRY successful\n"); */ | 2665 | /* cd_dbg(CD_DO_IOCTL, "CDROMREADTOCENTRY successful\n"); */ |
2647 | return 0; | 2666 | return 0; |
2648 | } | 2667 | } |
2649 | 2668 | ||
@@ -2652,7 +2671,7 @@ static int cdrom_ioctl_play_msf(struct cdrom_device_info *cdi, | |||
2652 | { | 2671 | { |
2653 | struct cdrom_msf msf; | 2672 | struct cdrom_msf msf; |
2654 | 2673 | ||
2655 | cdinfo(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); | 2674 | cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); |
2656 | 2675 | ||
2657 | if (!CDROM_CAN(CDC_PLAY_AUDIO)) | 2676 | if (!CDROM_CAN(CDC_PLAY_AUDIO)) |
2658 | return -ENOSYS; | 2677 | return -ENOSYS; |
@@ -2667,7 +2686,7 @@ static int cdrom_ioctl_play_trkind(struct cdrom_device_info *cdi, | |||
2667 | struct cdrom_ti ti; | 2686 | struct cdrom_ti ti; |
2668 | int ret; | 2687 | int ret; |
2669 | 2688 | ||
2670 | cdinfo(CD_DO_IOCTL, "entering CDROMPLAYTRKIND\n"); | 2689 | cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYTRKIND\n"); |
2671 | 2690 | ||
2672 | if (!CDROM_CAN(CDC_PLAY_AUDIO)) | 2691 | if (!CDROM_CAN(CDC_PLAY_AUDIO)) |
2673 | return -ENOSYS; | 2692 | return -ENOSYS; |
@@ -2684,7 +2703,7 @@ static int cdrom_ioctl_volctrl(struct cdrom_device_info *cdi, | |||
2684 | { | 2703 | { |
2685 | struct cdrom_volctrl volume; | 2704 | struct cdrom_volctrl volume; |
2686 | 2705 | ||
2687 | cdinfo(CD_DO_IOCTL, "entering CDROMVOLCTRL\n"); | 2706 | cd_dbg(CD_DO_IOCTL, "entering CDROMVOLCTRL\n"); |
2688 | 2707 | ||
2689 | if (!CDROM_CAN(CDC_PLAY_AUDIO)) | 2708 | if (!CDROM_CAN(CDC_PLAY_AUDIO)) |
2690 | return -ENOSYS; | 2709 | return -ENOSYS; |
@@ -2699,7 +2718,7 @@ static int cdrom_ioctl_volread(struct cdrom_device_info *cdi, | |||
2699 | struct cdrom_volctrl volume; | 2718 | struct cdrom_volctrl volume; |
2700 | int ret; | 2719 | int ret; |
2701 | 2720 | ||
2702 | cdinfo(CD_DO_IOCTL, "entering CDROMVOLREAD\n"); | 2721 | cd_dbg(CD_DO_IOCTL, "entering CDROMVOLREAD\n"); |
2703 | 2722 | ||
2704 | if (!CDROM_CAN(CDC_PLAY_AUDIO)) | 2723 | if (!CDROM_CAN(CDC_PLAY_AUDIO)) |
2705 | return -ENOSYS; | 2724 | return -ENOSYS; |
@@ -2718,7 +2737,7 @@ static int cdrom_ioctl_audioctl(struct cdrom_device_info *cdi, | |||
2718 | { | 2737 | { |
2719 | int ret; | 2738 | int ret; |
2720 | 2739 | ||
2721 | cdinfo(CD_DO_IOCTL, "doing audio ioctl (start/stop/pause/resume)\n"); | 2740 | cd_dbg(CD_DO_IOCTL, "doing audio ioctl (start/stop/pause/resume)\n"); |
2722 | 2741 | ||
2723 | if (!CDROM_CAN(CDC_PLAY_AUDIO)) | 2742 | if (!CDROM_CAN(CDC_PLAY_AUDIO)) |
2724 | return -ENOSYS; | 2743 | return -ENOSYS; |
@@ -2729,103 +2748,6 @@ static int cdrom_ioctl_audioctl(struct cdrom_device_info *cdi, | |||
2729 | } | 2748 | } |
2730 | 2749 | ||
2731 | /* | 2750 | /* |
2732 | * Just about every imaginable ioctl is supported in the Uniform layer | ||
2733 | * these days. | ||
2734 | * ATAPI / SCSI specific code now mainly resides in mmc_ioctl(). | ||
2735 | */ | ||
2736 | int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, | ||
2737 | fmode_t mode, unsigned int cmd, unsigned long arg) | ||
2738 | { | ||
2739 | void __user *argp = (void __user *)arg; | ||
2740 | int ret; | ||
2741 | |||
2742 | /* | ||
2743 | * Try the generic SCSI command ioctl's first. | ||
2744 | */ | ||
2745 | ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp); | ||
2746 | if (ret != -ENOTTY) | ||
2747 | return ret; | ||
2748 | |||
2749 | switch (cmd) { | ||
2750 | case CDROMMULTISESSION: | ||
2751 | return cdrom_ioctl_multisession(cdi, argp); | ||
2752 | case CDROMEJECT: | ||
2753 | return cdrom_ioctl_eject(cdi); | ||
2754 | case CDROMCLOSETRAY: | ||
2755 | return cdrom_ioctl_closetray(cdi); | ||
2756 | case CDROMEJECT_SW: | ||
2757 | return cdrom_ioctl_eject_sw(cdi, arg); | ||
2758 | case CDROM_MEDIA_CHANGED: | ||
2759 | return cdrom_ioctl_media_changed(cdi, arg); | ||
2760 | case CDROM_SET_OPTIONS: | ||
2761 | return cdrom_ioctl_set_options(cdi, arg); | ||
2762 | case CDROM_CLEAR_OPTIONS: | ||
2763 | return cdrom_ioctl_clear_options(cdi, arg); | ||
2764 | case CDROM_SELECT_SPEED: | ||
2765 | return cdrom_ioctl_select_speed(cdi, arg); | ||
2766 | case CDROM_SELECT_DISC: | ||
2767 | return cdrom_ioctl_select_disc(cdi, arg); | ||
2768 | case CDROMRESET: | ||
2769 | return cdrom_ioctl_reset(cdi, bdev); | ||
2770 | case CDROM_LOCKDOOR: | ||
2771 | return cdrom_ioctl_lock_door(cdi, arg); | ||
2772 | case CDROM_DEBUG: | ||
2773 | return cdrom_ioctl_debug(cdi, arg); | ||
2774 | case CDROM_GET_CAPABILITY: | ||
2775 | return cdrom_ioctl_get_capability(cdi); | ||
2776 | case CDROM_GET_MCN: | ||
2777 | return cdrom_ioctl_get_mcn(cdi, argp); | ||
2778 | case CDROM_DRIVE_STATUS: | ||
2779 | return cdrom_ioctl_drive_status(cdi, arg); | ||
2780 | case CDROM_DISC_STATUS: | ||
2781 | return cdrom_ioctl_disc_status(cdi); | ||
2782 | case CDROM_CHANGER_NSLOTS: | ||
2783 | return cdrom_ioctl_changer_nslots(cdi); | ||
2784 | } | ||
2785 | |||
2786 | /* | ||
2787 | * Use the ioctls that are implemented through the generic_packet() | ||
2788 | * interface. this may look a bit funny, but if -ENOTTY is | ||
2789 | * returned that particular ioctl is not implemented and we | ||
2790 | * let it go through the device specific ones. | ||
2791 | */ | ||
2792 | if (CDROM_CAN(CDC_GENERIC_PACKET)) { | ||
2793 | ret = mmc_ioctl(cdi, cmd, arg); | ||
2794 | if (ret != -ENOTTY) | ||
2795 | return ret; | ||
2796 | } | ||
2797 | |||
2798 | /* | ||
2799 | * Note: most of the cdinfo() calls are commented out here, | ||
2800 | * because they fill up the syslog when CD players poll | ||
2801 | * the drive. | ||
2802 | */ | ||
2803 | switch (cmd) { | ||
2804 | case CDROMSUBCHNL: | ||
2805 | return cdrom_ioctl_get_subchnl(cdi, argp); | ||
2806 | case CDROMREADTOCHDR: | ||
2807 | return cdrom_ioctl_read_tochdr(cdi, argp); | ||
2808 | case CDROMREADTOCENTRY: | ||
2809 | return cdrom_ioctl_read_tocentry(cdi, argp); | ||
2810 | case CDROMPLAYMSF: | ||
2811 | return cdrom_ioctl_play_msf(cdi, argp); | ||
2812 | case CDROMPLAYTRKIND: | ||
2813 | return cdrom_ioctl_play_trkind(cdi, argp); | ||
2814 | case CDROMVOLCTRL: | ||
2815 | return cdrom_ioctl_volctrl(cdi, argp); | ||
2816 | case CDROMVOLREAD: | ||
2817 | return cdrom_ioctl_volread(cdi, argp); | ||
2818 | case CDROMSTART: | ||
2819 | case CDROMSTOP: | ||
2820 | case CDROMPAUSE: | ||
2821 | case CDROMRESUME: | ||
2822 | return cdrom_ioctl_audioctl(cdi, cmd); | ||
2823 | } | ||
2824 | |||
2825 | return -ENOSYS; | ||
2826 | } | ||
2827 | |||
2828 | /* | ||
2829 | * Required when we need to use READ_10 to issue other than 2048 block | 2751 | * Required when we need to use READ_10 to issue other than 2048 block |
2830 | * reads | 2752 | * reads |
2831 | */ | 2753 | */ |
@@ -2854,10 +2776,158 @@ static int cdrom_switch_blocksize(struct cdrom_device_info *cdi, int size) | |||
2854 | return cdo->generic_packet(cdi, &cgc); | 2776 | return cdo->generic_packet(cdi, &cgc); |
2855 | } | 2777 | } |
2856 | 2778 | ||
2779 | static int cdrom_get_track_info(struct cdrom_device_info *cdi, | ||
2780 | __u16 track, __u8 type, track_information *ti) | ||
2781 | { | ||
2782 | struct cdrom_device_ops *cdo = cdi->ops; | ||
2783 | struct packet_command cgc; | ||
2784 | int ret, buflen; | ||
2785 | |||
2786 | init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ); | ||
2787 | cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO; | ||
2788 | cgc.cmd[1] = type & 3; | ||
2789 | cgc.cmd[4] = (track & 0xff00) >> 8; | ||
2790 | cgc.cmd[5] = track & 0xff; | ||
2791 | cgc.cmd[8] = 8; | ||
2792 | cgc.quiet = 1; | ||
2793 | |||
2794 | ret = cdo->generic_packet(cdi, &cgc); | ||
2795 | if (ret) | ||
2796 | return ret; | ||
2797 | |||
2798 | buflen = be16_to_cpu(ti->track_information_length) + | ||
2799 | sizeof(ti->track_information_length); | ||
2800 | |||
2801 | if (buflen > sizeof(track_information)) | ||
2802 | buflen = sizeof(track_information); | ||
2803 | |||
2804 | cgc.cmd[8] = cgc.buflen = buflen; | ||
2805 | ret = cdo->generic_packet(cdi, &cgc); | ||
2806 | if (ret) | ||
2807 | return ret; | ||
2808 | |||
2809 | /* return actual fill size */ | ||
2810 | return buflen; | ||
2811 | } | ||
2812 | |||
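
cdrom_get_track_info() above is a textbook instance of the MMC "probe, then re-read" pattern: issue the command with a small allocation length, pull the real length out of the returned header, clamp it to the destination buffer, then reissue for the full payload. A standalone sketch of the same control flow; issue_cmd() stands in for cdo->generic_packet() and is an assumption for illustration:

	static int two_pass_read(void *buf, int bufsize)
	{
		__be16 *hdr = buf;	/* big-endian length field leads the reply */
		int len;

		if (issue_cmd(buf, 8))			/* pass 1: header only */
			return -EIO;

		len = be16_to_cpu(*hdr) + sizeof(*hdr);	/* device-reported size */
		if (len > bufsize)
			len = bufsize;			/* never overrun the caller */

		if (issue_cmd(buf, len))		/* pass 2: full payload */
			return -EIO;

		return len;				/* actual fill size */
	}
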
2813 | /* return the last written block on the CD-R media. this is for the udf | ||
2814 | file system. */ | ||
2815 | int cdrom_get_last_written(struct cdrom_device_info *cdi, long *last_written) | ||
2816 | { | ||
2817 | struct cdrom_tocentry toc; | ||
2818 | disc_information di; | ||
2819 | track_information ti; | ||
2820 | __u32 last_track; | ||
2821 | int ret = -1, ti_size; | ||
2822 | |||
2823 | if (!CDROM_CAN(CDC_GENERIC_PACKET)) | ||
2824 | goto use_toc; | ||
2825 | |||
2826 | ret = cdrom_get_disc_info(cdi, &di); | ||
2827 | if (ret < (int)(offsetof(typeof(di), last_track_lsb) | ||
2828 | + sizeof(di.last_track_lsb))) | ||
2829 | goto use_toc; | ||
2830 | |||
2831 | /* if unit didn't return msb, it's zeroed by cdrom_get_disc_info */ | ||
2832 | last_track = (di.last_track_msb << 8) | di.last_track_lsb; | ||
2833 | ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti); | ||
2834 | if (ti_size < (int)offsetof(typeof(ti), track_start)) | ||
2835 | goto use_toc; | ||
2836 | |||
2837 | /* if this track is blank, try the previous. */ | ||
2838 | if (ti.blank) { | ||
2839 | if (last_track == 1) | ||
2840 | goto use_toc; | ||
2841 | last_track--; | ||
2842 | ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti); | ||
2843 | } | ||
2844 | |||
2845 | if (ti_size < (int)(offsetof(typeof(ti), track_size) | ||
2846 | + sizeof(ti.track_size))) | ||
2847 | goto use_toc; | ||
2848 | |||
2849 | /* if last recorded field is valid, return it. */ | ||
2850 | if (ti.lra_v && ti_size >= (int)(offsetof(typeof(ti), last_rec_address) | ||
2851 | + sizeof(ti.last_rec_address))) { | ||
2852 | *last_written = be32_to_cpu(ti.last_rec_address); | ||
2853 | } else { | ||
2854 | /* make it up instead */ | ||
2855 | *last_written = be32_to_cpu(ti.track_start) + | ||
2856 | be32_to_cpu(ti.track_size); | ||
2857 | if (ti.free_blocks) | ||
2858 | *last_written -= (be32_to_cpu(ti.free_blocks) + 7); | ||
2859 | } | ||
2860 | return 0; | ||
2861 | |||
2862 | /* this is where we end up if the drive either can't do a | ||
2863 | GPCMD_READ_DISC_INFO or GPCMD_READ_TRACK_RZONE_INFO or if | ||
2864 | it doesn't give enough information or fails. then we return | ||
2865 | the toc contents. */ | ||
2866 | use_toc: | ||
2867 | toc.cdte_format = CDROM_MSF; | ||
2868 | toc.cdte_track = CDROM_LEADOUT; | ||
2869 | if ((ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &toc))) | ||
2870 | return ret; | ||
2871 | sanitize_format(&toc.cdte_addr, &toc.cdte_format, CDROM_LBA); | ||
2872 | *last_written = toc.cdte_addr.lba; | ||
2873 | return 0; | ||
2874 | } | ||
2875 | |||
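
When the drive cannot supply a valid last-recorded address, the else branch above rebuilds it from the track geometry. Worked through with invented numbers (treating the trailing 7 blocks as packet-write link/run-out area is an assumption about the constant's intent, not something this patch states):

	/* invented: track_start = 1000, track_size = 500, free_blocks = 93 */
	long last_written = 1000 + 500;	/* end of the track: block 1500 */
	last_written -= 93 + 7;		/* unused tail plus link blocks  */
	/* last_written == 1400 */
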
2876 | /* return the next writable block. also for udf file system. */ | ||
2877 | static int cdrom_get_next_writable(struct cdrom_device_info *cdi, | ||
2878 | long *next_writable) | ||
2879 | { | ||
2880 | disc_information di; | ||
2881 | track_information ti; | ||
2882 | __u16 last_track; | ||
2883 | int ret, ti_size; | ||
2884 | |||
2885 | if (!CDROM_CAN(CDC_GENERIC_PACKET)) | ||
2886 | goto use_last_written; | ||
2887 | |||
2888 | ret = cdrom_get_disc_info(cdi, &di); | ||
2889 | if (ret < 0 || ret < offsetof(typeof(di), last_track_lsb) | ||
2890 | + sizeof(di.last_track_lsb)) | ||
2891 | goto use_last_written; | ||
2892 | |||
2893 | /* if unit didn't return msb, it's zeroed by cdrom_get_disc_info */ | ||
2894 | last_track = (di.last_track_msb << 8) | di.last_track_lsb; | ||
2895 | ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti); | ||
2896 | if (ti_size < 0 || ti_size < offsetof(typeof(ti), track_start)) | ||
2897 | goto use_last_written; | ||
2898 | |||
2899 | /* if this track is blank, try the previous. */ | ||
2900 | if (ti.blank) { | ||
2901 | if (last_track == 1) | ||
2902 | goto use_last_written; | ||
2903 | last_track--; | ||
2904 | ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti); | ||
2905 | if (ti_size < 0) | ||
2906 | goto use_last_written; | ||
2907 | } | ||
2908 | |||
2909 | /* if next recordable address field is valid, use it. */ | ||
2910 | if (ti.nwa_v && ti_size >= offsetof(typeof(ti), next_writable) | ||
2911 | + sizeof(ti.next_writable)) { | ||
2912 | *next_writable = be32_to_cpu(ti.next_writable); | ||
2913 | return 0; | ||
2914 | } | ||
2915 | |||
2916 | use_last_written: | ||
2917 | ret = cdrom_get_last_written(cdi, next_writable); | ||
2918 | if (ret) { | ||
2919 | *next_writable = 0; | ||
2920 | return ret; | ||
2921 | } else { | ||
2922 | *next_writable += 7; | ||
2923 | return 0; | ||
2924 | } | ||
2925 | } | ||
2926 | |||
2857 | static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, | 2927 | static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, |
2858 | void __user *arg, | 2928 | void __user *arg, |
2859 | struct packet_command *cgc, | 2929 | struct packet_command *cgc, |
2860 | int cmd) | 2930 | int cmd) |
2861 | { | 2931 | { |
2862 | struct request_sense sense; | 2932 | struct request_sense sense; |
2863 | struct cdrom_msf msf; | 2933 | struct cdrom_msf msf; |
@@ -2876,7 +2946,8 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, | |||
2876 | blocksize = CD_FRAMESIZE_RAW0; | 2946 | blocksize = CD_FRAMESIZE_RAW0; |
2877 | break; | 2947 | break; |
2878 | } | 2948 | } |
2879 | IOCTL_IN(arg, struct cdrom_msf, msf); | 2949 | if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf))) |
2950 | return -EFAULT; | ||
2880 | lba = msf_to_lba(msf.cdmsf_min0, msf.cdmsf_sec0, msf.cdmsf_frame0); | 2951 | lba = msf_to_lba(msf.cdmsf_min0, msf.cdmsf_sec0, msf.cdmsf_frame0); |
2881 | /* FIXME: we need upper bound checking, too!! */ | 2952 | /* FIXME: we need upper bound checking, too!! */ |
2882 | if (lba < 0) | 2953 | if (lba < 0) |
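
This hunk begins the removal of the IOCTL_IN()/IOCTL_OUT() helpers in favor of open-coded copy_from_user()/copy_to_user(). The macro definitions are not shown in the patch; their presumed shape, reconstructed from how the call sites read, makes the motivation clear, namely flow control (a hidden return) buried inside a macro:

	/* presumed pre-patch helper; an assumption, not quoted from this diff */
	#define IOCTL_IN(arg, type, in)						\
		if (copy_from_user(&(in), (type __user *)(arg), sizeof(in)))	\
			return -EFAULT;

Open-coding the copy makes the -EFAULT early return visible at every call site instead of hiding it behind an innocent-looking macro invocation.
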
@@ -2891,8 +2962,8 @@ static noinline int mmc_ioctl_cdrom_read_data(struct cdrom_device_info *cdi, | |||
2891 | cgc->data_direction = CGC_DATA_READ; | 2962 | cgc->data_direction = CGC_DATA_READ; |
2892 | ret = cdrom_read_block(cdi, cgc, lba, 1, format, blocksize); | 2963 | ret = cdrom_read_block(cdi, cgc, lba, 1, format, blocksize); |
2893 | if (ret && sense.sense_key == 0x05 && | 2964 | if (ret && sense.sense_key == 0x05 && |
2894 | sense.asc == 0x20 && | 2965 | sense.asc == 0x20 && |
2895 | sense.ascq == 0x00) { | 2966 | sense.ascq == 0x00) { |
2896 | /* | 2967 | /* |
2897 | * SCSI-II devices are not required to support | 2968 | * SCSI-II devices are not required to support |
2898 | * READ_CD, so let's try switching block size | 2969 | * READ_CD, so let's try switching block size |
@@ -2913,12 +2984,14 @@ out: | |||
2913 | } | 2984 | } |
2914 | 2985 | ||
2915 | static noinline int mmc_ioctl_cdrom_read_audio(struct cdrom_device_info *cdi, | 2986 | static noinline int mmc_ioctl_cdrom_read_audio(struct cdrom_device_info *cdi, |
2916 | void __user *arg) | 2987 | void __user *arg) |
2917 | { | 2988 | { |
2918 | struct cdrom_read_audio ra; | 2989 | struct cdrom_read_audio ra; |
2919 | int lba; | 2990 | int lba; |
2920 | 2991 | ||
2921 | IOCTL_IN(arg, struct cdrom_read_audio, ra); | 2992 | if (copy_from_user(&ra, (struct cdrom_read_audio __user *)arg, |
2993 | sizeof(ra))) | ||
2994 | return -EFAULT; | ||
2922 | 2995 | ||
2923 | if (ra.addr_format == CDROM_MSF) | 2996 | if (ra.addr_format == CDROM_MSF) |
2924 | lba = msf_to_lba(ra.addr.msf.minute, | 2997 | lba = msf_to_lba(ra.addr.msf.minute, |
@@ -2937,12 +3010,13 @@ static noinline int mmc_ioctl_cdrom_read_audio(struct cdrom_device_info *cdi, | |||
2937 | } | 3010 | } |
2938 | 3011 | ||
2939 | static noinline int mmc_ioctl_cdrom_subchannel(struct cdrom_device_info *cdi, | 3012 | static noinline int mmc_ioctl_cdrom_subchannel(struct cdrom_device_info *cdi, |
2940 | void __user *arg) | 3013 | void __user *arg) |
2941 | { | 3014 | { |
2942 | int ret; | 3015 | int ret; |
2943 | struct cdrom_subchnl q; | 3016 | struct cdrom_subchnl q; |
2944 | u_char requested, back; | 3017 | u_char requested, back; |
2945 | IOCTL_IN(arg, struct cdrom_subchnl, q); | 3018 | if (copy_from_user(&q, (struct cdrom_subchnl __user *)arg, sizeof(q))) |
3019 | return -EFAULT; | ||
2946 | requested = q.cdsc_format; | 3020 | requested = q.cdsc_format; |
2947 | if (!((requested == CDROM_MSF) || | 3021 | if (!((requested == CDROM_MSF) || |
2948 | (requested == CDROM_LBA))) | 3022 | (requested == CDROM_LBA))) |
@@ -2954,19 +3028,21 @@ static noinline int mmc_ioctl_cdrom_subchannel(struct cdrom_device_info *cdi, | |||
2954 | back = q.cdsc_format; /* local copy */ | 3028 | back = q.cdsc_format; /* local copy */ |
2955 | sanitize_format(&q.cdsc_absaddr, &back, requested); | 3029 | sanitize_format(&q.cdsc_absaddr, &back, requested); |
2956 | sanitize_format(&q.cdsc_reladdr, &q.cdsc_format, requested); | 3030 | sanitize_format(&q.cdsc_reladdr, &q.cdsc_format, requested); |
2957 | IOCTL_OUT(arg, struct cdrom_subchnl, q); | 3031 | if (copy_to_user((struct cdrom_subchnl __user *)arg, &q, sizeof(q))) |
2958 | /* cdinfo(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */ | 3032 | return -EFAULT; |
3033 | /* cd_dbg(CD_DO_IOCTL, "CDROMSUBCHNL successful\n"); */ | ||
2959 | return 0; | 3034 | return 0; |
2960 | } | 3035 | } |
2961 | 3036 | ||
2962 | static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi, | 3037 | static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi, |
2963 | void __user *arg, | 3038 | void __user *arg, |
2964 | struct packet_command *cgc) | 3039 | struct packet_command *cgc) |
2965 | { | 3040 | { |
2966 | struct cdrom_device_ops *cdo = cdi->ops; | 3041 | struct cdrom_device_ops *cdo = cdi->ops; |
2967 | struct cdrom_msf msf; | 3042 | struct cdrom_msf msf; |
2968 | cdinfo(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); | 3043 | cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYMSF\n"); |
2969 | IOCTL_IN(arg, struct cdrom_msf, msf); | 3044 | if (copy_from_user(&msf, (struct cdrom_msf __user *)arg, sizeof(msf))) |
3045 | return -EFAULT; | ||
2970 | cgc->cmd[0] = GPCMD_PLAY_AUDIO_MSF; | 3046 | cgc->cmd[0] = GPCMD_PLAY_AUDIO_MSF; |
2971 | cgc->cmd[3] = msf.cdmsf_min0; | 3047 | cgc->cmd[3] = msf.cdmsf_min0; |
2972 | cgc->cmd[4] = msf.cdmsf_sec0; | 3048 | cgc->cmd[4] = msf.cdmsf_sec0; |
@@ -2979,13 +3055,14 @@ static noinline int mmc_ioctl_cdrom_play_msf(struct cdrom_device_info *cdi, | |||
2979 | } | 3055 | } |
2980 | 3056 | ||
2981 | static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi, | 3057 | static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi, |
2982 | void __user *arg, | 3058 | void __user *arg, |
2983 | struct packet_command *cgc) | 3059 | struct packet_command *cgc) |
2984 | { | 3060 | { |
2985 | struct cdrom_device_ops *cdo = cdi->ops; | 3061 | struct cdrom_device_ops *cdo = cdi->ops; |
2986 | struct cdrom_blk blk; | 3062 | struct cdrom_blk blk; |
2987 | cdinfo(CD_DO_IOCTL, "entering CDROMPLAYBLK\n"); | 3063 | cd_dbg(CD_DO_IOCTL, "entering CDROMPLAYBLK\n"); |
2988 | IOCTL_IN(arg, struct cdrom_blk, blk); | 3064 | if (copy_from_user(&blk, (struct cdrom_blk __user *)arg, sizeof(blk))) |
3065 | return -EFAULT; | ||
2989 | cgc->cmd[0] = GPCMD_PLAY_AUDIO_10; | 3066 | cgc->cmd[0] = GPCMD_PLAY_AUDIO_10; |
2990 | cgc->cmd[2] = (blk.from >> 24) & 0xff; | 3067 | cgc->cmd[2] = (blk.from >> 24) & 0xff; |
2991 | cgc->cmd[3] = (blk.from >> 16) & 0xff; | 3068 | cgc->cmd[3] = (blk.from >> 16) & 0xff; |
@@ -2998,9 +3075,9 @@ static noinline int mmc_ioctl_cdrom_play_blk(struct cdrom_device_info *cdi, | |||
2998 | } | 3075 | } |
2999 | 3076 | ||
3000 | static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi, | 3077 | static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi, |
3001 | void __user *arg, | 3078 | void __user *arg, |
3002 | struct packet_command *cgc, | 3079 | struct packet_command *cgc, |
3003 | unsigned int cmd) | 3080 | unsigned int cmd) |
3004 | { | 3081 | { |
3005 | struct cdrom_volctrl volctrl; | 3082 | struct cdrom_volctrl volctrl; |
3006 | unsigned char buffer[32]; | 3083 | unsigned char buffer[32]; |
@@ -3008,9 +3085,11 @@ static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi, | |||
3008 | unsigned short offset; | 3085 | unsigned short offset; |
3009 | int ret; | 3086 | int ret; |
3010 | 3087 | ||
3011 | cdinfo(CD_DO_IOCTL, "entering CDROMVOLUME\n"); | 3088 | cd_dbg(CD_DO_IOCTL, "entering CDROMVOLUME\n"); |
3012 | 3089 | ||
3013 | IOCTL_IN(arg, struct cdrom_volctrl, volctrl); | 3090 | if (copy_from_user(&volctrl, (struct cdrom_volctrl __user *)arg, |
3091 | sizeof(volctrl))) | ||
3092 | return -EFAULT; | ||
3014 | 3093 | ||
3015 | cgc->buffer = buffer; | 3094 | cgc->buffer = buffer; |
3016 | cgc->buflen = 24; | 3095 | cgc->buflen = 24; |
@@ -3030,14 +3109,14 @@ static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi, | |||
3030 | if (offset + 16 > cgc->buflen) { | 3109 | if (offset + 16 > cgc->buflen) { |
3031 | cgc->buflen = offset + 16; | 3110 | cgc->buflen = offset + 16; |
3032 | ret = cdrom_mode_sense(cdi, cgc, | 3111 | ret = cdrom_mode_sense(cdi, cgc, |
3033 | GPMODE_AUDIO_CTL_PAGE, 0); | 3112 | GPMODE_AUDIO_CTL_PAGE, 0); |
3034 | if (ret) | 3113 | if (ret) |
3035 | return ret; | 3114 | return ret; |
3036 | } | 3115 | } |
3037 | 3116 | ||
3038 | /* sanity check */ | 3117 | /* sanity check */ |
3039 | if ((buffer[offset] & 0x3f) != GPMODE_AUDIO_CTL_PAGE || | 3118 | if ((buffer[offset] & 0x3f) != GPMODE_AUDIO_CTL_PAGE || |
3040 | buffer[offset + 1] < 14) | 3119 | buffer[offset + 1] < 14) |
3041 | return -EINVAL; | 3120 | return -EINVAL; |
3042 | 3121 | ||
3043 | /* now we have the current volume settings. if it was only | 3122 | /* now we have the current volume settings. if it was only |
@@ -3047,7 +3126,9 @@ static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi, | |||
3047 | volctrl.channel1 = buffer[offset+11]; | 3126 | volctrl.channel1 = buffer[offset+11]; |
3048 | volctrl.channel2 = buffer[offset+13]; | 3127 | volctrl.channel2 = buffer[offset+13]; |
3049 | volctrl.channel3 = buffer[offset+15]; | 3128 | volctrl.channel3 = buffer[offset+15]; |
3050 | IOCTL_OUT(arg, struct cdrom_volctrl, volctrl); | 3129 | if (copy_to_user((struct cdrom_volctrl __user *)arg, &volctrl, |
3130 | sizeof(volctrl))) | ||
3131 | return -EFAULT; | ||
3051 | return 0; | 3132 | return 0; |
3052 | } | 3133 | } |
3053 | 3134 | ||
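
The sanity check in mmc_ioctl_cdrom_volume() above leans on the standard mode-page header layout. Spelled out, with the channel0 read at offset +9 inferred since it falls outside the visible hunk:

	/* byte 0: PS bit (7) | reserved (6) | page code (5:0)
	 * byte 1: page length = number of payload bytes that follow */
	u8 page_code = buffer[offset] & 0x3f;	/* must be GPMODE_AUDIO_CTL_PAGE */
	u8 page_len  = buffer[offset + 1];	/* must be >= 14 to cover the
						 * volume bytes at +9/+11/+13/+15 */
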
@@ -3069,11 +3150,11 @@ static noinline int mmc_ioctl_cdrom_volume(struct cdrom_device_info *cdi, | |||
3069 | } | 3150 | } |
3070 | 3151 | ||
3071 | static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi, | 3152 | static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi, |
3072 | struct packet_command *cgc, | 3153 | struct packet_command *cgc, |
3073 | int cmd) | 3154 | int cmd) |
3074 | { | 3155 | { |
3075 | struct cdrom_device_ops *cdo = cdi->ops; | 3156 | struct cdrom_device_ops *cdo = cdi->ops; |
3076 | cdinfo(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n"); | 3157 | cd_dbg(CD_DO_IOCTL, "entering CDROMSTART/CDROMSTOP\n"); |
3077 | cgc->cmd[0] = GPCMD_START_STOP_UNIT; | 3158 | cgc->cmd[0] = GPCMD_START_STOP_UNIT; |
3078 | cgc->cmd[1] = 1; | 3159 | cgc->cmd[1] = 1; |
3079 | cgc->cmd[4] = (cmd == CDROMSTART) ? 1 : 0; | 3160 | cgc->cmd[4] = (cmd == CDROMSTART) ? 1 : 0; |
@@ -3082,11 +3163,11 @@ static noinline int mmc_ioctl_cdrom_start_stop(struct cdrom_device_info *cdi, | |||
3082 | } | 3163 | } |
3083 | 3164 | ||
3084 | static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi, | 3165 | static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi, |
3085 | struct packet_command *cgc, | 3166 | struct packet_command *cgc, |
3086 | int cmd) | 3167 | int cmd) |
3087 | { | 3168 | { |
3088 | struct cdrom_device_ops *cdo = cdi->ops; | 3169 | struct cdrom_device_ops *cdo = cdi->ops; |
3089 | cdinfo(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n"); | 3170 | cd_dbg(CD_DO_IOCTL, "entering CDROMPAUSE/CDROMRESUME\n"); |
3090 | cgc->cmd[0] = GPCMD_PAUSE_RESUME; | 3171 | cgc->cmd[0] = GPCMD_PAUSE_RESUME; |
3091 | cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0; | 3172 | cgc->cmd[8] = (cmd == CDROMRESUME) ? 1 : 0; |
3092 | cgc->data_direction = CGC_DATA_NONE; | 3173 | cgc->data_direction = CGC_DATA_NONE; |
@@ -3094,8 +3175,8 @@ static noinline int mmc_ioctl_cdrom_pause_resume(struct cdrom_device_info *cdi, | |||
3094 | } | 3175 | } |
3095 | 3176 | ||
3096 | static noinline int mmc_ioctl_dvd_read_struct(struct cdrom_device_info *cdi, | 3177 | static noinline int mmc_ioctl_dvd_read_struct(struct cdrom_device_info *cdi, |
3097 | void __user *arg, | 3178 | void __user *arg, |
3098 | struct packet_command *cgc) | 3179 | struct packet_command *cgc) |
3099 | { | 3180 | { |
3100 | int ret; | 3181 | int ret; |
3101 | dvd_struct *s; | 3182 | dvd_struct *s; |
@@ -3108,7 +3189,7 @@ static noinline int mmc_ioctl_dvd_read_struct(struct cdrom_device_info *cdi, | |||
3108 | if (!s) | 3189 | if (!s) |
3109 | return -ENOMEM; | 3190 | return -ENOMEM; |
3110 | 3191 | ||
3111 | cdinfo(CD_DO_IOCTL, "entering DVD_READ_STRUCT\n"); | 3192 | cd_dbg(CD_DO_IOCTL, "entering DVD_READ_STRUCT\n"); |
3112 | if (copy_from_user(s, arg, size)) { | 3193 | if (copy_from_user(s, arg, size)) { |
3113 | kfree(s); | 3194 | kfree(s); |
3114 | return -EFAULT; | 3195 | return -EFAULT; |
@@ -3126,44 +3207,48 @@ out: | |||
3126 | } | 3207 | } |
3127 | 3208 | ||
3128 | static noinline int mmc_ioctl_dvd_auth(struct cdrom_device_info *cdi, | 3209 | static noinline int mmc_ioctl_dvd_auth(struct cdrom_device_info *cdi, |
3129 | void __user *arg) | 3210 | void __user *arg) |
3130 | { | 3211 | { |
3131 | int ret; | 3212 | int ret; |
3132 | dvd_authinfo ai; | 3213 | dvd_authinfo ai; |
3133 | if (!CDROM_CAN(CDC_DVD)) | 3214 | if (!CDROM_CAN(CDC_DVD)) |
3134 | return -ENOSYS; | 3215 | return -ENOSYS; |
3135 | cdinfo(CD_DO_IOCTL, "entering DVD_AUTH\n"); | 3216 | cd_dbg(CD_DO_IOCTL, "entering DVD_AUTH\n"); |
3136 | IOCTL_IN(arg, dvd_authinfo, ai); | 3217 | if (copy_from_user(&ai, (dvd_authinfo __user *)arg, sizeof(ai))) |
3218 | return -EFAULT; | ||
3137 | ret = dvd_do_auth(cdi, &ai); | 3219 | ret = dvd_do_auth(cdi, &ai); |
3138 | if (ret) | 3220 | if (ret) |
3139 | return ret; | 3221 | return ret; |
3140 | IOCTL_OUT(arg, dvd_authinfo, ai); | 3222 | if (copy_to_user((dvd_authinfo __user *)arg, &ai, sizeof(ai))) |
3223 | return -EFAULT; | ||
3141 | return 0; | 3224 | return 0; |
3142 | } | 3225 | } |
3143 | 3226 | ||
3144 | static noinline int mmc_ioctl_cdrom_next_writable(struct cdrom_device_info *cdi, | 3227 | static noinline int mmc_ioctl_cdrom_next_writable(struct cdrom_device_info *cdi, |
3145 | void __user *arg) | 3228 | void __user *arg) |
3146 | { | 3229 | { |
3147 | int ret; | 3230 | int ret; |
3148 | long next = 0; | 3231 | long next = 0; |
3149 | cdinfo(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n"); | 3232 | cd_dbg(CD_DO_IOCTL, "entering CDROM_NEXT_WRITABLE\n"); |
3150 | ret = cdrom_get_next_writable(cdi, &next); | 3233 | ret = cdrom_get_next_writable(cdi, &next); |
3151 | if (ret) | 3234 | if (ret) |
3152 | return ret; | 3235 | return ret; |
3153 | IOCTL_OUT(arg, long, next); | 3236 | if (copy_to_user((long __user *)arg, &next, sizeof(next))) |
3237 | return -EFAULT; | ||
3154 | return 0; | 3238 | return 0; |
3155 | } | 3239 | } |
3156 | 3240 | ||
3157 | static noinline int mmc_ioctl_cdrom_last_written(struct cdrom_device_info *cdi, | 3241 | static noinline int mmc_ioctl_cdrom_last_written(struct cdrom_device_info *cdi, |
3158 | void __user *arg) | 3242 | void __user *arg) |
3159 | { | 3243 | { |
3160 | int ret; | 3244 | int ret; |
3161 | long last = 0; | 3245 | long last = 0; |
3162 | cdinfo(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n"); | 3246 | cd_dbg(CD_DO_IOCTL, "entering CDROM_LAST_WRITTEN\n"); |
3163 | ret = cdrom_get_last_written(cdi, &last); | 3247 | ret = cdrom_get_last_written(cdi, &last); |
3164 | if (ret) | 3248 | if (ret) |
3165 | return ret; | 3249 | return ret; |
3166 | IOCTL_OUT(arg, long, last); | 3250 | if (copy_to_user((long __user *)arg, &last, sizeof(last))) |
3251 | return -EFAULT; | ||
3167 | return 0; | 3252 | return 0; |
3168 | } | 3253 | } |
3169 | 3254 | ||
@@ -3212,181 +3297,101 @@ static int mmc_ioctl(struct cdrom_device_info *cdi, unsigned int cmd, | |||
3212 | return -ENOTTY; | 3297 | return -ENOTTY; |
3213 | } | 3298 | } |
3214 | 3299 | ||
3215 | static int cdrom_get_track_info(struct cdrom_device_info *cdi, __u16 track, __u8 type, | 3300 | /* |
3216 | track_information *ti) | 3301 | * Just about every imaginable ioctl is supported in the Uniform layer |
3217 | { | 3302 | * these days. |
3218 | struct cdrom_device_ops *cdo = cdi->ops; | 3303 | * ATAPI / SCSI specific code now mainly resides in mmc_ioctl(). |
3219 | struct packet_command cgc; | 3304 | */ |
3220 | int ret, buflen; | 3305 | int cdrom_ioctl(struct cdrom_device_info *cdi, struct block_device *bdev, |
3221 | 3306 | fmode_t mode, unsigned int cmd, unsigned long arg) | |
3222 | init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ); | ||
3223 | cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO; | ||
3224 | cgc.cmd[1] = type & 3; | ||
3225 | cgc.cmd[4] = (track & 0xff00) >> 8; | ||
3226 | cgc.cmd[5] = track & 0xff; | ||
3227 | cgc.cmd[8] = 8; | ||
3228 | cgc.quiet = 1; | ||
3229 | |||
3230 | if ((ret = cdo->generic_packet(cdi, &cgc))) | ||
3231 | return ret; | ||
3232 | |||
3233 | buflen = be16_to_cpu(ti->track_information_length) + | ||
3234 | sizeof(ti->track_information_length); | ||
3235 | |||
3236 | if (buflen > sizeof(track_information)) | ||
3237 | buflen = sizeof(track_information); | ||
3238 | |||
3239 | cgc.cmd[8] = cgc.buflen = buflen; | ||
3240 | if ((ret = cdo->generic_packet(cdi, &cgc))) | ||
3241 | return ret; | ||
3242 | |||
3243 | /* return actual fill size */ | ||
3244 | return buflen; | ||
3245 | } | ||
3246 | |||
3247 | /* requires CD R/RW */ | ||
3248 | static int cdrom_get_disc_info(struct cdrom_device_info *cdi, disc_information *di) | ||
3249 | { | 3307 | { |
3250 | struct cdrom_device_ops *cdo = cdi->ops; | 3308 | void __user *argp = (void __user *)arg; |
3251 | struct packet_command cgc; | 3309 | int ret; |
3252 | int ret, buflen; | ||
3253 | |||
3254 | /* set up command and get the disc info */ | ||
3255 | init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ); | ||
3256 | cgc.cmd[0] = GPCMD_READ_DISC_INFO; | ||
3257 | cgc.cmd[8] = cgc.buflen = 2; | ||
3258 | cgc.quiet = 1; | ||
3259 | |||
3260 | if ((ret = cdo->generic_packet(cdi, &cgc))) | ||
3261 | return ret; | ||
3262 | 3310 | ||
3263 | /* not all drives have the same disc_info length, so requeue | 3311 | /* |
3264 | * packet with the length the drive tells us it can supply | 3312 | * Try the generic SCSI command ioctl's first. |
3265 | */ | 3313 | */ |
3266 | buflen = be16_to_cpu(di->disc_information_length) + | 3314 | ret = scsi_cmd_blk_ioctl(bdev, mode, cmd, argp); |
3267 | sizeof(di->disc_information_length); | 3315 | if (ret != -ENOTTY) |
3268 | |||
3269 | if (buflen > sizeof(disc_information)) | ||
3270 | buflen = sizeof(disc_information); | ||
3271 | |||
3272 | cgc.cmd[8] = cgc.buflen = buflen; | ||
3273 | if ((ret = cdo->generic_packet(cdi, &cgc))) | ||
3274 | return ret; | 3316 | return ret; |
3275 | 3317 | ||
3276 | /* return actual fill size */ | 3318 | switch (cmd) { |
3277 | return buflen; | 3319 | case CDROMMULTISESSION: |
3278 | } | 3320 | return cdrom_ioctl_multisession(cdi, argp); |
3279 | 3321 | case CDROMEJECT: | |
3280 | /* return the last written block on the CD-R media. this is for the udf | 3322 | return cdrom_ioctl_eject(cdi); |
3281 | file system. */ | 3323 | case CDROMCLOSETRAY: |
3282 | int cdrom_get_last_written(struct cdrom_device_info *cdi, long *last_written) | 3324 | return cdrom_ioctl_closetray(cdi); |
3283 | { | 3325 | case CDROMEJECT_SW: |
3284 | struct cdrom_tocentry toc; | 3326 | return cdrom_ioctl_eject_sw(cdi, arg); |
3285 | disc_information di; | 3327 | case CDROM_MEDIA_CHANGED: |
3286 | track_information ti; | 3328 | return cdrom_ioctl_media_changed(cdi, arg); |
3287 | __u32 last_track; | 3329 | case CDROM_SET_OPTIONS: |
3288 | int ret = -1, ti_size; | 3330 | return cdrom_ioctl_set_options(cdi, arg); |
3289 | 3331 | case CDROM_CLEAR_OPTIONS: | |
3290 | if (!CDROM_CAN(CDC_GENERIC_PACKET)) | 3332 | return cdrom_ioctl_clear_options(cdi, arg); |
3291 | goto use_toc; | 3333 | case CDROM_SELECT_SPEED: |
3292 | 3334 | return cdrom_ioctl_select_speed(cdi, arg); | |
3293 | ret = cdrom_get_disc_info(cdi, &di); | 3335 | case CDROM_SELECT_DISC: |
3294 | if (ret < (int)(offsetof(typeof(di), last_track_lsb) | 3336 | return cdrom_ioctl_select_disc(cdi, arg); |
3295 | + sizeof(di.last_track_lsb))) | 3337 | case CDROMRESET: |
3296 | goto use_toc; | 3338 | return cdrom_ioctl_reset(cdi, bdev); |
3297 | 3339 | case CDROM_LOCKDOOR: | |
3298 | /* if unit didn't return msb, it's zeroed by cdrom_get_disc_info */ | 3340 | return cdrom_ioctl_lock_door(cdi, arg); |
3299 | last_track = (di.last_track_msb << 8) | di.last_track_lsb; | 3341 | case CDROM_DEBUG: |
3300 | ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti); | 3342 | return cdrom_ioctl_debug(cdi, arg); |
3301 | if (ti_size < (int)offsetof(typeof(ti), track_start)) | 3343 | case CDROM_GET_CAPABILITY: |
3302 | goto use_toc; | 3344 | return cdrom_ioctl_get_capability(cdi); |
3303 | 3345 | case CDROM_GET_MCN: | |
3304 | /* if this track is blank, try the previous. */ | 3346 | return cdrom_ioctl_get_mcn(cdi, argp); |
3305 | if (ti.blank) { | 3347 | case CDROM_DRIVE_STATUS: |
3306 | if (last_track==1) | 3348 | return cdrom_ioctl_drive_status(cdi, arg); |
3307 | goto use_toc; | 3349 | case CDROM_DISC_STATUS: |
3308 | last_track--; | 3350 | return cdrom_ioctl_disc_status(cdi); |
3309 | ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti); | 3351 | case CDROM_CHANGER_NSLOTS: |
3310 | } | 3352 | return cdrom_ioctl_changer_nslots(cdi); |
3311 | |||
3312 | if (ti_size < (int)(offsetof(typeof(ti), track_size) | ||
3313 | + sizeof(ti.track_size))) | ||
3314 | goto use_toc; | ||
3315 | |||
3316 | /* if last recorded field is valid, return it. */ | ||
3317 | if (ti.lra_v && ti_size >= (int)(offsetof(typeof(ti), last_rec_address) | ||
3318 | + sizeof(ti.last_rec_address))) { | ||
3319 | *last_written = be32_to_cpu(ti.last_rec_address); | ||
3320 | } else { | ||
3321 | /* make it up instead */ | ||
3322 | *last_written = be32_to_cpu(ti.track_start) + | ||
3323 | be32_to_cpu(ti.track_size); | ||
3324 | if (ti.free_blocks) | ||
3325 | *last_written -= (be32_to_cpu(ti.free_blocks) + 7); | ||
3326 | } | 3353 | } |
3327 | return 0; | ||
3328 | 3354 | ||
3329 | /* this is where we end up if the drive either can't do a | 3355 | /* |
3330 | GPCMD_READ_DISC_INFO or GPCMD_READ_TRACK_RZONE_INFO or if | 3356 | * Use the ioctls that are implemented through the generic_packet() |
3331 | it doesn't give enough information or fails. then we return | 3357 | * interface. this may look a bit funny, but if -ENOTTY is |
3332 | the toc contents. */ | 3358 | * returned that particular ioctl is not implemented and we |
3333 | use_toc: | 3359 | * let it go through the device specific ones. |
3334 | toc.cdte_format = CDROM_MSF; | 3360 | */ |
3335 | toc.cdte_track = CDROM_LEADOUT; | 3361 | if (CDROM_CAN(CDC_GENERIC_PACKET)) { |
3336 | if ((ret = cdi->ops->audio_ioctl(cdi, CDROMREADTOCENTRY, &toc))) | 3362 | ret = mmc_ioctl(cdi, cmd, arg); |
3337 | return ret; | 3363 | if (ret != -ENOTTY) |
3338 | sanitize_format(&toc.cdte_addr, &toc.cdte_format, CDROM_LBA); | 3364 | return ret; |
3339 | *last_written = toc.cdte_addr.lba; | ||
3340 | return 0; | ||
3341 | } | ||
3342 | |||
3343 | /* return the next writable block. also for udf file system. */ | ||
3344 | static int cdrom_get_next_writable(struct cdrom_device_info *cdi, long *next_writable) | ||
3345 | { | ||
3346 | disc_information di; | ||
3347 | track_information ti; | ||
3348 | __u16 last_track; | ||
3349 | int ret, ti_size; | ||
3350 | |||
3351 | if (!CDROM_CAN(CDC_GENERIC_PACKET)) | ||
3352 | goto use_last_written; | ||
3353 | |||
3354 | ret = cdrom_get_disc_info(cdi, &di); | ||
3355 | if (ret < 0 || ret < offsetof(typeof(di), last_track_lsb) | ||
3356 | + sizeof(di.last_track_lsb)) | ||
3357 | goto use_last_written; | ||
3358 | |||
3359 | /* if unit didn't return msb, it's zeroed by cdrom_get_disc_info */ | ||
3360 | last_track = (di.last_track_msb << 8) | di.last_track_lsb; | ||
3361 | ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti); | ||
3362 | if (ti_size < 0 || ti_size < offsetof(typeof(ti), track_start)) | ||
3363 | goto use_last_written; | ||
3364 | |||
3365 | /* if this track is blank, try the previous. */ | ||
3366 | if (ti.blank) { | ||
3367 | if (last_track == 1) | ||
3368 | goto use_last_written; | ||
3369 | last_track--; | ||
3370 | ti_size = cdrom_get_track_info(cdi, last_track, 1, &ti); | ||
3371 | if (ti_size < 0) | ||
3372 | goto use_last_written; | ||
3373 | } | 3365 | } |
3374 | 3366 | ||
3375 | /* if next recordable address field is valid, use it. */ | 3367 | /* |
3376 | if (ti.nwa_v && ti_size >= offsetof(typeof(ti), next_writable) | 3368 | * Note: most of the cd_dbg() calls are commented out here, |
3377 | + sizeof(ti.next_writable)) { | 3369 | * because they fill up the syslog when CD players poll |
3378 | *next_writable = be32_to_cpu(ti.next_writable); | 3370 | * the drive. |
3379 | return 0; | 3371 | */ |
3372 | switch (cmd) { | ||
3373 | case CDROMSUBCHNL: | ||
3374 | return cdrom_ioctl_get_subchnl(cdi, argp); | ||
3375 | case CDROMREADTOCHDR: | ||
3376 | return cdrom_ioctl_read_tochdr(cdi, argp); | ||
3377 | case CDROMREADTOCENTRY: | ||
3378 | return cdrom_ioctl_read_tocentry(cdi, argp); | ||
3379 | case CDROMPLAYMSF: | ||
3380 | return cdrom_ioctl_play_msf(cdi, argp); | ||
3381 | case CDROMPLAYTRKIND: | ||
3382 | return cdrom_ioctl_play_trkind(cdi, argp); | ||
3383 | case CDROMVOLCTRL: | ||
3384 | return cdrom_ioctl_volctrl(cdi, argp); | ||
3385 | case CDROMVOLREAD: | ||
3386 | return cdrom_ioctl_volread(cdi, argp); | ||
3387 | case CDROMSTART: | ||
3388 | case CDROMSTOP: | ||
3389 | case CDROMPAUSE: | ||
3390 | case CDROMRESUME: | ||
3391 | return cdrom_ioctl_audioctl(cdi, cmd); | ||
3380 | } | 3392 | } |
3381 | 3393 | ||
3382 | use_last_written: | 3394 | return -ENOSYS; |
3383 | if ((ret = cdrom_get_last_written(cdi, next_writable))) { | ||
3384 | *next_writable = 0; | ||
3385 | return ret; | ||
3386 | } else { | ||
3387 | *next_writable += 7; | ||
3388 | return 0; | ||
3389 | } | ||
3390 | } | 3395 | } |
3391 | 3396 | ||
3392 | EXPORT_SYMBOL(cdrom_get_last_written); | 3397 | EXPORT_SYMBOL(cdrom_get_last_written); |
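
With cdrom_ioctl() now placed after the helpers it calls, the dispatch convention it encodes is easier to read: -ENOTTY is the agreed "not my command" sentinel, while any other value, success or a real error, ends the lookup. The same idea in miniature, with hypothetical helper names:

	int dispatch(unsigned int cmd)
	{
		int ret;

		ret = try_generic_scsi(cmd);	/* scsi_cmd_blk_ioctl() analogue */
		if (ret != -ENOTTY)
			return ret;		/* handled, or a real error */

		ret = try_packet_layer(cmd);	/* mmc_ioctl() analogue */
		if (ret != -ENOTTY)
			return ret;

		return -ENOSYS;			/* nobody claimed the command */
	}
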
diff --git a/drivers/cdrom/gdrom.c b/drivers/cdrom/gdrom.c index 51e75ad96422..584bc3126403 100644 --- a/drivers/cdrom/gdrom.c +++ b/drivers/cdrom/gdrom.c | |||
@@ -602,7 +602,7 @@ static void gdrom_readdisk_dma(struct work_struct *work) | |||
602 | spin_unlock(&gdrom_lock); | 602 | spin_unlock(&gdrom_lock); |
603 | block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET; | 603 | block = blk_rq_pos(req)/GD_TO_BLK + GD_SESSION_OFFSET; |
604 | block_cnt = blk_rq_sectors(req)/GD_TO_BLK; | 604 | block_cnt = blk_rq_sectors(req)/GD_TO_BLK; |
605 | __raw_writel(virt_to_phys(req->buffer), GDROM_DMA_STARTADDR_REG); | 605 | __raw_writel(virt_to_phys(bio_data(req->bio)), GDROM_DMA_STARTADDR_REG); |
606 | __raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG); | 606 | __raw_writel(block_cnt * GDROM_HARD_SECTOR, GDROM_DMA_LENGTH_REG); |
607 | __raw_writel(1, GDROM_DMA_DIRECTION_REG); | 607 | __raw_writel(1, GDROM_DMA_DIRECTION_REG); |
608 | __raw_writel(1, GDROM_DMA_ENABLE_REG); | 608 | __raw_writel(1, GDROM_DMA_ENABLE_REG); |
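
The gdrom change is one instance of a conversion repeated in several drivers below: the legacy req->buffer field is going away, and drivers that want a flat kernel-virtual pointer to the request's first chunk of data derive it from the first bio instead. Roughly what the helper resolves to (paraphrased from memory of bio_data(), not from this patch):

	/* bio_data(bio) ~ page_address(bio_page(bio)) + bio_offset(bio) */
	void *buf = bio_data(req->bio);		/* first segment of first bio */

This only suits drivers whose requests carry a single contiguous, kernel-mapped buffer, which is exactly the class of simple devices converted here.
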
diff --git a/drivers/char/random.c b/drivers/char/random.c index 6b75713d953a..0a19d866a153 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c | |||
@@ -902,6 +902,7 @@ void add_disk_randomness(struct gendisk *disk) | |||
902 | add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); | 902 | add_timer_randomness(disk->random, 0x100 + disk_devt(disk)); |
903 | trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool)); | 903 | trace_add_disk_randomness(disk_devt(disk), ENTROPY_BITS(&input_pool)); |
904 | } | 904 | } |
905 | EXPORT_SYMBOL_GPL(add_disk_randomness); | ||
905 | #endif | 906 | #endif |
906 | 907 | ||
907 | /********************************************************************* | 908 | /********************************************************************* |
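
The new EXPORT_SYMBOL_GPL makes add_disk_randomness() usable from modules, so block drivers that complete requests outside the core can credit disk-timing entropy themselves. Expected call-site shape, with a hypothetical driver function name:

	static void my_driver_complete(struct request *rq)
	{
		/* ... normal completion work ... */
		if (rq->rq_disk)	/* not every request carries a disk */
			add_disk_randomness(rq->rq_disk);
	}
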
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c index 16f69be820c7..ee880382e3bc 100644 --- a/drivers/ide/ide-disk.c +++ b/drivers/ide/ide-disk.c | |||
@@ -188,10 +188,9 @@ static ide_startstop_t ide_do_rw_disk(ide_drive_t *drive, struct request *rq, | |||
188 | 188 | ||
189 | ledtrig_ide_activity(); | 189 | ledtrig_ide_activity(); |
190 | 190 | ||
191 | pr_debug("%s: %sing: block=%llu, sectors=%u, buffer=0x%08lx\n", | 191 | pr_debug("%s: %sing: block=%llu, sectors=%u\n", |
192 | drive->name, rq_data_dir(rq) == READ ? "read" : "writ", | 192 | drive->name, rq_data_dir(rq) == READ ? "read" : "writ", |
193 | (unsigned long long)block, blk_rq_sectors(rq), | 193 | (unsigned long long)block, blk_rq_sectors(rq)); |
194 | (unsigned long)rq->buffer); | ||
195 | 194 | ||
196 | if (hwif->rw_disk) | 195 | if (hwif->rw_disk) |
197 | hwif->rw_disk(drive, rq); | 196 | hwif->rw_disk(drive, rq); |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 455e64916498..6a71bc7c9133 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
@@ -1544,7 +1544,6 @@ static int setup_clone(struct request *clone, struct request *rq, | |||
1544 | clone->cmd = rq->cmd; | 1544 | clone->cmd = rq->cmd; |
1545 | clone->cmd_len = rq->cmd_len; | 1545 | clone->cmd_len = rq->cmd_len; |
1546 | clone->sense = rq->sense; | 1546 | clone->sense = rq->sense; |
1547 | clone->buffer = rq->buffer; | ||
1548 | clone->end_io = end_clone_request; | 1547 | clone->end_io = end_clone_request; |
1549 | clone->end_io_data = tio; | 1548 | clone->end_io_data = tio; |
1550 | 1549 | ||
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c index 0b2ccb68c0d0..4dbfaee9aa95 100644 --- a/drivers/mtd/mtd_blkdevs.c +++ b/drivers/mtd/mtd_blkdevs.c | |||
@@ -82,8 +82,7 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr, | |||
82 | 82 | ||
83 | block = blk_rq_pos(req) << 9 >> tr->blkshift; | 83 | block = blk_rq_pos(req) << 9 >> tr->blkshift; |
84 | nsect = blk_rq_cur_bytes(req) >> tr->blkshift; | 84 | nsect = blk_rq_cur_bytes(req) >> tr->blkshift; |
85 | 85 | buf = bio_data(req->bio); | |
86 | buf = req->buffer; | ||
87 | 86 | ||
88 | if (req->cmd_type != REQ_TYPE_FS) | 87 | if (req->cmd_type != REQ_TYPE_FS) |
89 | return -EIO; | 88 | return -EIO; |
diff --git a/drivers/mtd/ubi/block.c b/drivers/mtd/ubi/block.c index 7ff473c871a9..ee774ba3728d 100644 --- a/drivers/mtd/ubi/block.c +++ b/drivers/mtd/ubi/block.c | |||
@@ -253,7 +253,7 @@ static int do_ubiblock_request(struct ubiblock *dev, struct request *req) | |||
253 | * flash access anyway. | 253 | * flash access anyway. |
254 | */ | 254 | */ |
255 | mutex_lock(&dev->dev_mutex); | 255 | mutex_lock(&dev->dev_mutex); |
256 | ret = ubiblock_read(dev, req->buffer, sec, len); | 256 | ret = ubiblock_read(dev, bio_data(req->bio), sec, len); |
257 | mutex_unlock(&dev->dev_mutex); | 257 | mutex_unlock(&dev->dev_mutex); |
258 | 258 | ||
259 | return ret; | 259 | return ret; |
diff --git a/drivers/sbus/char/jsflash.c b/drivers/sbus/char/jsflash.c index 4ccb5d869389..a40ee1e37486 100644 --- a/drivers/sbus/char/jsflash.c +++ b/drivers/sbus/char/jsflash.c | |||
@@ -207,7 +207,7 @@ static void jsfd_do_request(struct request_queue *q) | |||
207 | goto end; | 207 | goto end; |
208 | } | 208 | } |
209 | 209 | ||
210 | jsfd_read(req->buffer, jdp->dbase + offset, len); | 210 | jsfd_read(bio_data(req->bio), jdp->dbase + offset, len); |
211 | err = 0; | 211 | err = 0; |
212 | end: | 212 | end: |
213 | if (!__blk_end_request_cur(req, err)) | 213 | if (!__blk_end_request_cur(req, err)) |
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 65a123d9c676..3cc82d3dec78 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c | |||
@@ -139,7 +139,7 @@ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, int unbusy) | |||
139 | */ | 139 | */ |
140 | spin_lock_irqsave(q->queue_lock, flags); | 140 | spin_lock_irqsave(q->queue_lock, flags); |
141 | blk_requeue_request(q, cmd->request); | 141 | blk_requeue_request(q, cmd->request); |
142 | kblockd_schedule_work(q, &device->requeue_work); | 142 | kblockd_schedule_work(&device->requeue_work); |
143 | spin_unlock_irqrestore(q->queue_lock, flags); | 143 | spin_unlock_irqrestore(q->queue_lock, flags); |
144 | } | 144 | } |
145 | 145 | ||
@@ -1018,8 +1018,6 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, | |||
1018 | return BLKPREP_DEFER; | 1018 | return BLKPREP_DEFER; |
1019 | } | 1019 | } |
1020 | 1020 | ||
1021 | req->buffer = NULL; | ||
1022 | |||
1023 | /* | 1021 | /* |
1024 | * Next, walk the list, and fill in the addresses and sizes of | 1022 | * Next, walk the list, and fill in the addresses and sizes of |
1025 | * each segment. | 1023 | * each segment. |
@@ -1156,7 +1154,6 @@ int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) | |||
1156 | BUG_ON(blk_rq_bytes(req)); | 1154 | BUG_ON(blk_rq_bytes(req)); |
1157 | 1155 | ||
1158 | memset(&cmd->sdb, 0, sizeof(cmd->sdb)); | 1156 | memset(&cmd->sdb, 0, sizeof(cmd->sdb)); |
1159 | req->buffer = NULL; | ||
1160 | } | 1157 | } |
1161 | 1158 | ||
1162 | cmd->cmd_len = req->cmd_len; | 1159 | cmd->cmd_len = req->cmd_len; |
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index efcbcd182863..96af195224f2 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c | |||
@@ -737,16 +737,14 @@ static int sd_setup_discard_cmnd(struct scsi_device *sdp, struct request *rq) | |||
737 | goto out; | 737 | goto out; |
738 | } | 738 | } |
739 | 739 | ||
740 | rq->completion_data = page; | ||
740 | blk_add_request_payload(rq, page, len); | 741 | blk_add_request_payload(rq, page, len); |
741 | ret = scsi_setup_blk_pc_cmnd(sdp, rq); | 742 | ret = scsi_setup_blk_pc_cmnd(sdp, rq); |
742 | rq->buffer = page_address(page); | ||
743 | rq->__data_len = nr_bytes; | 743 | rq->__data_len = nr_bytes; |
744 | 744 | ||
745 | out: | 745 | out: |
746 | if (ret != BLKPREP_OK) { | 746 | if (ret != BLKPREP_OK) |
747 | __free_page(page); | 747 | __free_page(page); |
748 | rq->buffer = NULL; | ||
749 | } | ||
750 | return ret; | 748 | return ret; |
751 | } | 749 | } |
752 | 750 | ||
@@ -842,10 +840,9 @@ static void sd_unprep_fn(struct request_queue *q, struct request *rq) | |||
842 | { | 840 | { |
843 | struct scsi_cmnd *SCpnt = rq->special; | 841 | struct scsi_cmnd *SCpnt = rq->special; |
844 | 842 | ||
845 | if (rq->cmd_flags & REQ_DISCARD) { | 843 | if (rq->cmd_flags & REQ_DISCARD) |
846 | free_page((unsigned long)rq->buffer); | 844 | __free_page(rq->completion_data); |
847 | rq->buffer = NULL; | 845 | |
848 | } | ||
849 | if (SCpnt->cmnd != rq->cmd) { | 846 | if (SCpnt->cmnd != rq->cmd) { |
850 | mempool_free(SCpnt->cmnd, sd_cdb_pool); | 847 | mempool_free(SCpnt->cmnd, sd_cdb_pool); |
851 | SCpnt->cmnd = NULL; | 848 | SCpnt->cmnd = NULL; |
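In sd.c the discard payload page can no longer be parked in rq->buffer, so it moves to rq->completion_data, which is otherwise unused for discard requests. A sketch of the resulting prep/unprep pairing, with the my_-prefixed names hypothetical:

static int my_setup_discard(struct request *rq)
{
	struct page *page = alloc_page(GFP_ATOMIC | __GFP_ZERO);

	if (!page)
		return BLKPREP_DEFER;

	rq->completion_data = page;	/* replaces rq->buffer */
	blk_add_request_payload(rq, page, PAGE_SIZE);
	return BLKPREP_OK;
}

static void my_unprep(struct request_queue *q, struct request *rq)
{
	if (rq->cmd_flags & REQ_DISCARD)
		__free_page(rq->completion_data);
}

Note the matching pair in the real hunk as well: with a struct page in hand, the unprep side uses __free_page() instead of the free_page()-on-a-kaddr dance the old rq->buffer code needed.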
diff --git a/fs/Makefile b/fs/Makefile index f9cb9876e466..4030cbfbc9af 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -14,14 +14,13 @@ obj-y := open.o read_write.o file_table.o super.o \ | |||
14 | stack.o fs_struct.o statfs.o | 14 | stack.o fs_struct.o statfs.o |
15 | 15 | ||
16 | ifeq ($(CONFIG_BLOCK),y) | 16 | ifeq ($(CONFIG_BLOCK),y) |
17 | obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o | 17 | obj-y += buffer.o block_dev.o direct-io.o mpage.o |
18 | else | 18 | else |
19 | obj-y += no-block.o | 19 | obj-y += no-block.o |
20 | endif | 20 | endif |
21 | 21 | ||
22 | obj-$(CONFIG_PROC_FS) += proc_namespace.o | 22 | obj-$(CONFIG_PROC_FS) += proc_namespace.o |
23 | 23 | ||
24 | obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o | ||
25 | obj-y += notify/ | 24 | obj-y += notify/ |
26 | obj-$(CONFIG_EPOLL) += eventpoll.o | 25 | obj-$(CONFIG_EPOLL) += eventpoll.o |
27 | obj-$(CONFIG_ANON_INODES) += anon_inodes.o | 26 | obj-$(CONFIG_ANON_INODES) += anon_inodes.o |
diff --git a/include/linux/bio.h b/include/linux/bio.h index bba550826921..5a645769f020 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
@@ -333,7 +333,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, | |||
333 | 333 | ||
334 | extern struct bio_set *bioset_create(unsigned int, unsigned int); | 334 | extern struct bio_set *bioset_create(unsigned int, unsigned int); |
335 | extern void bioset_free(struct bio_set *); | 335 | extern void bioset_free(struct bio_set *); |
336 | extern mempool_t *biovec_create_pool(struct bio_set *bs, int pool_entries); | 336 | extern mempool_t *biovec_create_pool(int pool_entries); |
337 | 337 | ||
338 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); | 338 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); |
339 | extern void bio_put(struct bio *); | 339 | extern void bio_put(struct bio *); |
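biovec_create_pool() loses its bio_set argument because the pool it builds never actually used it; callers simply store the returned mempool wherever they need it, as the bioset_integrity_create() hunk earlier in this series does. A hedged sketch of the new call:

/* old: pool = biovec_create_pool(bs, pool_entries); */
mempool_t *pool = biovec_create_pool(pool_entries);

if (!pool)
	return -ENOMEM;
bs->bvec_integrity_pool = pool;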
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0120451545d8..91dfb75ce39f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h | |||
@@ -8,7 +8,13 @@ struct blk_mq_tags; | |||
8 | struct blk_mq_cpu_notifier { | 8 | struct blk_mq_cpu_notifier { |
9 | struct list_head list; | 9 | struct list_head list; |
10 | void *data; | 10 | void *data; |
11 | void (*notify)(void *data, unsigned long action, unsigned int cpu); | 11 | int (*notify)(void *data, unsigned long action, unsigned int cpu); |
12 | }; | ||
13 | |||
14 | struct blk_mq_ctxmap { | ||
15 | unsigned int map_size; | ||
16 | unsigned int bits_per_word; | ||
17 | struct blk_align_bitmap *map; | ||
12 | }; | 18 | }; |
13 | 19 | ||
14 | struct blk_mq_hw_ctx { | 20 | struct blk_mq_hw_ctx { |
@@ -18,7 +24,11 @@ struct blk_mq_hw_ctx { | |||
18 | } ____cacheline_aligned_in_smp; | 24 | } ____cacheline_aligned_in_smp; |
19 | 25 | ||
20 | unsigned long state; /* BLK_MQ_S_* flags */ | 26 | unsigned long state; /* BLK_MQ_S_* flags */ |
21 | struct delayed_work delayed_work; | 27 | struct delayed_work run_work; |
28 | struct delayed_work delay_work; | ||
29 | cpumask_var_t cpumask; | ||
30 | int next_cpu; | ||
31 | int next_cpu_batch; | ||
22 | 32 | ||
23 | unsigned long flags; /* BLK_MQ_F_* flags */ | 33 | unsigned long flags; /* BLK_MQ_F_* flags */ |
24 | 34 | ||
@@ -27,13 +37,13 @@ struct blk_mq_hw_ctx { | |||
27 | 37 | ||
28 | void *driver_data; | 38 | void *driver_data; |
29 | 39 | ||
40 | struct blk_mq_ctxmap ctx_map; | ||
41 | |||
30 | unsigned int nr_ctx; | 42 | unsigned int nr_ctx; |
31 | struct blk_mq_ctx **ctxs; | 43 | struct blk_mq_ctx **ctxs; |
32 | unsigned int nr_ctx_map; | ||
33 | unsigned long *ctx_map; | ||
34 | 44 | ||
35 | struct request **rqs; | 45 | unsigned int wait_index; |
36 | struct list_head page_list; | 46 | |
37 | struct blk_mq_tags *tags; | 47 | struct blk_mq_tags *tags; |
38 | 48 | ||
39 | unsigned long queued; | 49 | unsigned long queued; |
@@ -41,31 +51,40 @@ struct blk_mq_hw_ctx { | |||
41 | #define BLK_MQ_MAX_DISPATCH_ORDER 10 | 51 | #define BLK_MQ_MAX_DISPATCH_ORDER 10 |
42 | unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; | 52 | unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; |
43 | 53 | ||
44 | unsigned int queue_depth; | ||
45 | unsigned int numa_node; | 54 | unsigned int numa_node; |
46 | unsigned int cmd_size; /* per-request extra data */ | 55 | unsigned int cmd_size; /* per-request extra data */ |
47 | 56 | ||
57 | atomic_t nr_active; | ||
58 | |||
48 | struct blk_mq_cpu_notifier cpu_notifier; | 59 | struct blk_mq_cpu_notifier cpu_notifier; |
49 | struct kobject kobj; | 60 | struct kobject kobj; |
50 | }; | 61 | }; |
51 | 62 | ||
52 | struct blk_mq_reg { | 63 | struct blk_mq_tag_set { |
53 | struct blk_mq_ops *ops; | 64 | struct blk_mq_ops *ops; |
54 | unsigned int nr_hw_queues; | 65 | unsigned int nr_hw_queues; |
55 | unsigned int queue_depth; | 66 | unsigned int queue_depth; /* max hw supported */ |
56 | unsigned int reserved_tags; | 67 | unsigned int reserved_tags; |
57 | unsigned int cmd_size; /* per-request extra data */ | 68 | unsigned int cmd_size; /* per-request extra data */ |
58 | int numa_node; | 69 | int numa_node; |
59 | unsigned int timeout; | 70 | unsigned int timeout; |
60 | unsigned int flags; /* BLK_MQ_F_* */ | 71 | unsigned int flags; /* BLK_MQ_F_* */ |
72 | void *driver_data; | ||
73 | |||
74 | struct blk_mq_tags **tags; | ||
75 | |||
76 | struct mutex tag_list_lock; | ||
77 | struct list_head tag_list; | ||
61 | }; | 78 | }; |
62 | 79 | ||
63 | typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); | 80 | typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); |
64 | typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); | 81 | typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); |
65 | typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int); | ||
66 | typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); | ||
67 | typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); | 82 | typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); |
68 | typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); | 83 | typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); |
84 | typedef int (init_request_fn)(void *, struct request *, unsigned int, | ||
85 | unsigned int, unsigned int); | ||
86 | typedef void (exit_request_fn)(void *, struct request *, unsigned int, | ||
87 | unsigned int); | ||
69 | 88 | ||
70 | struct blk_mq_ops { | 89 | struct blk_mq_ops { |
71 | /* | 90 | /* |
@@ -86,18 +105,20 @@ struct blk_mq_ops { | |||
86 | softirq_done_fn *complete; | 105 | softirq_done_fn *complete; |
87 | 106 | ||
88 | /* | 107 | /* |
89 | * Override for hctx allocations (should probably go) | ||
90 | */ | ||
91 | alloc_hctx_fn *alloc_hctx; | ||
92 | free_hctx_fn *free_hctx; | ||
93 | |||
94 | /* | ||
95 | * Called when the block layer side of a hardware queue has been | 108 | * Called when the block layer side of a hardware queue has been |
96 | * set up, allowing the driver to allocate/init matching structures. | 109 | * set up, allowing the driver to allocate/init matching structures. |
97 | * Ditto for exit/teardown. | 110 | * Ditto for exit/teardown. |
98 | */ | 111 | */ |
99 | init_hctx_fn *init_hctx; | 112 | init_hctx_fn *init_hctx; |
100 | exit_hctx_fn *exit_hctx; | 113 | exit_hctx_fn *exit_hctx; |
114 | |||
115 | /* | ||
116 | * Called for every command allocated by the block layer to allow | ||
117 | * the driver to set up driver specific data. | ||
118 | * Ditto for exit/teardown. | ||
119 | */ | ||
120 | init_request_fn *init_request; | ||
121 | exit_request_fn *exit_request; | ||
101 | }; | 122 | }; |
102 | 123 | ||
103 | enum { | 124 | enum { |
@@ -107,18 +128,22 @@ enum { | |||
107 | 128 | ||
108 | BLK_MQ_F_SHOULD_MERGE = 1 << 0, | 129 | BLK_MQ_F_SHOULD_MERGE = 1 << 0, |
109 | BLK_MQ_F_SHOULD_SORT = 1 << 1, | 130 | BLK_MQ_F_SHOULD_SORT = 1 << 1, |
110 | BLK_MQ_F_SHOULD_IPI = 1 << 2, | 131 | BLK_MQ_F_TAG_SHARED = 1 << 2, |
111 | 132 | ||
112 | BLK_MQ_S_STOPPED = 0, | 133 | BLK_MQ_S_STOPPED = 0, |
134 | BLK_MQ_S_TAG_ACTIVE = 1, | ||
113 | 135 | ||
114 | BLK_MQ_MAX_DEPTH = 2048, | 136 | BLK_MQ_MAX_DEPTH = 2048, |
137 | |||
138 | BLK_MQ_CPU_WORK_BATCH = 8, | ||
115 | }; | 139 | }; |
116 | 140 | ||
117 | struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); | 141 | struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); |
118 | int blk_mq_register_disk(struct gendisk *); | 142 | int blk_mq_register_disk(struct gendisk *); |
119 | void blk_mq_unregister_disk(struct gendisk *); | 143 | void blk_mq_unregister_disk(struct gendisk *); |
120 | int blk_mq_init_commands(struct request_queue *, int (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); | 144 | |
121 | void blk_mq_free_commands(struct request_queue *, void (*free)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); | 145 | int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); |
146 | void blk_mq_free_tag_set(struct blk_mq_tag_set *set); | ||
122 | 147 | ||
123 | void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); | 148 | void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); |
124 | 149 | ||
@@ -126,28 +151,28 @@ void blk_mq_insert_request(struct request *, bool, bool, bool); | |||
126 | void blk_mq_run_queues(struct request_queue *q, bool async); | 151 | void blk_mq_run_queues(struct request_queue *q, bool async); |
127 | void blk_mq_free_request(struct request *rq); | 152 | void blk_mq_free_request(struct request *rq); |
128 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *); | 153 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *); |
129 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp); | 154 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, |
130 | struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); | 155 | gfp_t gfp, bool reserved); |
131 | struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); | 156 | struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); |
132 | 157 | ||
133 | struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); | 158 | struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); |
134 | struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int); | 159 | struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); |
135 | void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); | ||
136 | 160 | ||
137 | bool blk_mq_end_io_partial(struct request *rq, int error, | 161 | void blk_mq_end_io(struct request *rq, int error); |
138 | unsigned int nr_bytes); | 162 | void __blk_mq_end_io(struct request *rq, int error); |
139 | static inline void blk_mq_end_io(struct request *rq, int error) | ||
140 | { | ||
141 | bool done = !blk_mq_end_io_partial(rq, error, blk_rq_bytes(rq)); | ||
142 | BUG_ON(!done); | ||
143 | } | ||
144 | 163 | ||
164 | void blk_mq_requeue_request(struct request *rq); | ||
165 | void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); | ||
166 | void blk_mq_kick_requeue_list(struct request_queue *q); | ||
145 | void blk_mq_complete_request(struct request *rq); | 167 | void blk_mq_complete_request(struct request *rq); |
146 | 168 | ||
147 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); | 169 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); |
148 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); | 170 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); |
149 | void blk_mq_stop_hw_queues(struct request_queue *q); | 171 | void blk_mq_stop_hw_queues(struct request_queue *q); |
150 | void blk_mq_start_stopped_hw_queues(struct request_queue *q); | 172 | void blk_mq_start_hw_queues(struct request_queue *q); |
173 | void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); | ||
174 | void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); | ||
175 | void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data); | ||
151 | 176 | ||
152 | /* | 177 | /* |
153 | * Driver command data is immediately after the request. So subtract request | 178 | * Driver command data is immediately after the request. So subtract request |
@@ -162,12 +187,6 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) | |||
162 | return (void *) rq + sizeof(*rq); | 187 | return (void *) rq + sizeof(*rq); |
163 | } | 188 | } |
164 | 189 | ||
165 | static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, | ||
166 | unsigned int tag) | ||
167 | { | ||
168 | return hctx->rqs[tag]; | ||
169 | } | ||
170 | |||
171 | #define queue_for_each_hw_ctx(q, hctx, i) \ | 190 | #define queue_for_each_hw_ctx(q, hctx, i) \ |
172 | for ((i) = 0; (i) < (q)->nr_hw_queues && \ | 191 | for ((i) = 0; (i) < (q)->nr_hw_queues && \ |
173 | ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) | 192 | ({ hctx = (q)->queue_hw_ctx[i]; 1; }); (i)++) |
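The blk-mq.h changes are the core of this series: the per-queue struct blk_mq_reg becomes a struct blk_mq_tag_set that several queues can share (hence tag_list and BLK_MQ_F_TAG_SHARED), and the one-shot blk_mq_init_commands()/blk_mq_free_commands() passes are replaced by init_request/exit_request callbacks in blk_mq_ops. A hedged sketch of a driver conversion to the new registration flow, with all my_-prefixed names hypothetical:

#include <linux/blk-mq.h>

struct my_cmd {
	int status;	/* per-request driver data, placed after struct request */
};

static int my_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->status = 0;
	/* ... issue to hardware; complete immediately for the sketch ... */
	blk_mq_end_io(rq, 0);
	return BLK_MQ_RQ_QUEUE_OK;
}

static int my_init_request(void *data, struct request *rq,
			   unsigned int hctx_idx, unsigned int rq_idx,
			   unsigned int numa_node)
{
	/* replaces the old blk_mq_init_commands() callback */
	struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

	cmd->status = -1;
	return 0;
}

static struct blk_mq_ops my_mq_ops = {
	.queue_rq	= my_queue_rq,
	.map_queue	= blk_mq_map_queue,
	.init_request	= my_init_request,
};

static struct blk_mq_tag_set my_tag_set;
static struct request_queue *my_q;

static int my_init(void)
{
	int ret;

	my_tag_set.ops = &my_mq_ops;
	my_tag_set.nr_hw_queues = 1;
	my_tag_set.queue_depth = 64;
	my_tag_set.numa_node = NUMA_NO_NODE;
	my_tag_set.cmd_size = sizeof(struct my_cmd);
	my_tag_set.flags = BLK_MQ_F_SHOULD_MERGE;

	ret = blk_mq_alloc_tag_set(&my_tag_set);
	if (ret)
		return ret;

	/* old: blk_mq_init_queue(&reg, driver_data) */
	my_q = blk_mq_init_queue(&my_tag_set);
	if (IS_ERR(my_q)) {
		blk_mq_free_tag_set(&my_tag_set);
		return PTR_ERR(my_q);
	}
	return 0;
}

A second queue sharing the same hardware tags would simply call blk_mq_init_queue(&my_tag_set) again; the set's tag_list is what lets the core mark all of its queues BLK_MQ_F_TAG_SHARED and balance depth through hctx->nr_active.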
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index aa0eaa2d0bd8..d8e4cea23a25 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -190,6 +190,7 @@ enum rq_flag_bits { | |||
190 | __REQ_PM, /* runtime pm request */ | 190 | __REQ_PM, /* runtime pm request */ |
191 | __REQ_END, /* last of chain of requests */ | 191 | __REQ_END, /* last of chain of requests */ |
192 | __REQ_HASHED, /* on IO scheduler merge hash */ | 192 | __REQ_HASHED, /* on IO scheduler merge hash */ |
193 | __REQ_MQ_INFLIGHT, /* track inflight for MQ */ | ||
193 | __REQ_NR_BITS, /* stops here */ | 194 | __REQ_NR_BITS, /* stops here */ |
194 | }; | 195 | }; |
195 | 196 | ||
@@ -243,5 +244,6 @@ enum rq_flag_bits { | |||
243 | #define REQ_PM (1ULL << __REQ_PM) | 244 | #define REQ_PM (1ULL << __REQ_PM) |
244 | #define REQ_END (1ULL << __REQ_END) | 245 | #define REQ_END (1ULL << __REQ_END) |
245 | #define REQ_HASHED (1ULL << __REQ_HASHED) | 246 | #define REQ_HASHED (1ULL << __REQ_HASHED) |
247 | #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) | ||
246 | 248 | ||
247 | #endif /* __LINUX_BLK_TYPES_H */ | 249 | #endif /* __LINUX_BLK_TYPES_H */ |
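The new REQ_MQ_INFLIGHT flag exists so that a request counted against a shared tag set's hctx->nr_active can be reliably un-counted at completion. Roughly, as a sketch of the accounting idea rather than the exact core code:

if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
	rq->cmd_flags |= REQ_MQ_INFLIGHT;
	atomic_inc(&hctx->nr_active);
}
/* ... and on free: */
if (rq->cmd_flags & REQ_MQ_INFLIGHT)
	atomic_dec(&hctx->nr_active);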
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0d84981ee03f..e90e1692e052 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -90,15 +90,15 @@ enum rq_cmd_type_bits { | |||
90 | #define BLK_MAX_CDB 16 | 90 | #define BLK_MAX_CDB 16 |
91 | 91 | ||
92 | /* | 92 | /* |
93 | * try to put the fields that are referenced together in the same cacheline. | 93 | * Try to put the fields that are referenced together in the same cacheline. |
94 | * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init() | 94 | * |
95 | * as well! | 95 | * If you modify this structure, make sure to update blk_rq_init() and |
96 | * especially blk_mq_rq_ctx_init() to take care of the added fields. | ||
96 | */ | 97 | */ |
97 | struct request { | 98 | struct request { |
98 | struct list_head queuelist; | 99 | struct list_head queuelist; |
99 | union { | 100 | union { |
100 | struct call_single_data csd; | 101 | struct call_single_data csd; |
101 | struct work_struct mq_flush_work; | ||
102 | unsigned long fifo_time; | 102 | unsigned long fifo_time; |
103 | }; | 103 | }; |
104 | 104 | ||
@@ -178,7 +178,6 @@ struct request { | |||
178 | unsigned short ioprio; | 178 | unsigned short ioprio; |
179 | 179 | ||
180 | void *special; /* opaque pointer available for LLD use */ | 180 | void *special; /* opaque pointer available for LLD use */ |
181 | char *buffer; /* kaddr of the current segment if available */ | ||
182 | 181 | ||
183 | int tag; | 182 | int tag; |
184 | int errors; | 183 | int errors; |
@@ -463,6 +462,10 @@ struct request_queue { | |||
463 | struct request *flush_rq; | 462 | struct request *flush_rq; |
464 | spinlock_t mq_flush_lock; | 463 | spinlock_t mq_flush_lock; |
465 | 464 | ||
465 | struct list_head requeue_list; | ||
466 | spinlock_t requeue_lock; | ||
467 | struct work_struct requeue_work; | ||
468 | |||
466 | struct mutex sysfs_lock; | 469 | struct mutex sysfs_lock; |
467 | 470 | ||
468 | int bypass_depth; | 471 | int bypass_depth; |
@@ -481,6 +484,9 @@ struct request_queue { | |||
481 | wait_queue_head_t mq_freeze_wq; | 484 | wait_queue_head_t mq_freeze_wq; |
482 | struct percpu_counter mq_usage_counter; | 485 | struct percpu_counter mq_usage_counter; |
483 | struct list_head all_q_node; | 486 | struct list_head all_q_node; |
487 | |||
488 | struct blk_mq_tag_set *tag_set; | ||
489 | struct list_head tag_set_list; | ||
484 | }; | 490 | }; |
485 | 491 | ||
486 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ | 492 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |
@@ -613,6 +619,15 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | |||
613 | 619 | ||
614 | #define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0) | 620 | #define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0) |
615 | 621 | ||
622 | /* | ||
623 | * Driver can handle struct request, if it either has an old style | ||
624 | * request_fn defined, or is blk-mq based. | ||
625 | */ | ||
626 | static inline bool queue_is_rq_based(struct request_queue *q) | ||
627 | { | ||
628 | return q->request_fn || q->mq_ops; | ||
629 | } | ||
630 | |||
616 | static inline unsigned int blk_queue_cluster(struct request_queue *q) | 631 | static inline unsigned int blk_queue_cluster(struct request_queue *q) |
617 | { | 632 | { |
618 | return q->limits.cluster; | 633 | return q->limits.cluster; |
@@ -937,6 +952,7 @@ extern struct request *blk_fetch_request(struct request_queue *q); | |||
937 | */ | 952 | */ |
938 | extern bool blk_update_request(struct request *rq, int error, | 953 | extern bool blk_update_request(struct request *rq, int error, |
939 | unsigned int nr_bytes); | 954 | unsigned int nr_bytes); |
955 | extern void blk_finish_request(struct request *rq, int error); | ||
940 | extern bool blk_end_request(struct request *rq, int error, | 956 | extern bool blk_end_request(struct request *rq, int error, |
941 | unsigned int nr_bytes); | 957 | unsigned int nr_bytes); |
942 | extern void blk_end_request_all(struct request *rq, int error); | 958 | extern void blk_end_request_all(struct request *rq, int error); |
@@ -1102,7 +1118,8 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) | |||
1102 | /* | 1118 | /* |
1103 | * tag stuff | 1119 | * tag stuff |
1104 | */ | 1120 | */ |
1105 | #define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED) | 1121 | #define blk_rq_tagged(rq) \ |
1122 | ((rq)->mq_ctx || ((rq)->cmd_flags & REQ_QUEUED)) | ||
1106 | extern int blk_queue_start_tag(struct request_queue *, struct request *); | 1123 | extern int blk_queue_start_tag(struct request_queue *, struct request *); |
1107 | extern struct request *blk_queue_find_tag(struct request_queue *, int); | 1124 | extern struct request *blk_queue_find_tag(struct request_queue *, int); |
1108 | extern void blk_queue_end_tag(struct request_queue *, struct request *); | 1125 | extern void blk_queue_end_tag(struct request_queue *, struct request *); |
@@ -1370,8 +1387,9 @@ static inline void put_dev_sector(Sector p) | |||
1370 | } | 1387 | } |
1371 | 1388 | ||
1372 | struct work_struct; | 1389 | struct work_struct; |
1373 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); | 1390 | int kblockd_schedule_work(struct work_struct *work); |
1374 | int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay); | 1391 | int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); |
1392 | int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); | ||
1375 | 1393 | ||
1376 | #ifdef CONFIG_BLK_CGROUP | 1394 | #ifdef CONFIG_BLK_CGROUP |
1377 | /* | 1395 | /* |
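Two small blkdev.h details worth noting: blk_rq_tagged() now treats every blk-mq request as tagged (mq_ctx is only set on blk-mq requests), and the new queue_is_rq_based() helper gives stacking drivers a single test for "this queue speaks struct request", regardless of dispatch model. A trivial usage sketch (my_can_stack_on() is hypothetical):

#include <linux/blkdev.h>

static bool my_can_stack_on(struct request_queue *q)
{
	/* true for legacy request_fn queues and for blk-mq queues */
	return queue_is_rq_based(q);
}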
diff --git a/mm/Makefile b/mm/Makefile index b484452dac57..0173940407f6 100644 --- a/mm/Makefile +++ b/mm/Makefile | |||
@@ -30,7 +30,6 @@ endif | |||
30 | 30 | ||
31 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o | 31 | obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o |
32 | 32 | ||
33 | obj-$(CONFIG_BOUNCE) += bounce.o | ||
34 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o | 33 | obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o |
35 | obj-$(CONFIG_FRONTSWAP) += frontswap.o | 34 | obj-$(CONFIG_FRONTSWAP) += frontswap.o |
36 | obj-$(CONFIG_ZSWAP) += zswap.o | 35 | obj-$(CONFIG_ZSWAP) += zswap.o |