diff options
| author | Jens Axboe <axboe@kernel.dk> | 2013-11-08 11:08:12 -0500 |
|---|---|---|
| committer | Jens Axboe <axboe@kernel.dk> | 2013-11-08 11:08:12 -0500 |
| commit | e37459b8e2c7db6735e39e019e448b76e5e77647 (patch) | |
| tree | a3f0944db87a8ae0d41e5acbbbabc1e7ef534d1b | |
| parent | c7d1ba417c7cb7297d14dd47a390ec90ce548d5c (diff) | |
| parent | e7e245000110a7794de8f925b9edc06a9c852f80 (diff) | |
Merge branch 'blk-mq/core' into for-3.13/core
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Conflicts:
block/blk-timeout.c
| -rw-r--r-- | block/Makefile | 5 | ||||
| -rw-r--r-- | block/blk-core.c | 157 | ||||
| -rw-r--r-- | block/blk-exec.c | 14 | ||||
| -rw-r--r-- | block/blk-flush.c | 154 | ||||
| -rw-r--r-- | block/blk-merge.c | 17 | ||||
| -rw-r--r-- | block/blk-mq-cpu.c | 93 | ||||
| -rw-r--r-- | block/blk-mq-cpumap.c | 108 | ||||
| -rw-r--r-- | block/blk-mq-sysfs.c | 384 | ||||
| -rw-r--r-- | block/blk-mq-tag.c | 204 | ||||
| -rw-r--r-- | block/blk-mq-tag.h | 27 | ||||
| -rw-r--r-- | block/blk-mq.c | 1500 | ||||
| -rw-r--r-- | block/blk-mq.h | 52 | ||||
| -rw-r--r-- | block/blk-sysfs.c | 13 | ||||
| -rw-r--r-- | block/blk-timeout.c | 74 | ||||
| -rw-r--r-- | block/blk.h | 17 | ||||
| -rw-r--r-- | drivers/block/Kconfig | 3 | ||||
| -rw-r--r-- | drivers/block/Makefile | 1 | ||||
| -rw-r--r-- | drivers/block/floppy.c | 4 | ||||
| -rw-r--r-- | drivers/block/null_blk.c | 635 | ||||
| -rw-r--r-- | drivers/scsi/sd.c | 2 | ||||
| -rw-r--r-- | include/linux/bio.h | 2 | ||||
| -rw-r--r-- | include/linux/blk-mq.h | 183 | ||||
| -rw-r--r-- | include/linux/blk_types.h | 68 | ||||
| -rw-r--r-- | include/linux/blkdev.h | 60 | ||||
| -rw-r--r-- | include/linux/percpu_ida.h | 23 | ||||
| -rw-r--r-- | kernel/smp.c | 7 | ||||
| -rw-r--r-- | lib/percpu_counter.c | 15 | ||||
| -rw-r--r-- | lib/percpu_ida.c | 89 |
28 files changed, 3721 insertions, 190 deletions
diff --git a/block/Makefile b/block/Makefile index 671a83d063a5..20645e88fb57 100644 --- a/block/Makefile +++ b/block/Makefile | |||
| @@ -5,8 +5,9 @@ | |||
| 5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ | 5 | obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ |
| 6 | blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ | 6 | blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ |
| 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ | 7 | blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ |
| 8 | blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o \ | 8 | blk-iopoll.o blk-lib.o blk-mq.o blk-mq-tag.o \ |
| 9 | partition-generic.o partitions/ | 9 | blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ |
| 10 | genhd.o scsi_ioctl.o partition-generic.o partitions/ | ||
| 10 | 11 | ||
| 11 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o | 12 | obj-$(CONFIG_BLK_DEV_BSG) += bsg.o |
| 12 | obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o | 13 | obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o |
diff --git a/block/blk-core.c b/block/blk-core.c index 25f13479f552..8bdd0121212a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/backing-dev.h> | 16 | #include <linux/backing-dev.h> |
| 17 | #include <linux/bio.h> | 17 | #include <linux/bio.h> |
| 18 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
| 19 | #include <linux/blk-mq.h> | ||
| 19 | #include <linux/highmem.h> | 20 | #include <linux/highmem.h> |
| 20 | #include <linux/mm.h> | 21 | #include <linux/mm.h> |
| 21 | #include <linux/kernel_stat.h> | 22 | #include <linux/kernel_stat.h> |
| @@ -48,7 +49,7 @@ DEFINE_IDA(blk_queue_ida); | |||
| 48 | /* | 49 | /* |
| 49 | * For the allocated request tables | 50 | * For the allocated request tables |
| 50 | */ | 51 | */ |
| 51 | static struct kmem_cache *request_cachep; | 52 | struct kmem_cache *request_cachep = NULL; |
| 52 | 53 | ||
| 53 | /* | 54 | /* |
| 54 | * For queue allocation | 55 | * For queue allocation |
| @@ -60,42 +61,6 @@ struct kmem_cache *blk_requestq_cachep; | |||
| 60 | */ | 61 | */ |
| 61 | static struct workqueue_struct *kblockd_workqueue; | 62 | static struct workqueue_struct *kblockd_workqueue; |
| 62 | 63 | ||
| 63 | static void drive_stat_acct(struct request *rq, int new_io) | ||
| 64 | { | ||
| 65 | struct hd_struct *part; | ||
| 66 | int rw = rq_data_dir(rq); | ||
| 67 | int cpu; | ||
| 68 | |||
| 69 | if (!blk_do_io_stat(rq)) | ||
| 70 | return; | ||
| 71 | |||
| 72 | cpu = part_stat_lock(); | ||
| 73 | |||
| 74 | if (!new_io) { | ||
| 75 | part = rq->part; | ||
| 76 | part_stat_inc(cpu, part, merges[rw]); | ||
| 77 | } else { | ||
| 78 | part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); | ||
| 79 | if (!hd_struct_try_get(part)) { | ||
| 80 | /* | ||
| 81 | * The partition is already being removed, | ||
| 82 | * the request will be accounted on the disk only | ||
| 83 | * | ||
| 84 | * We take a reference on disk->part0 although that | ||
| 85 | * partition will never be deleted, so we can treat | ||
| 86 | * it as any other partition. | ||
| 87 | */ | ||
| 88 | part = &rq->rq_disk->part0; | ||
| 89 | hd_struct_get(part); | ||
| 90 | } | ||
| 91 | part_round_stats(cpu, part); | ||
| 92 | part_inc_in_flight(part, rw); | ||
| 93 | rq->part = part; | ||
| 94 | } | ||
| 95 | |||
| 96 | part_stat_unlock(); | ||
| 97 | } | ||
| 98 | |||
| 99 | void blk_queue_congestion_threshold(struct request_queue *q) | 64 | void blk_queue_congestion_threshold(struct request_queue *q) |
| 100 | { | 65 | { |
| 101 | int nr; | 66 | int nr; |
| @@ -145,7 +110,6 @@ void blk_rq_init(struct request_queue *q, struct request *rq) | |||
| 145 | rq->cmd = rq->__cmd; | 110 | rq->cmd = rq->__cmd; |
| 146 | rq->cmd_len = BLK_MAX_CDB; | 111 | rq->cmd_len = BLK_MAX_CDB; |
| 147 | rq->tag = -1; | 112 | rq->tag = -1; |
| 148 | rq->ref_count = 1; | ||
| 149 | rq->start_time = jiffies; | 113 | rq->start_time = jiffies; |
| 150 | set_start_time_ns(rq); | 114 | set_start_time_ns(rq); |
| 151 | rq->part = NULL; | 115 | rq->part = NULL; |
| @@ -174,9 +138,9 @@ void blk_dump_rq_flags(struct request *rq, char *msg) | |||
| 174 | { | 138 | { |
| 175 | int bit; | 139 | int bit; |
| 176 | 140 | ||
| 177 | printk(KERN_INFO "%s: dev %s: type=%x, flags=%x\n", msg, | 141 | printk(KERN_INFO "%s: dev %s: type=%x, flags=%llx\n", msg, |
| 178 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, | 142 | rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, |
| 179 | rq->cmd_flags); | 143 | (unsigned long long) rq->cmd_flags); |
| 180 | 144 | ||
| 181 | printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", | 145 | printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", |
| 182 | (unsigned long long)blk_rq_pos(rq), | 146 | (unsigned long long)blk_rq_pos(rq), |
| @@ -595,9 +559,12 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
| 595 | if (!q) | 559 | if (!q) |
| 596 | return NULL; | 560 | return NULL; |
| 597 | 561 | ||
| 562 | if (percpu_counter_init(&q->mq_usage_counter, 0)) | ||
| 563 | goto fail_q; | ||
| 564 | |||
| 598 | q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); | 565 | q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); |
| 599 | if (q->id < 0) | 566 | if (q->id < 0) |
| 600 | goto fail_q; | 567 | goto fail_c; |
| 601 | 568 | ||
| 602 | q->backing_dev_info.ra_pages = | 569 | q->backing_dev_info.ra_pages = |
| 603 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 570 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
| @@ -644,6 +611,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
| 644 | q->bypass_depth = 1; | 611 | q->bypass_depth = 1; |
| 645 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | 612 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); |
| 646 | 613 | ||
| 614 | init_waitqueue_head(&q->mq_freeze_wq); | ||
| 615 | |||
| 647 | if (blkcg_init_queue(q)) | 616 | if (blkcg_init_queue(q)) |
| 648 | goto fail_bdi; | 617 | goto fail_bdi; |
| 649 | 618 | ||
| @@ -653,6 +622,8 @@ fail_bdi: | |||
| 653 | bdi_destroy(&q->backing_dev_info); | 622 | bdi_destroy(&q->backing_dev_info); |
| 654 | fail_id: | 623 | fail_id: |
| 655 | ida_simple_remove(&blk_queue_ida, q->id); | 624 | ida_simple_remove(&blk_queue_ida, q->id); |
| 625 | fail_c: | ||
| 626 | percpu_counter_destroy(&q->mq_usage_counter); | ||
| 656 | fail_q: | 627 | fail_q: |
| 657 | kmem_cache_free(blk_requestq_cachep, q); | 628 | kmem_cache_free(blk_requestq_cachep, q); |
| 658 | return NULL; | 629 | return NULL; |
| @@ -1119,7 +1090,8 @@ retry: | |||
| 1119 | goto retry; | 1090 | goto retry; |
| 1120 | } | 1091 | } |
| 1121 | 1092 | ||
| 1122 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 1093 | static struct request *blk_old_get_request(struct request_queue *q, int rw, |
| 1094 | gfp_t gfp_mask) | ||
| 1123 | { | 1095 | { |
| 1124 | struct request *rq; | 1096 | struct request *rq; |
| 1125 | 1097 | ||
| @@ -1136,6 +1108,14 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | |||
| 1136 | 1108 | ||
| 1137 | return rq; | 1109 | return rq; |
| 1138 | } | 1110 | } |
| 1111 | |||
| 1112 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | ||
| 1113 | { | ||
| 1114 | if (q->mq_ops) | ||
| 1115 | return blk_mq_alloc_request(q, rw, gfp_mask, false); | ||
| 1116 | else | ||
| 1117 | return blk_old_get_request(q, rw, gfp_mask); | ||
| 1118 | } | ||
| 1139 | EXPORT_SYMBOL(blk_get_request); | 1119 | EXPORT_SYMBOL(blk_get_request); |
| 1140 | 1120 | ||
| 1141 | /** | 1121 | /** |
| @@ -1221,7 +1201,7 @@ EXPORT_SYMBOL(blk_requeue_request); | |||
| 1221 | static void add_acct_request(struct request_queue *q, struct request *rq, | 1201 | static void add_acct_request(struct request_queue *q, struct request *rq, |
| 1222 | int where) | 1202 | int where) |
| 1223 | { | 1203 | { |
| 1224 | drive_stat_acct(rq, 1); | 1204 | blk_account_io_start(rq, true); |
| 1225 | __elv_add_request(q, rq, where); | 1205 | __elv_add_request(q, rq, where); |
| 1226 | } | 1206 | } |
| 1227 | 1207 | ||
| @@ -1282,8 +1262,6 @@ void __blk_put_request(struct request_queue *q, struct request *req) | |||
| 1282 | { | 1262 | { |
| 1283 | if (unlikely(!q)) | 1263 | if (unlikely(!q)) |
| 1284 | return; | 1264 | return; |
| 1285 | if (unlikely(--req->ref_count)) | ||
| 1286 | return; | ||
| 1287 | 1265 | ||
| 1288 | blk_pm_put_request(req); | 1266 | blk_pm_put_request(req); |
| 1289 | 1267 | ||
| @@ -1312,12 +1290,17 @@ EXPORT_SYMBOL_GPL(__blk_put_request); | |||
| 1312 | 1290 | ||
| 1313 | void blk_put_request(struct request *req) | 1291 | void blk_put_request(struct request *req) |
| 1314 | { | 1292 | { |
| 1315 | unsigned long flags; | ||
| 1316 | struct request_queue *q = req->q; | 1293 | struct request_queue *q = req->q; |
| 1317 | 1294 | ||
| 1318 | spin_lock_irqsave(q->queue_lock, flags); | 1295 | if (q->mq_ops) |
| 1319 | __blk_put_request(q, req); | 1296 | blk_mq_free_request(req); |
| 1320 | spin_unlock_irqrestore(q->queue_lock, flags); | 1297 | else { |
| 1298 | unsigned long flags; | ||
| 1299 | |||
| 1300 | spin_lock_irqsave(q->queue_lock, flags); | ||
| 1301 | __blk_put_request(q, req); | ||
| 1302 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
| 1303 | } | ||
| 1321 | } | 1304 | } |
| 1322 | EXPORT_SYMBOL(blk_put_request); | 1305 | EXPORT_SYMBOL(blk_put_request); |
| 1323 | 1306 | ||
| @@ -1353,8 +1336,8 @@ void blk_add_request_payload(struct request *rq, struct page *page, | |||
| 1353 | } | 1336 | } |
| 1354 | EXPORT_SYMBOL_GPL(blk_add_request_payload); | 1337 | EXPORT_SYMBOL_GPL(blk_add_request_payload); |
| 1355 | 1338 | ||
| 1356 | static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, | 1339 | bool bio_attempt_back_merge(struct request_queue *q, struct request *req, |
| 1357 | struct bio *bio) | 1340 | struct bio *bio) |
| 1358 | { | 1341 | { |
| 1359 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; | 1342 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; |
| 1360 | 1343 | ||
| @@ -1371,12 +1354,12 @@ static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, | |||
| 1371 | req->__data_len += bio->bi_size; | 1354 | req->__data_len += bio->bi_size; |
| 1372 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | 1355 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
| 1373 | 1356 | ||
| 1374 | drive_stat_acct(req, 0); | 1357 | blk_account_io_start(req, false); |
| 1375 | return true; | 1358 | return true; |
| 1376 | } | 1359 | } |
| 1377 | 1360 | ||
| 1378 | static bool bio_attempt_front_merge(struct request_queue *q, | 1361 | bool bio_attempt_front_merge(struct request_queue *q, struct request *req, |
| 1379 | struct request *req, struct bio *bio) | 1362 | struct bio *bio) |
| 1380 | { | 1363 | { |
| 1381 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; | 1364 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; |
| 1382 | 1365 | ||
| @@ -1401,12 +1384,12 @@ static bool bio_attempt_front_merge(struct request_queue *q, | |||
| 1401 | req->__data_len += bio->bi_size; | 1384 | req->__data_len += bio->bi_size; |
| 1402 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | 1385 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); |
| 1403 | 1386 | ||
| 1404 | drive_stat_acct(req, 0); | 1387 | blk_account_io_start(req, false); |
| 1405 | return true; | 1388 | return true; |
| 1406 | } | 1389 | } |
| 1407 | 1390 | ||
| 1408 | /** | 1391 | /** |
| 1409 | * attempt_plug_merge - try to merge with %current's plugged list | 1392 | * blk_attempt_plug_merge - try to merge with %current's plugged list |
| 1410 | * @q: request_queue new bio is being queued at | 1393 | * @q: request_queue new bio is being queued at |
| 1411 | * @bio: new bio being queued | 1394 | * @bio: new bio being queued |
| 1412 | * @request_count: out parameter for number of traversed plugged requests | 1395 | * @request_count: out parameter for number of traversed plugged requests |
| @@ -1422,12 +1405,13 @@ static bool bio_attempt_front_merge(struct request_queue *q, | |||
| 1422 | * reliable access to the elevator outside queue lock. Only check basic | 1405 | * reliable access to the elevator outside queue lock. Only check basic |
| 1423 | * merging parameters without querying the elevator. | 1406 | * merging parameters without querying the elevator. |
| 1424 | */ | 1407 | */ |
| 1425 | static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, | 1408 | bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, |
| 1426 | unsigned int *request_count) | 1409 | unsigned int *request_count) |
| 1427 | { | 1410 | { |
| 1428 | struct blk_plug *plug; | 1411 | struct blk_plug *plug; |
| 1429 | struct request *rq; | 1412 | struct request *rq; |
| 1430 | bool ret = false; | 1413 | bool ret = false; |
| 1414 | struct list_head *plug_list; | ||
| 1431 | 1415 | ||
| 1432 | if (blk_queue_nomerges(q)) | 1416 | if (blk_queue_nomerges(q)) |
| 1433 | goto out; | 1417 | goto out; |
| @@ -1437,7 +1421,12 @@ static bool attempt_plug_merge(struct request_queue *q, struct bio *bio, | |||
| 1437 | goto out; | 1421 | goto out; |
| 1438 | *request_count = 0; | 1422 | *request_count = 0; |
| 1439 | 1423 | ||
| 1440 | list_for_each_entry_reverse(rq, &plug->list, queuelist) { | 1424 | if (q->mq_ops) |
| 1425 | plug_list = &plug->mq_list; | ||
| 1426 | else | ||
| 1427 | plug_list = &plug->list; | ||
| 1428 | |||
| 1429 | list_for_each_entry_reverse(rq, plug_list, queuelist) { | ||
| 1441 | int el_ret; | 1430 | int el_ret; |
| 1442 | 1431 | ||
| 1443 | if (rq->q == q) | 1432 | if (rq->q == q) |
| @@ -1505,7 +1494,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) | |||
| 1505 | * Check if we can merge with the plugged list before grabbing | 1494 | * Check if we can merge with the plugged list before grabbing |
| 1506 | * any locks. | 1495 | * any locks. |
| 1507 | */ | 1496 | */ |
| 1508 | if (attempt_plug_merge(q, bio, &request_count)) | 1497 | if (blk_attempt_plug_merge(q, bio, &request_count)) |
| 1509 | return; | 1498 | return; |
| 1510 | 1499 | ||
| 1511 | spin_lock_irq(q->queue_lock); | 1500 | spin_lock_irq(q->queue_lock); |
| @@ -1573,7 +1562,7 @@ get_rq: | |||
| 1573 | } | 1562 | } |
| 1574 | } | 1563 | } |
| 1575 | list_add_tail(&req->queuelist, &plug->list); | 1564 | list_add_tail(&req->queuelist, &plug->list); |
| 1576 | drive_stat_acct(req, 1); | 1565 | blk_account_io_start(req, true); |
| 1577 | } else { | 1566 | } else { |
| 1578 | spin_lock_irq(q->queue_lock); | 1567 | spin_lock_irq(q->queue_lock); |
| 1579 | add_acct_request(q, req, where); | 1568 | add_acct_request(q, req, where); |
| @@ -2027,7 +2016,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq) | |||
| 2027 | } | 2016 | } |
| 2028 | EXPORT_SYMBOL_GPL(blk_rq_err_bytes); | 2017 | EXPORT_SYMBOL_GPL(blk_rq_err_bytes); |
| 2029 | 2018 | ||
| 2030 | static void blk_account_io_completion(struct request *req, unsigned int bytes) | 2019 | void blk_account_io_completion(struct request *req, unsigned int bytes) |
| 2031 | { | 2020 | { |
| 2032 | if (blk_do_io_stat(req)) { | 2021 | if (blk_do_io_stat(req)) { |
| 2033 | const int rw = rq_data_dir(req); | 2022 | const int rw = rq_data_dir(req); |
| @@ -2041,7 +2030,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) | |||
| 2041 | } | 2030 | } |
| 2042 | } | 2031 | } |
| 2043 | 2032 | ||
| 2044 | static void blk_account_io_done(struct request *req) | 2033 | void blk_account_io_done(struct request *req) |
| 2045 | { | 2034 | { |
| 2046 | /* | 2035 | /* |
| 2047 | * Account IO completion. flush_rq isn't accounted as a | 2036 | * Account IO completion. flush_rq isn't accounted as a |
| @@ -2089,6 +2078,42 @@ static inline struct request *blk_pm_peek_request(struct request_queue *q, | |||
| 2089 | } | 2078 | } |
| 2090 | #endif | 2079 | #endif |
| 2091 | 2080 | ||
| 2081 | void blk_account_io_start(struct request *rq, bool new_io) | ||
| 2082 | { | ||
| 2083 | struct hd_struct *part; | ||
| 2084 | int rw = rq_data_dir(rq); | ||
| 2085 | int cpu; | ||
| 2086 | |||
| 2087 | if (!blk_do_io_stat(rq)) | ||
| 2088 | return; | ||
| 2089 | |||
| 2090 | cpu = part_stat_lock(); | ||
| 2091 | |||
| 2092 | if (!new_io) { | ||
| 2093 | part = rq->part; | ||
| 2094 | part_stat_inc(cpu, part, merges[rw]); | ||
| 2095 | } else { | ||
| 2096 | part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); | ||
| 2097 | if (!hd_struct_try_get(part)) { | ||
| 2098 | /* | ||
| 2099 | * The partition is already being removed, | ||
| 2100 | * the request will be accounted on the disk only | ||
| 2101 | * | ||
| 2102 | * We take a reference on disk->part0 although that | ||
| 2103 | * partition will never be deleted, so we can treat | ||
| 2104 | * it as any other partition. | ||
| 2105 | */ | ||
| 2106 | part = &rq->rq_disk->part0; | ||
| 2107 | hd_struct_get(part); | ||
| 2108 | } | ||
| 2109 | part_round_stats(cpu, part); | ||
| 2110 | part_inc_in_flight(part, rw); | ||
| 2111 | rq->part = part; | ||
| 2112 | } | ||
| 2113 | |||
| 2114 | part_stat_unlock(); | ||
| 2115 | } | ||
| 2116 | |||
| 2092 | /** | 2117 | /** |
| 2093 | * blk_peek_request - peek at the top of a request queue | 2118 | * blk_peek_request - peek at the top of a request queue |
| 2094 | * @q: request queue to peek at | 2119 | * @q: request queue to peek at |
| @@ -2465,7 +2490,6 @@ static void blk_finish_request(struct request *req, int error) | |||
| 2465 | if (req->cmd_flags & REQ_DONTPREP) | 2490 | if (req->cmd_flags & REQ_DONTPREP) |
| 2466 | blk_unprep_request(req); | 2491 | blk_unprep_request(req); |
| 2467 | 2492 | ||
| 2468 | |||
| 2469 | blk_account_io_done(req); | 2493 | blk_account_io_done(req); |
| 2470 | 2494 | ||
| 2471 | if (req->end_io) | 2495 | if (req->end_io) |
| @@ -2887,6 +2911,7 @@ void blk_start_plug(struct blk_plug *plug) | |||
| 2887 | 2911 | ||
| 2888 | plug->magic = PLUG_MAGIC; | 2912 | plug->magic = PLUG_MAGIC; |
| 2889 | INIT_LIST_HEAD(&plug->list); | 2913 | INIT_LIST_HEAD(&plug->list); |
| 2914 | INIT_LIST_HEAD(&plug->mq_list); | ||
| 2890 | INIT_LIST_HEAD(&plug->cb_list); | 2915 | INIT_LIST_HEAD(&plug->cb_list); |
| 2891 | 2916 | ||
| 2892 | /* | 2917 | /* |
| @@ -2984,6 +3009,10 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | |||
| 2984 | BUG_ON(plug->magic != PLUG_MAGIC); | 3009 | BUG_ON(plug->magic != PLUG_MAGIC); |
| 2985 | 3010 | ||
| 2986 | flush_plug_callbacks(plug, from_schedule); | 3011 | flush_plug_callbacks(plug, from_schedule); |
| 3012 | |||
| 3013 | if (!list_empty(&plug->mq_list)) | ||
| 3014 | blk_mq_flush_plug_list(plug, from_schedule); | ||
| 3015 | |||
| 2987 | if (list_empty(&plug->list)) | 3016 | if (list_empty(&plug->list)) |
| 2988 | return; | 3017 | return; |
| 2989 | 3018 | ||
diff --git a/block/blk-exec.c b/block/blk-exec.c index ae4f27d7944e..c3edf9dff566 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c | |||
| @@ -5,6 +5,7 @@ | |||
| 5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
| 6 | #include <linux/bio.h> | 6 | #include <linux/bio.h> |
| 7 | #include <linux/blkdev.h> | 7 | #include <linux/blkdev.h> |
| 8 | #include <linux/blk-mq.h> | ||
| 8 | #include <linux/sched/sysctl.h> | 9 | #include <linux/sched/sysctl.h> |
| 9 | 10 | ||
| 10 | #include "blk.h" | 11 | #include "blk.h" |
| @@ -24,7 +25,6 @@ static void blk_end_sync_rq(struct request *rq, int error) | |||
| 24 | struct completion *waiting = rq->end_io_data; | 25 | struct completion *waiting = rq->end_io_data; |
| 25 | 26 | ||
| 26 | rq->end_io_data = NULL; | 27 | rq->end_io_data = NULL; |
| 27 | __blk_put_request(rq->q, rq); | ||
| 28 | 28 | ||
| 29 | /* | 29 | /* |
| 30 | * complete last, if this is a stack request the process (and thus | 30 | * complete last, if this is a stack request the process (and thus |
| @@ -59,6 +59,12 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | |||
| 59 | 59 | ||
| 60 | rq->rq_disk = bd_disk; | 60 | rq->rq_disk = bd_disk; |
| 61 | rq->end_io = done; | 61 | rq->end_io = done; |
| 62 | |||
| 63 | if (q->mq_ops) { | ||
| 64 | blk_mq_insert_request(q, rq, true); | ||
| 65 | return; | ||
| 66 | } | ||
| 67 | |||
| 62 | /* | 68 | /* |
| 63 | * need to check this before __blk_run_queue(), because rq can | 69 | * need to check this before __blk_run_queue(), because rq can |
| 64 | * be freed before that returns. | 70 | * be freed before that returns. |
| @@ -103,12 +109,6 @@ int blk_execute_rq(struct request_queue *q, struct gendisk *bd_disk, | |||
| 103 | int err = 0; | 109 | int err = 0; |
| 104 | unsigned long hang_check; | 110 | unsigned long hang_check; |
| 105 | 111 | ||
| 106 | /* | ||
| 107 | * we need an extra reference to the request, so we can look at | ||
| 108 | * it after io completion | ||
| 109 | */ | ||
| 110 | rq->ref_count++; | ||
| 111 | |||
| 112 | if (!rq->sense) { | 112 | if (!rq->sense) { |
| 113 | memset(sense, 0, sizeof(sense)); | 113 | memset(sense, 0, sizeof(sense)); |
| 114 | rq->sense = sense; | 114 | rq->sense = sense; |
diff --git a/block/blk-flush.c b/block/blk-flush.c index cc2b827a853c..331e627301ea 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c | |||
| @@ -69,8 +69,10 @@ | |||
| 69 | #include <linux/bio.h> | 69 | #include <linux/bio.h> |
| 70 | #include <linux/blkdev.h> | 70 | #include <linux/blkdev.h> |
| 71 | #include <linux/gfp.h> | 71 | #include <linux/gfp.h> |
| 72 | #include <linux/blk-mq.h> | ||
| 72 | 73 | ||
| 73 | #include "blk.h" | 74 | #include "blk.h" |
| 75 | #include "blk-mq.h" | ||
| 74 | 76 | ||
| 75 | /* FLUSH/FUA sequences */ | 77 | /* FLUSH/FUA sequences */ |
| 76 | enum { | 78 | enum { |
| @@ -124,6 +126,24 @@ static void blk_flush_restore_request(struct request *rq) | |||
| 124 | /* make @rq a normal request */ | 126 | /* make @rq a normal request */ |
| 125 | rq->cmd_flags &= ~REQ_FLUSH_SEQ; | 127 | rq->cmd_flags &= ~REQ_FLUSH_SEQ; |
| 126 | rq->end_io = rq->flush.saved_end_io; | 128 | rq->end_io = rq->flush.saved_end_io; |
| 129 | |||
| 130 | blk_clear_rq_complete(rq); | ||
| 131 | } | ||
| 132 | |||
| 133 | static void mq_flush_data_run(struct work_struct *work) | ||
| 134 | { | ||
| 135 | struct request *rq; | ||
| 136 | |||
| 137 | rq = container_of(work, struct request, mq_flush_data); | ||
| 138 | |||
| 139 | memset(&rq->csd, 0, sizeof(rq->csd)); | ||
| 140 | blk_mq_run_request(rq, true, false); | ||
| 141 | } | ||
| 142 | |||
| 143 | static void blk_mq_flush_data_insert(struct request *rq) | ||
| 144 | { | ||
| 145 | INIT_WORK(&rq->mq_flush_data, mq_flush_data_run); | ||
| 146 | kblockd_schedule_work(rq->q, &rq->mq_flush_data); | ||
| 127 | } | 147 | } |
| 128 | 148 | ||
| 129 | /** | 149 | /** |
| @@ -136,7 +156,7 @@ static void blk_flush_restore_request(struct request *rq) | |||
| 136 | * completion and trigger the next step. | 156 | * completion and trigger the next step. |
| 137 | * | 157 | * |
| 138 | * CONTEXT: | 158 | * CONTEXT: |
| 139 | * spin_lock_irq(q->queue_lock) | 159 | * spin_lock_irq(q->queue_lock or q->mq_flush_lock) |
| 140 | * | 160 | * |
| 141 | * RETURNS: | 161 | * RETURNS: |
| 142 | * %true if requests were added to the dispatch queue, %false otherwise. | 162 | * %true if requests were added to the dispatch queue, %false otherwise. |
| @@ -146,7 +166,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, | |||
| 146 | { | 166 | { |
| 147 | struct request_queue *q = rq->q; | 167 | struct request_queue *q = rq->q; |
| 148 | struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; | 168 | struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; |
| 149 | bool queued = false; | 169 | bool queued = false, kicked; |
| 150 | 170 | ||
| 151 | BUG_ON(rq->flush.seq & seq); | 171 | BUG_ON(rq->flush.seq & seq); |
| 152 | rq->flush.seq |= seq; | 172 | rq->flush.seq |= seq; |
| @@ -167,8 +187,12 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, | |||
| 167 | 187 | ||
| 168 | case REQ_FSEQ_DATA: | 188 | case REQ_FSEQ_DATA: |
| 169 | list_move_tail(&rq->flush.list, &q->flush_data_in_flight); | 189 | list_move_tail(&rq->flush.list, &q->flush_data_in_flight); |
| 170 | list_add(&rq->queuelist, &q->queue_head); | 190 | if (q->mq_ops) |
| 171 | queued = true; | 191 | blk_mq_flush_data_insert(rq); |
| 192 | else { | ||
| 193 | list_add(&rq->queuelist, &q->queue_head); | ||
| 194 | queued = true; | ||
| 195 | } | ||
| 172 | break; | 196 | break; |
| 173 | 197 | ||
| 174 | case REQ_FSEQ_DONE: | 198 | case REQ_FSEQ_DONE: |
| @@ -181,28 +205,43 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, | |||
| 181 | BUG_ON(!list_empty(&rq->queuelist)); | 205 | BUG_ON(!list_empty(&rq->queuelist)); |
| 182 | list_del_init(&rq->flush.list); | 206 | list_del_init(&rq->flush.list); |
| 183 | blk_flush_restore_request(rq); | 207 | blk_flush_restore_request(rq); |
| 184 | __blk_end_request_all(rq, error); | 208 | if (q->mq_ops) |
| 209 | blk_mq_end_io(rq, error); | ||
| 210 | else | ||
| 211 | __blk_end_request_all(rq, error); | ||
| 185 | break; | 212 | break; |
| 186 | 213 | ||
| 187 | default: | 214 | default: |
| 188 | BUG(); | 215 | BUG(); |
| 189 | } | 216 | } |
| 190 | 217 | ||
| 191 | return blk_kick_flush(q) | queued; | 218 | kicked = blk_kick_flush(q); |
| 219 | /* blk_mq_run_flush will run queue */ | ||
| 220 | if (q->mq_ops) | ||
| 221 | return queued; | ||
| 222 | return kicked | queued; | ||
| 192 | } | 223 | } |
| 193 | 224 | ||
| 194 | static void flush_end_io(struct request *flush_rq, int error) | 225 | static void flush_end_io(struct request *flush_rq, int error) |
| 195 | { | 226 | { |
| 196 | struct request_queue *q = flush_rq->q; | 227 | struct request_queue *q = flush_rq->q; |
| 197 | struct list_head *running = &q->flush_queue[q->flush_running_idx]; | 228 | struct list_head *running; |
| 198 | bool queued = false; | 229 | bool queued = false; |
| 199 | struct request *rq, *n; | 230 | struct request *rq, *n; |
| 231 | unsigned long flags = 0; | ||
| 200 | 232 | ||
| 233 | if (q->mq_ops) { | ||
| 234 | blk_mq_free_request(flush_rq); | ||
| 235 | spin_lock_irqsave(&q->mq_flush_lock, flags); | ||
| 236 | } | ||
| 237 | running = &q->flush_queue[q->flush_running_idx]; | ||
| 201 | BUG_ON(q->flush_pending_idx == q->flush_running_idx); | 238 | BUG_ON(q->flush_pending_idx == q->flush_running_idx); |
| 202 | 239 | ||
| 203 | /* account completion of the flush request */ | 240 | /* account completion of the flush request */ |
| 204 | q->flush_running_idx ^= 1; | 241 | q->flush_running_idx ^= 1; |
| 205 | elv_completed_request(q, flush_rq); | 242 | |
| 243 | if (!q->mq_ops) | ||
| 244 | elv_completed_request(q, flush_rq); | ||
| 206 | 245 | ||
| 207 | /* and push the waiting requests to the next stage */ | 246 | /* and push the waiting requests to the next stage */ |
| 208 | list_for_each_entry_safe(rq, n, running, flush.list) { | 247 | list_for_each_entry_safe(rq, n, running, flush.list) { |
| @@ -223,9 +262,48 @@ static void flush_end_io(struct request *flush_rq, int error) | |||
| 223 | * directly into request_fn may confuse the driver. Always use | 262 | * directly into request_fn may confuse the driver. Always use |
| 224 | * kblockd. | 263 | * kblockd. |
| 225 | */ | 264 | */ |
| 226 | if (queued || q->flush_queue_delayed) | 265 | if (queued || q->flush_queue_delayed) { |
| 227 | blk_run_queue_async(q); | 266 | if (!q->mq_ops) |
| 267 | blk_run_queue_async(q); | ||
| 268 | else | ||
| 269 | /* | ||
| 270 | * This can be optimized to only run queues with requests | ||
| 271 | * queued if necessary. | ||
| 272 | */ | ||
| 273 | blk_mq_run_queues(q, true); | ||
| 274 | } | ||
| 228 | q->flush_queue_delayed = 0; | 275 | q->flush_queue_delayed = 0; |
| 276 | if (q->mq_ops) | ||
| 277 | spin_unlock_irqrestore(&q->mq_flush_lock, flags); | ||
| 278 | } | ||
| 279 | |||
| 280 | static void mq_flush_work(struct work_struct *work) | ||
| 281 | { | ||
| 282 | struct request_queue *q; | ||
| 283 | struct request *rq; | ||
| 284 | |||
| 285 | q = container_of(work, struct request_queue, mq_flush_work); | ||
| 286 | |||
| 287 | /* We don't need set REQ_FLUSH_SEQ, it's for consistency */ | ||
| 288 | rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ, | ||
| 289 | __GFP_WAIT|GFP_ATOMIC, true); | ||
| 290 | rq->cmd_type = REQ_TYPE_FS; | ||
| 291 | rq->end_io = flush_end_io; | ||
| 292 | |||
| 293 | blk_mq_run_request(rq, true, false); | ||
| 294 | } | ||
| 295 | |||
| 296 | /* | ||
| 297 | * We can't directly use q->flush_rq, because it doesn't have tag and is not in | ||
| 298 | * hctx->rqs[]. so we must allocate a new request, since we can't sleep here, | ||
| 299 | * so offload the work to workqueue. | ||
| 300 | * | ||
| 301 | * Note: we assume a flush request finished in any hardware queue will flush | ||
| 302 | * the whole disk cache. | ||
| 303 | */ | ||
| 304 | static void mq_run_flush(struct request_queue *q) | ||
| 305 | { | ||
| 306 | kblockd_schedule_work(q, &q->mq_flush_work); | ||
| 229 | } | 307 | } |
| 230 | 308 | ||
| 231 | /** | 309 | /** |
| @@ -236,7 +314,7 @@ static void flush_end_io(struct request *flush_rq, int error) | |||
| 236 | * Please read the comment at the top of this file for more info. | 314 | * Please read the comment at the top of this file for more info. |
| 237 | * | 315 | * |
| 238 | * CONTEXT: | 316 | * CONTEXT: |
| 239 | * spin_lock_irq(q->queue_lock) | 317 | * spin_lock_irq(q->queue_lock or q->mq_flush_lock) |
| 240 | * | 318 | * |
| 241 | * RETURNS: | 319 | * RETURNS: |
| 242 | * %true if flush was issued, %false otherwise. | 320 | * %true if flush was issued, %false otherwise. |
| @@ -261,13 +339,18 @@ static bool blk_kick_flush(struct request_queue *q) | |||
| 261 | * Issue flush and toggle pending_idx. This makes pending_idx | 339 | * Issue flush and toggle pending_idx. This makes pending_idx |
| 262 | * different from running_idx, which means flush is in flight. | 340 | * different from running_idx, which means flush is in flight. |
| 263 | */ | 341 | */ |
| 342 | q->flush_pending_idx ^= 1; | ||
| 343 | if (q->mq_ops) { | ||
| 344 | mq_run_flush(q); | ||
| 345 | return true; | ||
| 346 | } | ||
| 347 | |||
| 264 | blk_rq_init(q, &q->flush_rq); | 348 | blk_rq_init(q, &q->flush_rq); |
| 265 | q->flush_rq.cmd_type = REQ_TYPE_FS; | 349 | q->flush_rq.cmd_type = REQ_TYPE_FS; |
| 266 | q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; | 350 | q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; |
| 267 | q->flush_rq.rq_disk = first_rq->rq_disk; | 351 | q->flush_rq.rq_disk = first_rq->rq_disk; |
| 268 | q->flush_rq.end_io = flush_end_io; | 352 | q->flush_rq.end_io = flush_end_io; |
| 269 | 353 | ||
| 270 | q->flush_pending_idx ^= 1; | ||
| 271 | list_add_tail(&q->flush_rq.queuelist, &q->queue_head); | 354 | list_add_tail(&q->flush_rq.queuelist, &q->queue_head); |
| 272 | return true; | 355 | return true; |
| 273 | } | 356 | } |
| @@ -284,16 +367,37 @@ static void flush_data_end_io(struct request *rq, int error) | |||
| 284 | blk_run_queue_async(q); | 367 | blk_run_queue_async(q); |
| 285 | } | 368 | } |
| 286 | 369 | ||
| 370 | static void mq_flush_data_end_io(struct request *rq, int error) | ||
| 371 | { | ||
| 372 | struct request_queue *q = rq->q; | ||
| 373 | struct blk_mq_hw_ctx *hctx; | ||
| 374 | struct blk_mq_ctx *ctx; | ||
| 375 | unsigned long flags; | ||
| 376 | |||
| 377 | ctx = rq->mq_ctx; | ||
| 378 | hctx = q->mq_ops->map_queue(q, ctx->cpu); | ||
| 379 | |||
| 380 | /* | ||
| 381 | * After populating an empty queue, kick it to avoid stall. Read | ||
| 382 | * the comment in flush_end_io(). | ||
| 383 | */ | ||
| 384 | spin_lock_irqsave(&q->mq_flush_lock, flags); | ||
| 385 | if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) | ||
| 386 | blk_mq_run_hw_queue(hctx, true); | ||
| 387 | spin_unlock_irqrestore(&q->mq_flush_lock, flags); | ||
| 388 | } | ||
| 389 | |||
| 287 | /** | 390 | /** |
| 288 | * blk_insert_flush - insert a new FLUSH/FUA request | 391 | * blk_insert_flush - insert a new FLUSH/FUA request |
| 289 | * @rq: request to insert | 392 | * @rq: request to insert |
| 290 | * | 393 | * |
| 291 | * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions. | 394 | * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions. |
| 395 | * or __blk_mq_run_hw_queue() to dispatch request. | ||
| 292 | * @rq is being submitted. Analyze what needs to be done and put it on the | 396 | * @rq is being submitted. Analyze what needs to be done and put it on the |
| 293 | * right queue. | 397 | * right queue. |
| 294 | * | 398 | * |
| 295 | * CONTEXT: | 399 | * CONTEXT: |
| 296 | * spin_lock_irq(q->queue_lock) | 400 | * spin_lock_irq(q->queue_lock) in !mq case |
| 297 | */ | 401 | */ |
| 298 | void blk_insert_flush(struct request *rq) | 402 | void blk_insert_flush(struct request *rq) |
| 299 | { | 403 | { |
| @@ -316,7 +420,10 @@ void blk_insert_flush(struct request *rq) | |||
| 316 | * complete the request. | 420 | * complete the request. |
| 317 | */ | 421 | */ |
| 318 | if (!policy) { | 422 | if (!policy) { |
| 319 | __blk_end_bidi_request(rq, 0, 0, 0); | 423 | if (q->mq_ops) |
| 424 | blk_mq_end_io(rq, 0); | ||
| 425 | else | ||
| 426 | __blk_end_bidi_request(rq, 0, 0, 0); | ||
| 320 | return; | 427 | return; |
| 321 | } | 428 | } |
| 322 | 429 | ||
| @@ -329,7 +436,10 @@ void blk_insert_flush(struct request *rq) | |||
| 329 | */ | 436 | */ |
| 330 | if ((policy & REQ_FSEQ_DATA) && | 437 | if ((policy & REQ_FSEQ_DATA) && |
| 331 | !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { | 438 | !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { |
| 332 | list_add_tail(&rq->queuelist, &q->queue_head); | 439 | if (q->mq_ops) { |
| 440 | blk_mq_run_request(rq, false, true); | ||
| 441 | } else | ||
| 442 | list_add_tail(&rq->queuelist, &q->queue_head); | ||
| 333 | return; | 443 | return; |
| 334 | } | 444 | } |
| 335 | 445 | ||
| @@ -341,6 +451,14 @@ void blk_insert_flush(struct request *rq) | |||
| 341 | INIT_LIST_HEAD(&rq->flush.list); | 451 | INIT_LIST_HEAD(&rq->flush.list); |
| 342 | rq->cmd_flags |= REQ_FLUSH_SEQ; | 452 | rq->cmd_flags |= REQ_FLUSH_SEQ; |
| 343 | rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ | 453 | rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ |
| 454 | if (q->mq_ops) { | ||
| 455 | rq->end_io = mq_flush_data_end_io; | ||
| 456 | |||
| 457 | spin_lock_irq(&q->mq_flush_lock); | ||
| 458 | blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); | ||
| 459 | spin_unlock_irq(&q->mq_flush_lock); | ||
| 460 | return; | ||
| 461 | } | ||
| 344 | rq->end_io = flush_data_end_io; | 462 | rq->end_io = flush_data_end_io; |
| 345 | 463 | ||
| 346 | blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); | 464 | blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); |
| @@ -453,3 +571,9 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, | |||
| 453 | return ret; | 571 | return ret; |
| 454 | } | 572 | } |
| 455 | EXPORT_SYMBOL(blkdev_issue_flush); | 573 | EXPORT_SYMBOL(blkdev_issue_flush); |
| 574 | |||
| 575 | void blk_mq_init_flush(struct request_queue *q) | ||
| 576 | { | ||
| 577 | spin_lock_init(&q->mq_flush_lock); | ||
| 578 | INIT_WORK(&q->mq_flush_work, mq_flush_work); | ||
| 579 | } | ||
diff --git a/block/blk-merge.c b/block/blk-merge.c index 5f2448253797..1ffc58977835 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
| @@ -308,6 +308,17 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, | |||
| 308 | return ll_new_hw_segment(q, req, bio); | 308 | return ll_new_hw_segment(q, req, bio); |
| 309 | } | 309 | } |
| 310 | 310 | ||
| 311 | /* | ||
| 312 | * blk-mq uses req->special to carry normal driver per-request payload, it | ||
| 313 | * does not indicate a prepared command that we cannot merge with. | ||
| 314 | */ | ||
| 315 | static bool req_no_special_merge(struct request *req) | ||
| 316 | { | ||
| 317 | struct request_queue *q = req->q; | ||
| 318 | |||
| 319 | return !q->mq_ops && req->special; | ||
| 320 | } | ||
| 321 | |||
| 311 | static int ll_merge_requests_fn(struct request_queue *q, struct request *req, | 322 | static int ll_merge_requests_fn(struct request_queue *q, struct request *req, |
| 312 | struct request *next) | 323 | struct request *next) |
| 313 | { | 324 | { |
| @@ -319,7 +330,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, | |||
| 319 | * First check if the either of the requests are re-queued | 330 | * First check if the either of the requests are re-queued |
| 320 | * requests. Can't merge them if they are. | 331 | * requests. Can't merge them if they are. |
| 321 | */ | 332 | */ |
| 322 | if (req->special || next->special) | 333 | if (req_no_special_merge(req) || req_no_special_merge(next)) |
| 323 | return 0; | 334 | return 0; |
| 324 | 335 | ||
| 325 | /* | 336 | /* |
| @@ -416,7 +427,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, | |||
| 416 | 427 | ||
| 417 | if (rq_data_dir(req) != rq_data_dir(next) | 428 | if (rq_data_dir(req) != rq_data_dir(next) |
| 418 | || req->rq_disk != next->rq_disk | 429 | || req->rq_disk != next->rq_disk |
| 419 | || next->special) | 430 | || req_no_special_merge(next)) |
| 420 | return 0; | 431 | return 0; |
| 421 | 432 | ||
| 422 | if (req->cmd_flags & REQ_WRITE_SAME && | 433 | if (req->cmd_flags & REQ_WRITE_SAME && |
| @@ -515,7 +526,7 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) | |||
| 515 | return false; | 526 | return false; |
| 516 | 527 | ||
| 517 | /* must be same device and not a special request */ | 528 | /* must be same device and not a special request */ |
| 518 | if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) | 529 | if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq)) |
| 519 | return false; | 530 | return false; |
| 520 | 531 | ||
| 521 | /* only merge integrity protected bio into ditto rq */ | 532 | /* only merge integrity protected bio into ditto rq */ |
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c new file mode 100644 index 000000000000..f8ea39d7ae54 --- /dev/null +++ b/block/blk-mq-cpu.c | |||
| @@ -0,0 +1,93 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/init.h> | ||
| 4 | #include <linux/blkdev.h> | ||
| 5 | #include <linux/list.h> | ||
| 6 | #include <linux/llist.h> | ||
| 7 | #include <linux/smp.h> | ||
| 8 | #include <linux/cpu.h> | ||
| 9 | |||
| 10 | #include <linux/blk-mq.h> | ||
| 11 | #include "blk-mq.h" | ||
| 12 | |||
| 13 | static LIST_HEAD(blk_mq_cpu_notify_list); | ||
| 14 | static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock); | ||
| 15 | |||
| 16 | static int __cpuinit blk_mq_main_cpu_notify(struct notifier_block *self, | ||
| 17 | unsigned long action, void *hcpu) | ||
| 18 | { | ||
| 19 | unsigned int cpu = (unsigned long) hcpu; | ||
| 20 | struct blk_mq_cpu_notifier *notify; | ||
| 21 | |||
| 22 | spin_lock(&blk_mq_cpu_notify_lock); | ||
| 23 | |||
| 24 | list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) | ||
| 25 | notify->notify(notify->data, action, cpu); | ||
| 26 | |||
| 27 | spin_unlock(&blk_mq_cpu_notify_lock); | ||
| 28 | return NOTIFY_OK; | ||
| 29 | } | ||
| 30 | |||
| 31 | static void __cpuinit blk_mq_cpu_notify(void *data, unsigned long action, | ||
| 32 | unsigned int cpu) | ||
| 33 | { | ||
| 34 | if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { | ||
| 35 | /* | ||
| 36 | * If the CPU goes away, ensure that we run any pending | ||
| 37 | * completions. | ||
| 38 | */ | ||
| 39 | struct llist_node *node; | ||
| 40 | struct request *rq; | ||
| 41 | |||
| 42 | local_irq_disable(); | ||
| 43 | |||
| 44 | node = llist_del_all(&per_cpu(ipi_lists, cpu)); | ||
| 45 | while (node) { | ||
| 46 | struct llist_node *next = node->next; | ||
| 47 | |||
| 48 | rq = llist_entry(node, struct request, ll_list); | ||
| 49 | __blk_mq_end_io(rq, rq->errors); | ||
| 50 | node = next; | ||
| 51 | } | ||
| 52 | |||
| 53 | local_irq_enable(); | ||
| 54 | } | ||
| 55 | } | ||
| 56 | |||
| 57 | static struct notifier_block __cpuinitdata blk_mq_main_cpu_notifier = { | ||
| 58 | .notifier_call = blk_mq_main_cpu_notify, | ||
| 59 | }; | ||
| 60 | |||
| 61 | void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) | ||
| 62 | { | ||
| 63 | BUG_ON(!notifier->notify); | ||
| 64 | |||
| 65 | spin_lock(&blk_mq_cpu_notify_lock); | ||
| 66 | list_add_tail(¬ifier->list, &blk_mq_cpu_notify_list); | ||
| 67 | spin_unlock(&blk_mq_cpu_notify_lock); | ||
| 68 | } | ||
| 69 | |||
| 70 | void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier) | ||
| 71 | { | ||
| 72 | spin_lock(&blk_mq_cpu_notify_lock); | ||
| 73 | list_del(¬ifier->list); | ||
| 74 | spin_unlock(&blk_mq_cpu_notify_lock); | ||
| 75 | } | ||
| 76 | |||
| 77 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, | ||
| 78 | void (*fn)(void *, unsigned long, unsigned int), | ||
| 79 | void *data) | ||
| 80 | { | ||
| 81 | notifier->notify = fn; | ||
| 82 | notifier->data = data; | ||
| 83 | } | ||
| 84 | |||
| 85 | static struct blk_mq_cpu_notifier __cpuinitdata cpu_notifier = { | ||
| 86 | .notify = blk_mq_cpu_notify, | ||
| 87 | }; | ||
| 88 | |||
| 89 | void __init blk_mq_cpu_init(void) | ||
| 90 | { | ||
| 91 | register_hotcpu_notifier(&blk_mq_main_cpu_notifier); | ||
| 92 | blk_mq_register_cpu_notifier(&cpu_notifier); | ||
| 93 | } | ||
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c new file mode 100644 index 000000000000..f8721278601c --- /dev/null +++ b/block/blk-mq-cpumap.c | |||
| @@ -0,0 +1,108 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | #include <linux/threads.h> | ||
| 3 | #include <linux/module.h> | ||
| 4 | #include <linux/mm.h> | ||
| 5 | #include <linux/smp.h> | ||
| 6 | #include <linux/cpu.h> | ||
| 7 | |||
| 8 | #include <linux/blk-mq.h> | ||
| 9 | #include "blk.h" | ||
| 10 | #include "blk-mq.h" | ||
| 11 | |||
| 12 | static void show_map(unsigned int *map, unsigned int nr) | ||
| 13 | { | ||
| 14 | int i; | ||
| 15 | |||
| 16 | pr_info("blk-mq: CPU -> queue map\n"); | ||
| 17 | for_each_online_cpu(i) | ||
| 18 | pr_info(" CPU%2u -> Queue %u\n", i, map[i]); | ||
| 19 | } | ||
| 20 | |||
| 21 | static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues, | ||
| 22 | const int cpu) | ||
| 23 | { | ||
| 24 | return cpu / ((nr_cpus + nr_queues - 1) / nr_queues); | ||
| 25 | } | ||
| 26 | |||
| 27 | static int get_first_sibling(unsigned int cpu) | ||
| 28 | { | ||
| 29 | unsigned int ret; | ||
| 30 | |||
| 31 | ret = cpumask_first(topology_thread_cpumask(cpu)); | ||
| 32 | if (ret < nr_cpu_ids) | ||
| 33 | return ret; | ||
| 34 | |||
| 35 | return cpu; | ||
| 36 | } | ||
| 37 | |||
| 38 | int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues) | ||
| 39 | { | ||
| 40 | unsigned int i, nr_cpus, nr_uniq_cpus, queue, first_sibling; | ||
| 41 | cpumask_var_t cpus; | ||
| 42 | |||
| 43 | if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) | ||
| 44 | return 1; | ||
| 45 | |||
| 46 | cpumask_clear(cpus); | ||
| 47 | nr_cpus = nr_uniq_cpus = 0; | ||
| 48 | for_each_online_cpu(i) { | ||
| 49 | nr_cpus++; | ||
| 50 | first_sibling = get_first_sibling(i); | ||
| 51 | if (!cpumask_test_cpu(first_sibling, cpus)) | ||
| 52 | nr_uniq_cpus++; | ||
| 53 | cpumask_set_cpu(i, cpus); | ||
| 54 | } | ||
| 55 | |||
| 56 | queue = 0; | ||
| 57 | for_each_possible_cpu(i) { | ||
| 58 | if (!cpu_online(i)) { | ||
| 59 | map[i] = 0; | ||
| 60 | continue; | ||
| 61 | } | ||
| 62 | |||
| 63 | /* | ||
| 64 | * Easy case - we have equal or more hardware queues. Or | ||
| 65 | * there are no thread siblings to take into account. Do | ||
| 66 | * 1:1 if enough, or sequential mapping if less. | ||
| 67 | */ | ||
| 68 | if (nr_queues >= nr_cpus || nr_cpus == nr_uniq_cpus) { | ||
| 69 | map[i] = cpu_to_queue_index(nr_cpus, nr_queues, queue); | ||
| 70 | queue++; | ||
| 71 | continue; | ||
| 72 | } | ||
| 73 | |||
| 74 | /* | ||
| 75 | * Less then nr_cpus queues, and we have some number of | ||
| 76 | * threads per cores. Map sibling threads to the same | ||
| 77 | * queue. | ||
| 78 | */ | ||
| 79 | first_sibling = get_first_sibling(i); | ||
| 80 | if (first_sibling == i) { | ||
| 81 | map[i] = cpu_to_queue_index(nr_uniq_cpus, nr_queues, | ||
| 82 | queue); | ||
| 83 | queue++; | ||
| 84 | } else | ||
| 85 | map[i] = map[first_sibling]; | ||
| 86 | } | ||
| 87 | |||
| 88 | show_map(map, nr_cpus); | ||
| 89 | free_cpumask_var(cpus); | ||
| 90 | return 0; | ||
| 91 | } | ||
| 92 | |||
| 93 | unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg) | ||
| 94 | { | ||
| 95 | unsigned int *map; | ||
| 96 | |||
| 97 | /* If cpus are offline, map them to first hctx */ | ||
| 98 | map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL, | ||
| 99 | reg->numa_node); | ||
| 100 | if (!map) | ||
| 101 | return NULL; | ||
| 102 | |||
| 103 | if (!blk_mq_update_queue_map(map, reg->nr_hw_queues)) | ||
| 104 | return map; | ||
| 105 | |||
| 106 | kfree(map); | ||
| 107 | return NULL; | ||
| 108 | } | ||
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c new file mode 100644 index 000000000000..ba6cf8e9aa0a --- /dev/null +++ b/block/blk-mq-sysfs.c | |||
| @@ -0,0 +1,384 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/backing-dev.h> | ||
| 4 | #include <linux/bio.h> | ||
| 5 | #include <linux/blkdev.h> | ||
| 6 | #include <linux/mm.h> | ||
| 7 | #include <linux/init.h> | ||
| 8 | #include <linux/slab.h> | ||
| 9 | #include <linux/workqueue.h> | ||
| 10 | #include <linux/smp.h> | ||
| 11 | |||
| 12 | #include <linux/blk-mq.h> | ||
| 13 | #include "blk-mq.h" | ||
| 14 | #include "blk-mq-tag.h" | ||
| 15 | |||
| 16 | static void blk_mq_sysfs_release(struct kobject *kobj) | ||
| 17 | { | ||
| 18 | } | ||
| 19 | |||
| 20 | struct blk_mq_ctx_sysfs_entry { | ||
| 21 | struct attribute attr; | ||
| 22 | ssize_t (*show)(struct blk_mq_ctx *, char *); | ||
| 23 | ssize_t (*store)(struct blk_mq_ctx *, const char *, size_t); | ||
| 24 | }; | ||
| 25 | |||
| 26 | struct blk_mq_hw_ctx_sysfs_entry { | ||
| 27 | struct attribute attr; | ||
| 28 | ssize_t (*show)(struct blk_mq_hw_ctx *, char *); | ||
| 29 | ssize_t (*store)(struct blk_mq_hw_ctx *, const char *, size_t); | ||
| 30 | }; | ||
| 31 | |||
| 32 | static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr, | ||
| 33 | char *page) | ||
| 34 | { | ||
| 35 | struct blk_mq_ctx_sysfs_entry *entry; | ||
| 36 | struct blk_mq_ctx *ctx; | ||
| 37 | struct request_queue *q; | ||
| 38 | ssize_t res; | ||
| 39 | |||
| 40 | entry = container_of(attr, struct blk_mq_ctx_sysfs_entry, attr); | ||
| 41 | ctx = container_of(kobj, struct blk_mq_ctx, kobj); | ||
| 42 | q = ctx->queue; | ||
| 43 | |||
| 44 | if (!entry->show) | ||
| 45 | return -EIO; | ||
| 46 | |||
| 47 | res = -ENOENT; | ||
| 48 | mutex_lock(&q->sysfs_lock); | ||
| 49 | if (!blk_queue_dying(q)) | ||
| 50 | res = entry->show(ctx, page); | ||
| 51 | mutex_unlock(&q->sysfs_lock); | ||
| 52 | return res; | ||
| 53 | } | ||
| 54 | |||
| 55 | static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr, | ||
| 56 | const char *page, size_t length) | ||
| 57 | { | ||
| 58 | struct blk_mq_ctx_sysfs_entry *entry; | ||
| 59 | struct blk_mq_ctx *ctx; | ||
| 60 | struct request_queue *q; | ||
| 61 | ssize_t res; | ||
| 62 | |||
| 63 | entry = container_of(attr, struct blk_mq_ctx_sysfs_entry, attr); | ||
| 64 | ctx = container_of(kobj, struct blk_mq_ctx, kobj); | ||
| 65 | q = ctx->queue; | ||
| 66 | |||
| 67 | if (!entry->store) | ||
| 68 | return -EIO; | ||
| 69 | |||
| 70 | res = -ENOENT; | ||
| 71 | mutex_lock(&q->sysfs_lock); | ||
| 72 | if (!blk_queue_dying(q)) | ||
| 73 | res = entry->store(ctx, page, length); | ||
| 74 | mutex_unlock(&q->sysfs_lock); | ||
| 75 | return res; | ||
| 76 | } | ||
| 77 | |||
| 78 | static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj, | ||
| 79 | struct attribute *attr, char *page) | ||
| 80 | { | ||
| 81 | struct blk_mq_hw_ctx_sysfs_entry *entry; | ||
| 82 | struct blk_mq_hw_ctx *hctx; | ||
| 83 | struct request_queue *q; | ||
| 84 | ssize_t res; | ||
| 85 | |||
| 86 | entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr); | ||
| 87 | hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj); | ||
| 88 | q = hctx->queue; | ||
| 89 | |||
| 90 | if (!entry->show) | ||
| 91 | return -EIO; | ||
| 92 | |||
| 93 | res = -ENOENT; | ||
| 94 | mutex_lock(&q->sysfs_lock); | ||
| 95 | if (!blk_queue_dying(q)) | ||
| 96 | res = entry->show(hctx, page); | ||
| 97 | mutex_unlock(&q->sysfs_lock); | ||
| 98 | return res; | ||
| 99 | } | ||
| 100 | |||
| 101 | static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj, | ||
| 102 | struct attribute *attr, const char *page, | ||
| 103 | size_t length) | ||
| 104 | { | ||
| 105 | struct blk_mq_hw_ctx_sysfs_entry *entry; | ||
| 106 | struct blk_mq_hw_ctx *hctx; | ||
| 107 | struct request_queue *q; | ||
| 108 | ssize_t res; | ||
| 109 | |||
| 110 | entry = container_of(attr, struct blk_mq_hw_ctx_sysfs_entry, attr); | ||
| 111 | hctx = container_of(kobj, struct blk_mq_hw_ctx, kobj); | ||
| 112 | q = hctx->queue; | ||
| 113 | |||
| 114 | if (!entry->store) | ||
| 115 | return -EIO; | ||
| 116 | |||
| 117 | res = -ENOENT; | ||
| 118 | mutex_lock(&q->sysfs_lock); | ||
| 119 | if (!blk_queue_dying(q)) | ||
| 120 | res = entry->store(hctx, page, length); | ||
| 121 | mutex_unlock(&q->sysfs_lock); | ||
| 122 | return res; | ||
| 123 | } | ||
| 124 | |||
| 125 | static ssize_t blk_mq_sysfs_dispatched_show(struct blk_mq_ctx *ctx, char *page) | ||
| 126 | { | ||
| 127 | return sprintf(page, "%lu %lu\n", ctx->rq_dispatched[1], | ||
| 128 | ctx->rq_dispatched[0]); | ||
| 129 | } | ||
| 130 | |||
| 131 | static ssize_t blk_mq_sysfs_merged_show(struct blk_mq_ctx *ctx, char *page) | ||
| 132 | { | ||
| 133 | return sprintf(page, "%lu\n", ctx->rq_merged); | ||
| 134 | } | ||
| 135 | |||
| 136 | static ssize_t blk_mq_sysfs_completed_show(struct blk_mq_ctx *ctx, char *page) | ||
| 137 | { | ||
| 138 | return sprintf(page, "%lu %lu\n", ctx->rq_completed[1], | ||
| 139 | ctx->rq_completed[0]); | ||
| 140 | } | ||
| 141 | |||
| 142 | static ssize_t sysfs_list_show(char *page, struct list_head *list, char *msg) | ||
| 143 | { | ||
| 144 | char *start_page = page; | ||
| 145 | struct request *rq; | ||
| 146 | |||
| 147 | page += sprintf(page, "%s:\n", msg); | ||
| 148 | |||
| 149 | list_for_each_entry(rq, list, queuelist) | ||
| 150 | page += sprintf(page, "\t%p\n", rq); | ||
| 151 | |||
| 152 | return page - start_page; | ||
| 153 | } | ||
| 154 | |||
| 155 | static ssize_t blk_mq_sysfs_rq_list_show(struct blk_mq_ctx *ctx, char *page) | ||
| 156 | { | ||
| 157 | ssize_t ret; | ||
| 158 | |||
| 159 | spin_lock(&ctx->lock); | ||
| 160 | ret = sysfs_list_show(page, &ctx->rq_list, "CTX pending"); | ||
| 161 | spin_unlock(&ctx->lock); | ||
| 162 | |||
| 163 | return ret; | ||
| 164 | } | ||
| 165 | |||
| 166 | static ssize_t blk_mq_hw_sysfs_queued_show(struct blk_mq_hw_ctx *hctx, | ||
| 167 | char *page) | ||
| 168 | { | ||
| 169 | return sprintf(page, "%lu\n", hctx->queued); | ||
| 170 | } | ||
| 171 | |||
| 172 | static ssize_t blk_mq_hw_sysfs_run_show(struct blk_mq_hw_ctx *hctx, char *page) | ||
| 173 | { | ||
| 174 | return sprintf(page, "%lu\n", hctx->run); | ||
| 175 | } | ||
| 176 | |||
| 177 | static ssize_t blk_mq_hw_sysfs_dispatched_show(struct blk_mq_hw_ctx *hctx, | ||
| 178 | char *page) | ||
| 179 | { | ||
| 180 | char *start_page = page; | ||
| 181 | int i; | ||
| 182 | |||
| 183 | page += sprintf(page, "%8u\t%lu\n", 0U, hctx->dispatched[0]); | ||
| 184 | |||
| 185 | for (i = 1; i < BLK_MQ_MAX_DISPATCH_ORDER; i++) { | ||
| 186 | unsigned long d = 1U << (i - 1); | ||
| 187 | |||
| 188 | page += sprintf(page, "%8lu\t%lu\n", d, hctx->dispatched[i]); | ||
| 189 | } | ||
| 190 | |||
| 191 | return page - start_page; | ||
| 192 | } | ||
| 193 | |||
| 194 | static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx, | ||
| 195 | char *page) | ||
| 196 | { | ||
| 197 | ssize_t ret; | ||
| 198 | |||
| 199 | spin_lock(&hctx->lock); | ||
| 200 | ret = sysfs_list_show(page, &hctx->dispatch, "HCTX pending"); | ||
| 201 | spin_unlock(&hctx->lock); | ||
| 202 | |||
| 203 | return ret; | ||
| 204 | } | ||
| 205 | |||
| 206 | static ssize_t blk_mq_hw_sysfs_ipi_show(struct blk_mq_hw_ctx *hctx, char *page) | ||
| 207 | { | ||
| 208 | ssize_t ret; | ||
| 209 | |||
| 210 | spin_lock(&hctx->lock); | ||
| 211 | ret = sprintf(page, "%u\n", !!(hctx->flags & BLK_MQ_F_SHOULD_IPI)); | ||
| 212 | spin_unlock(&hctx->lock); | ||
| 213 | |||
| 214 | return ret; | ||
| 215 | } | ||
| 216 | |||
| 217 | static ssize_t blk_mq_hw_sysfs_ipi_store(struct blk_mq_hw_ctx *hctx, | ||
| 218 | const char *page, size_t len) | ||
| 219 | { | ||
| 220 | struct blk_mq_ctx *ctx; | ||
| 221 | unsigned long ret; | ||
| 222 | unsigned int i; | ||
| 223 | |||
| 224 | if (kstrtoul(page, 10, &ret)) { | ||
| 225 | pr_err("blk-mq-sysfs: invalid input '%s'\n", page); | ||
| 226 | return -EINVAL; | ||
| 227 | } | ||
| 228 | |||
| 229 | spin_lock(&hctx->lock); | ||
| 230 | if (ret) | ||
| 231 | hctx->flags |= BLK_MQ_F_SHOULD_IPI; | ||
| 232 | else | ||
| 233 | hctx->flags &= ~BLK_MQ_F_SHOULD_IPI; | ||
| 234 | spin_unlock(&hctx->lock); | ||
| 235 | |||
| 236 | hctx_for_each_ctx(hctx, ctx, i) | ||
| 237 | ctx->ipi_redirect = !!ret; | ||
| 238 | |||
| 239 | return len; | ||
| 240 | } | ||
| 241 | |||
| 242 | static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) | ||
| 243 | { | ||
| 244 | return blk_mq_tag_sysfs_show(hctx->tags, page); | ||
| 245 | } | ||
| 246 | |||
| 247 | static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_dispatched = { | ||
| 248 | .attr = {.name = "dispatched", .mode = S_IRUGO }, | ||
| 249 | .show = blk_mq_sysfs_dispatched_show, | ||
| 250 | }; | ||
| 251 | static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_merged = { | ||
| 252 | .attr = {.name = "merged", .mode = S_IRUGO }, | ||
| 253 | .show = blk_mq_sysfs_merged_show, | ||
| 254 | }; | ||
| 255 | static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_completed = { | ||
| 256 | .attr = {.name = "completed", .mode = S_IRUGO }, | ||
| 257 | .show = blk_mq_sysfs_completed_show, | ||
| 258 | }; | ||
| 259 | static struct blk_mq_ctx_sysfs_entry blk_mq_sysfs_rq_list = { | ||
| 260 | .attr = {.name = "rq_list", .mode = S_IRUGO }, | ||
| 261 | .show = blk_mq_sysfs_rq_list_show, | ||
| 262 | }; | ||
| 263 | |||
| 264 | static struct attribute *default_ctx_attrs[] = { | ||
| 265 | &blk_mq_sysfs_dispatched.attr, | ||
| 266 | &blk_mq_sysfs_merged.attr, | ||
| 267 | &blk_mq_sysfs_completed.attr, | ||
| 268 | &blk_mq_sysfs_rq_list.attr, | ||
| 269 | NULL, | ||
| 270 | }; | ||
| 271 | |||
| 272 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_queued = { | ||
| 273 | .attr = {.name = "queued", .mode = S_IRUGO }, | ||
| 274 | .show = blk_mq_hw_sysfs_queued_show, | ||
| 275 | }; | ||
| 276 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_run = { | ||
| 277 | .attr = {.name = "run", .mode = S_IRUGO }, | ||
| 278 | .show = blk_mq_hw_sysfs_run_show, | ||
| 279 | }; | ||
| 280 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_dispatched = { | ||
| 281 | .attr = {.name = "dispatched", .mode = S_IRUGO }, | ||
| 282 | .show = blk_mq_hw_sysfs_dispatched_show, | ||
| 283 | }; | ||
| 284 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = { | ||
| 285 | .attr = {.name = "pending", .mode = S_IRUGO }, | ||
| 286 | .show = blk_mq_hw_sysfs_rq_list_show, | ||
| 287 | }; | ||
| 288 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_ipi = { | ||
| 289 | .attr = {.name = "ipi_redirect", .mode = S_IRUGO | S_IWUSR}, | ||
| 290 | .show = blk_mq_hw_sysfs_ipi_show, | ||
| 291 | .store = blk_mq_hw_sysfs_ipi_store, | ||
| 292 | }; | ||
| 293 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = { | ||
| 294 | .attr = {.name = "tags", .mode = S_IRUGO }, | ||
| 295 | .show = blk_mq_hw_sysfs_tags_show, | ||
| 296 | }; | ||
| 297 | |||
| 298 | static struct attribute *default_hw_ctx_attrs[] = { | ||
| 299 | &blk_mq_hw_sysfs_queued.attr, | ||
| 300 | &blk_mq_hw_sysfs_run.attr, | ||
| 301 | &blk_mq_hw_sysfs_dispatched.attr, | ||
| 302 | &blk_mq_hw_sysfs_pending.attr, | ||
| 303 | &blk_mq_hw_sysfs_ipi.attr, | ||
| 304 | &blk_mq_hw_sysfs_tags.attr, | ||
| 305 | NULL, | ||
| 306 | }; | ||
| 307 | |||
| 308 | static const struct sysfs_ops blk_mq_sysfs_ops = { | ||
| 309 | .show = blk_mq_sysfs_show, | ||
| 310 | .store = blk_mq_sysfs_store, | ||
| 311 | }; | ||
| 312 | |||
| 313 | static const struct sysfs_ops blk_mq_hw_sysfs_ops = { | ||
| 314 | .show = blk_mq_hw_sysfs_show, | ||
| 315 | .store = blk_mq_hw_sysfs_store, | ||
| 316 | }; | ||
| 317 | |||
| 318 | static struct kobj_type blk_mq_ktype = { | ||
| 319 | .sysfs_ops = &blk_mq_sysfs_ops, | ||
| 320 | .release = blk_mq_sysfs_release, | ||
| 321 | }; | ||
| 322 | |||
| 323 | static struct kobj_type blk_mq_ctx_ktype = { | ||
| 324 | .sysfs_ops = &blk_mq_sysfs_ops, | ||
| 325 | .default_attrs = default_ctx_attrs, | ||
| 326 | .release = blk_mq_sysfs_release, | ||
| 327 | }; | ||
| 328 | |||
| 329 | static struct kobj_type blk_mq_hw_ktype = { | ||
| 330 | .sysfs_ops = &blk_mq_hw_sysfs_ops, | ||
| 331 | .default_attrs = default_hw_ctx_attrs, | ||
| 332 | .release = blk_mq_sysfs_release, | ||
| 333 | }; | ||
| 334 | |||
| 335 | void blk_mq_unregister_disk(struct gendisk *disk) | ||
| 336 | { | ||
| 337 | struct request_queue *q = disk->queue; | ||
| 338 | |||
| 339 | kobject_uevent(&q->mq_kobj, KOBJ_REMOVE); | ||
| 340 | kobject_del(&q->mq_kobj); | ||
| 341 | |||
| 342 | kobject_put(&disk_to_dev(disk)->kobj); | ||
| 343 | } | ||
| 344 | |||
| 345 | int blk_mq_register_disk(struct gendisk *disk) | ||
| 346 | { | ||
| 347 | struct device *dev = disk_to_dev(disk); | ||
| 348 | struct request_queue *q = disk->queue; | ||
| 349 | struct blk_mq_hw_ctx *hctx; | ||
| 350 | struct blk_mq_ctx *ctx; | ||
| 351 | int ret, i, j; | ||
| 352 | |||
| 353 | kobject_init(&q->mq_kobj, &blk_mq_ktype); | ||
| 354 | |||
| 355 | ret = kobject_add(&q->mq_kobj, kobject_get(&dev->kobj), "%s", "mq"); | ||
| 356 | if (ret < 0) | ||
| 357 | return ret; | ||
| 358 | |||
| 359 | kobject_uevent(&q->mq_kobj, KOBJ_ADD); | ||
| 360 | |||
| 361 | queue_for_each_hw_ctx(q, hctx, i) { | ||
| 362 | kobject_init(&hctx->kobj, &blk_mq_hw_ktype); | ||
| 363 | ret = kobject_add(&hctx->kobj, &q->mq_kobj, "%u", i); | ||
| 364 | if (ret) | ||
| 365 | break; | ||
| 366 | |||
| 367 | if (!hctx->nr_ctx) | ||
| 368 | continue; | ||
| 369 | |||
| 370 | hctx_for_each_ctx(hctx, ctx, j) { | ||
| 371 | kobject_init(&ctx->kobj, &blk_mq_ctx_ktype); | ||
| 372 | ret = kobject_add(&ctx->kobj, &hctx->kobj, "cpu%u", ctx->cpu); | ||
| 373 | if (ret) | ||
| 374 | break; | ||
| 375 | } | ||
| 376 | } | ||
| 377 | |||
| 378 | if (ret) { | ||
| 379 | blk_mq_unregister_disk(disk); | ||
| 380 | return ret; | ||
| 381 | } | ||
| 382 | |||
| 383 | return 0; | ||
| 384 | } | ||
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c new file mode 100644 index 000000000000..d64a02fb1f73 --- /dev/null +++ b/block/blk-mq-tag.c | |||
| @@ -0,0 +1,204 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/percpu_ida.h> | ||
| 4 | |||
| 5 | #include <linux/blk-mq.h> | ||
| 6 | #include "blk.h" | ||
| 7 | #include "blk-mq.h" | ||
| 8 | #include "blk-mq-tag.h" | ||
| 9 | |||
| 10 | /* | ||
| 11 | * Per tagged queue (tag address space) map | ||
| 12 | */ | ||
| 13 | struct blk_mq_tags { | ||
| 14 | unsigned int nr_tags; | ||
| 15 | unsigned int nr_reserved_tags; | ||
| 16 | unsigned int nr_batch_move; | ||
| 17 | unsigned int nr_max_cache; | ||
| 18 | |||
| 19 | struct percpu_ida free_tags; | ||
| 20 | struct percpu_ida reserved_tags; | ||
| 21 | }; | ||
| 22 | |||
| 23 | void blk_mq_wait_for_tags(struct blk_mq_tags *tags) | ||
| 24 | { | ||
| 25 | int tag = blk_mq_get_tag(tags, __GFP_WAIT, false); | ||
| 26 | blk_mq_put_tag(tags, tag); | ||
| 27 | } | ||
| 28 | |||
| 29 | bool blk_mq_has_free_tags(struct blk_mq_tags *tags) | ||
| 30 | { | ||
| 31 | return !tags || | ||
| 32 | percpu_ida_free_tags(&tags->free_tags, nr_cpu_ids) != 0; | ||
| 33 | } | ||
| 34 | |||
| 35 | static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp) | ||
| 36 | { | ||
| 37 | int tag; | ||
| 38 | |||
| 39 | tag = percpu_ida_alloc(&tags->free_tags, gfp); | ||
| 40 | if (tag < 0) | ||
| 41 | return BLK_MQ_TAG_FAIL; | ||
| 42 | return tag + tags->nr_reserved_tags; | ||
| 43 | } | ||
| 44 | |||
| 45 | static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags, | ||
| 46 | gfp_t gfp) | ||
| 47 | { | ||
| 48 | int tag; | ||
| 49 | |||
| 50 | if (unlikely(!tags->nr_reserved_tags)) { | ||
| 51 | WARN_ON_ONCE(1); | ||
| 52 | return BLK_MQ_TAG_FAIL; | ||
| 53 | } | ||
| 54 | |||
| 55 | tag = percpu_ida_alloc(&tags->reserved_tags, gfp); | ||
| 56 | if (tag < 0) | ||
| 57 | return BLK_MQ_TAG_FAIL; | ||
| 58 | return tag; | ||
| 59 | } | ||
| 60 | |||
| 61 | unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp, bool reserved) | ||
| 62 | { | ||
| 63 | if (!reserved) | ||
| 64 | return __blk_mq_get_tag(tags, gfp); | ||
| 65 | |||
| 66 | return __blk_mq_get_reserved_tag(tags, gfp); | ||
| 67 | } | ||
| 68 | |||
| 69 | static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag) | ||
| 70 | { | ||
| 71 | BUG_ON(tag >= tags->nr_tags); | ||
| 72 | |||
| 73 | percpu_ida_free(&tags->free_tags, tag - tags->nr_reserved_tags); | ||
| 74 | } | ||
| 75 | |||
| 76 | static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags, | ||
| 77 | unsigned int tag) | ||
| 78 | { | ||
| 79 | BUG_ON(tag >= tags->nr_reserved_tags); | ||
| 80 | |||
| 81 | percpu_ida_free(&tags->reserved_tags, tag); | ||
| 82 | } | ||
| 83 | |||
| 84 | void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag) | ||
| 85 | { | ||
| 86 | if (tag >= tags->nr_reserved_tags) | ||
| 87 | __blk_mq_put_tag(tags, tag); | ||
| 88 | else | ||
| 89 | __blk_mq_put_reserved_tag(tags, tag); | ||
| 90 | } | ||
| 91 | |||
/* percpu_ida callback: mark free tag @id in the caller-supplied bitmap. */
static int __blk_mq_tag_iter(unsigned id, void *data)
{
	unsigned long *bitmap = data;

	__set_bit(id, bitmap);
	return 0;
}
| 98 | |||
/*
 * Build a bitmap of all currently *free* tags (busy tags are the zero
 * bits) and hand it to @fn. The bitmap is allocated with GFP_ATOMIC;
 * if that allocation fails the iteration is silently skipped.
 */
void blk_mq_tag_busy_iter(struct blk_mq_tags *tags,
			  void (*fn)(void *, unsigned long *), void *data)
{
	unsigned long *tag_map;
	size_t map_size;

	/* number of unsigned longs needed to cover nr_tags bits */
	map_size = ALIGN(tags->nr_tags, BITS_PER_LONG) / BITS_PER_LONG;
	tag_map = kzalloc(map_size * sizeof(unsigned long), GFP_ATOMIC);
	if (!tag_map)
		return;

	percpu_ida_for_each_free(&tags->free_tags, __blk_mq_tag_iter, tag_map);
	if (tags->nr_reserved_tags)
		percpu_ida_for_each_free(&tags->reserved_tags, __blk_mq_tag_iter,
			tag_map);

	fn(data, tag_map);
	kfree(tag_map);
}
| 118 | |||
| 119 | struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags, | ||
| 120 | unsigned int reserved_tags, int node) | ||
| 121 | { | ||
| 122 | unsigned int nr_tags, nr_cache; | ||
| 123 | struct blk_mq_tags *tags; | ||
| 124 | int ret; | ||
| 125 | |||
| 126 | if (total_tags > BLK_MQ_TAG_MAX) { | ||
| 127 | pr_err("blk-mq: tag depth too large\n"); | ||
| 128 | return NULL; | ||
| 129 | } | ||
| 130 | |||
| 131 | tags = kzalloc_node(sizeof(*tags), GFP_KERNEL, node); | ||
| 132 | if (!tags) | ||
| 133 | return NULL; | ||
| 134 | |||
| 135 | nr_tags = total_tags - reserved_tags; | ||
| 136 | nr_cache = nr_tags / num_possible_cpus(); | ||
| 137 | |||
| 138 | if (nr_cache < BLK_MQ_TAG_CACHE_MIN) | ||
| 139 | nr_cache = BLK_MQ_TAG_CACHE_MIN; | ||
| 140 | else if (nr_cache > BLK_MQ_TAG_CACHE_MAX) | ||
| 141 | nr_cache = BLK_MQ_TAG_CACHE_MAX; | ||
| 142 | |||
| 143 | tags->nr_tags = total_tags; | ||
| 144 | tags->nr_reserved_tags = reserved_tags; | ||
| 145 | tags->nr_max_cache = nr_cache; | ||
| 146 | tags->nr_batch_move = max(1u, nr_cache / 2); | ||
| 147 | |||
| 148 | ret = __percpu_ida_init(&tags->free_tags, tags->nr_tags - | ||
| 149 | tags->nr_reserved_tags, | ||
| 150 | tags->nr_max_cache, | ||
| 151 | tags->nr_batch_move); | ||
| 152 | if (ret) | ||
| 153 | goto err_free_tags; | ||
| 154 | |||
| 155 | if (reserved_tags) { | ||
| 156 | /* | ||
| 157 | * With max_cahe and batch set to 1, the allocator fallbacks to | ||
| 158 | * no cached. It's fine reserved tags allocation is slow. | ||
| 159 | */ | ||
| 160 | ret = __percpu_ida_init(&tags->reserved_tags, reserved_tags, | ||
| 161 | 1, 1); | ||
| 162 | if (ret) | ||
| 163 | goto err_reserved_tags; | ||
| 164 | } | ||
| 165 | |||
| 166 | return tags; | ||
| 167 | |||
| 168 | err_reserved_tags: | ||
| 169 | percpu_ida_destroy(&tags->free_tags); | ||
| 170 | err_free_tags: | ||
| 171 | kfree(tags); | ||
| 172 | return NULL; | ||
| 173 | } | ||
| 174 | |||
/* Tear down both tag pools and free the tag set itself. */
void blk_mq_free_tags(struct blk_mq_tags *tags)
{
	percpu_ida_destroy(&tags->free_tags);
	percpu_ida_destroy(&tags->reserved_tags);
	kfree(tags);
}
| 181 | |||
/*
 * Format tag-set statistics into the sysfs @page buffer. Returns the
 * number of bytes written, or 0 if the hardware queue has no tag set.
 */
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{
	char *orig_page = page;
	int cpu;

	if (!tags)
		return 0;

	page += sprintf(page, "nr_tags=%u, reserved_tags=%u, batch_move=%u,"
			" max_cache=%u\n", tags->nr_tags, tags->nr_reserved_tags,
			tags->nr_batch_move, tags->nr_max_cache);

	/* pool-wide free counts (summed across all CPUs) */
	page += sprintf(page, "nr_free=%u, nr_reserved=%u\n",
			percpu_ida_free_tags(&tags->free_tags, nr_cpu_ids),
			percpu_ida_free_tags(&tags->reserved_tags, nr_cpu_ids));

	/* per-cpu cached free counts */
	for_each_possible_cpu(cpu) {
		page += sprintf(page, "  cpu%02u: nr_free=%u\n", cpu,
				percpu_ida_free_tags(&tags->free_tags, cpu));
	}

	return page - orig_page;
}
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h new file mode 100644 index 000000000000..947ba2c6148e --- /dev/null +++ b/block/blk-mq-tag.h | |||
| @@ -0,0 +1,27 @@ | |||
#ifndef INT_BLK_MQ_TAG_H
#define INT_BLK_MQ_TAG_H

struct blk_mq_tags;

/* Tag set lifecycle */
extern struct blk_mq_tags *blk_mq_init_tags(unsigned int nr_tags, unsigned int reserved_tags, int node);
extern void blk_mq_free_tags(struct blk_mq_tags *tags);

/* Tag allocation, freeing, and inspection */
extern unsigned int blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp, bool reserved);
extern void blk_mq_wait_for_tags(struct blk_mq_tags *tags);
extern void blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag);
extern void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data);
extern bool blk_mq_has_free_tags(struct blk_mq_tags *tags);
extern ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page);

/* Bounds for the per-cpu tag cache sized in blk_mq_init_tags() */
enum {
	BLK_MQ_TAG_CACHE_MIN	= 1,
	BLK_MQ_TAG_CACHE_MAX	= 64,
};

enum {
	BLK_MQ_TAG_FAIL		= -1U,	/* allocation failure sentinel */
	BLK_MQ_TAG_MIN		= BLK_MQ_TAG_CACHE_MIN,
	BLK_MQ_TAG_MAX		= BLK_MQ_TAG_FAIL - 1,
};

#endif
diff --git a/block/blk-mq.c b/block/blk-mq.c new file mode 100644 index 000000000000..88d4e864d4c0 --- /dev/null +++ b/block/blk-mq.c | |||
| @@ -0,0 +1,1500 @@ | |||
| 1 | #include <linux/kernel.h> | ||
| 2 | #include <linux/module.h> | ||
| 3 | #include <linux/backing-dev.h> | ||
| 4 | #include <linux/bio.h> | ||
| 5 | #include <linux/blkdev.h> | ||
| 6 | #include <linux/mm.h> | ||
| 7 | #include <linux/init.h> | ||
| 8 | #include <linux/slab.h> | ||
| 9 | #include <linux/workqueue.h> | ||
| 10 | #include <linux/smp.h> | ||
| 11 | #include <linux/llist.h> | ||
| 12 | #include <linux/list_sort.h> | ||
| 13 | #include <linux/cpu.h> | ||
| 14 | #include <linux/cache.h> | ||
| 15 | #include <linux/sched/sysctl.h> | ||
| 16 | #include <linux/delay.h> | ||
| 17 | |||
| 18 | #include <trace/events/block.h> | ||
| 19 | |||
| 20 | #include <linux/blk-mq.h> | ||
| 21 | #include "blk.h" | ||
| 22 | #include "blk-mq.h" | ||
| 23 | #include "blk-mq-tag.h" | ||
| 24 | |||
/* Protects all_q_list, the list of all blk-mq request queues */
static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);

static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);

/* Per-cpu lists of requests to be completed via IPI (see ipi_end_io()) */
DEFINE_PER_CPU(struct llist_head, ipi_lists);
| 31 | |||
| 32 | static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, | ||
| 33 | unsigned int cpu) | ||
| 34 | { | ||
| 35 | return per_cpu_ptr(q->queue_ctx, cpu); | ||
| 36 | } | ||
| 37 | |||
/*
 * This assumes per-cpu software queueing queues. They could be per-node
 * as well, for instance. For now this is hardcoded as-is. Note that we don't
 * care about preemption, since we know the ctx's are persistent. This does
 * mean that we can't rely on ctx always matching the currently running CPU.
 *
 * Disables preemption via get_cpu(); pair with blk_mq_put_ctx().
 */
static struct blk_mq_ctx *blk_mq_get_ctx(struct request_queue *q)
{
	return __blk_mq_get_ctx(q, get_cpu());
}
| 48 | |||
/* Release the preemption disable taken by blk_mq_get_ctx(). */
static void blk_mq_put_ctx(struct blk_mq_ctx *ctx)
{
	put_cpu();
}
| 53 | |||
| 54 | /* | ||
| 55 | * Check if any of the ctx's have pending work in this hardware queue | ||
| 56 | */ | ||
| 57 | static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) | ||
| 58 | { | ||
| 59 | unsigned int i; | ||
| 60 | |||
| 61 | for (i = 0; i < hctx->nr_ctx_map; i++) | ||
| 62 | if (hctx->ctx_map[i]) | ||
| 63 | return true; | ||
| 64 | |||
| 65 | return false; | ||
| 66 | } | ||
| 67 | |||
/*
 * Mark this ctx as having pending work in this hardware queue
 */
static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
				     struct blk_mq_ctx *ctx)
{
	/* test first to avoid dirtying the cacheline when already set */
	if (!test_bit(ctx->index_hw, hctx->ctx_map))
		set_bit(ctx->index_hw, hctx->ctx_map);
}
| 77 | |||
| 78 | static struct request *blk_mq_alloc_rq(struct blk_mq_hw_ctx *hctx, gfp_t gfp, | ||
| 79 | bool reserved) | ||
| 80 | { | ||
| 81 | struct request *rq; | ||
| 82 | unsigned int tag; | ||
| 83 | |||
| 84 | tag = blk_mq_get_tag(hctx->tags, gfp, reserved); | ||
| 85 | if (tag != BLK_MQ_TAG_FAIL) { | ||
| 86 | rq = hctx->rqs[tag]; | ||
| 87 | rq->tag = tag; | ||
| 88 | |||
| 89 | return rq; | ||
| 90 | } | ||
| 91 | |||
| 92 | return NULL; | ||
| 93 | } | ||
| 94 | |||
/*
 * Take a usage reference on the queue so it cannot be frozen while we
 * hold it. Returns 0 on success; a nonzero value if interrupted while
 * waiting for a bypassed (freezing) queue. Pair with blk_mq_queue_exit().
 */
static int blk_mq_queue_enter(struct request_queue *q)
{
	int ret;

	/* optimistic fast path: take the reference first */
	__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
	smp_wmb();
	/* we have problems to freeze the queue if it's initializing */
	if (!blk_queue_bypass(q) || !blk_queue_init_done(q))
		return 0;

	/* queue is bypassed: back out and wait for the freeze to end */
	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);

	spin_lock_irq(q->queue_lock);
	ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
		!blk_queue_bypass(q), *q->queue_lock);
	/* inc usage with lock hold to avoid freeze_queue runs here */
	if (!ret)
		__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
	spin_unlock_irq(q->queue_lock);

	return ret;
}
| 117 | |||
/* Drop the usage reference taken by blk_mq_queue_enter(). */
static void blk_mq_queue_exit(struct request_queue *q)
{
	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
}
| 122 | |||
/*
 * Guarantee no request is in use, so we can change any data structure of
 * the queue afterward.
 *
 * Sets the queue into bypass mode and then busy-waits (polling every
 * 10ms) until mq_usage_counter drops to zero, running the queues in
 * between so in-flight work can drain.
 */
static void blk_mq_freeze_queue(struct request_queue *q)
{
	bool drain;

	spin_lock_irq(q->queue_lock);
	/* only the first freezer (depth 0 -> 1) needs to drain */
	drain = !q->bypass_depth++;
	queue_flag_set(QUEUE_FLAG_BYPASS, q);
	spin_unlock_irq(q->queue_lock);

	if (!drain)
		return;

	while (true) {
		s64 count;

		spin_lock_irq(q->queue_lock);
		count = percpu_counter_sum(&q->mq_usage_counter);
		spin_unlock_irq(q->queue_lock);

		if (count == 0)
			break;
		blk_mq_run_queues(q, false);
		msleep(10);
	}
}
| 152 | |||
/*
 * Undo one blk_mq_freeze_queue() call. When the last freezer leaves,
 * clear bypass mode and wake anyone blocked in blk_mq_queue_enter().
 */
static void blk_mq_unfreeze_queue(struct request_queue *q)
{
	bool wake = false;

	spin_lock_irq(q->queue_lock);
	if (!--q->bypass_depth) {
		queue_flag_clear(QUEUE_FLAG_BYPASS, q);
		wake = true;
	}
	WARN_ON_ONCE(q->bypass_depth < 0);
	spin_unlock_irq(q->queue_lock);
	if (wake)
		wake_up_all(&q->mq_freeze_wq);
}
| 167 | |||
/* Return true if the hardware queue has tags (and thus requests) free. */
bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
{
	return blk_mq_has_free_tags(hctx->tags);
}
EXPORT_SYMBOL(blk_mq_can_queue);
| 173 | |||
/* Bind a freshly allocated request to @ctx and record its rw flags. */
static void blk_mq_rq_ctx_init(struct blk_mq_ctx *ctx, struct request *rq,
			       unsigned int rw_flags)
{
	rq->mq_ctx = ctx;
	rq->cmd_flags = rw_flags;
	/* per-ctx dispatch stats, split by sync/async */
	ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
}
| 181 | |||
/* Thin wrapper around blk_mq_alloc_rq(); may return NULL. */
static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
					      gfp_t gfp, bool reserved)
{
	return blk_mq_alloc_rq(hctx, gfp, reserved);
}
| 187 | |||
| 188 | static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, | ||
| 189 | int rw, gfp_t gfp, | ||
| 190 | bool reserved) | ||
| 191 | { | ||
| 192 | struct request *rq; | ||
| 193 | |||
| 194 | do { | ||
| 195 | struct blk_mq_ctx *ctx = blk_mq_get_ctx(q); | ||
| 196 | struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); | ||
| 197 | |||
| 198 | rq = __blk_mq_alloc_request(hctx, gfp & ~__GFP_WAIT, reserved); | ||
| 199 | if (rq) { | ||
| 200 | blk_mq_rq_ctx_init(ctx, rq, rw); | ||
| 201 | break; | ||
| 202 | } else if (!(gfp & __GFP_WAIT)) | ||
| 203 | break; | ||
| 204 | |||
| 205 | blk_mq_put_ctx(ctx); | ||
| 206 | __blk_mq_run_hw_queue(hctx); | ||
| 207 | blk_mq_wait_for_tags(hctx->tags); | ||
| 208 | } while (1); | ||
| 209 | |||
| 210 | return rq; | ||
| 211 | } | ||
| 212 | |||
| 213 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, | ||
| 214 | gfp_t gfp, bool reserved) | ||
| 215 | { | ||
| 216 | struct request *rq; | ||
| 217 | |||
| 218 | if (blk_mq_queue_enter(q)) | ||
| 219 | return NULL; | ||
| 220 | |||
| 221 | rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved); | ||
| 222 | blk_mq_put_ctx(rq->mq_ctx); | ||
| 223 | return rq; | ||
| 224 | } | ||
| 225 | |||
| 226 | struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, | ||
| 227 | gfp_t gfp) | ||
| 228 | { | ||
| 229 | struct request *rq; | ||
| 230 | |||
| 231 | if (blk_mq_queue_enter(q)) | ||
| 232 | return NULL; | ||
| 233 | |||
| 234 | rq = blk_mq_alloc_request_pinned(q, rw, gfp, true); | ||
| 235 | blk_mq_put_ctx(rq->mq_ctx); | ||
| 236 | return rq; | ||
| 237 | } | ||
| 238 | EXPORT_SYMBOL(blk_mq_alloc_reserved_request); | ||
| 239 | |||
/*
 * Re-init and set pdu, if we have it
 */
static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
	blk_rq_init(hctx->queue, rq);

	/* point ->special at the per-request driver payload, if any */
	if (hctx->cmd_size)
		rq->special = blk_mq_rq_to_pdu(rq);
}
| 250 | |||
/*
 * Reinitialize the request, return its tag, and drop the queue usage
 * reference taken at allocation time.
 */
static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
				  struct blk_mq_ctx *ctx, struct request *rq)
{
	const int tag = rq->tag;
	struct request_queue *q = rq->q;

	/* re-init before freeing the tag, so the rq is pristine for reuse */
	blk_mq_rq_init(hctx, rq);
	blk_mq_put_tag(hctx->tags, tag);

	blk_mq_queue_exit(q);
}
| 262 | |||
/* Free a request back to its hardware queue's tag pool. */
void blk_mq_free_request(struct request *rq)
{
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	struct blk_mq_hw_ctx *hctx;
	struct request_queue *q = rq->q;

	/* per-ctx completion stats, split by sync/async */
	ctx->rq_completed[rq_is_sync(rq)]++;

	hctx = q->mq_ops->map_queue(q, ctx->cpu);
	__blk_mq_free_request(hctx, ctx, rq);
}
| 274 | |||
/*
 * Propagate the request's completion status to one of its bios and end
 * it, unless the bio belongs to a flush sequence.
 */
static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error)
{
	if (error)
		clear_bit(BIO_UPTODATE, &bio->bi_flags);
	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
		error = -EIO;

	if (unlikely(rq->cmd_flags & REQ_QUIET))
		set_bit(BIO_QUIET, &bio->bi_flags);

	/* don't actually finish bio if it's part of flush sequence */
	if (!(rq->cmd_flags & REQ_FLUSH_SEQ))
		bio_endio(bio, error);
}
| 289 | |||
/*
 * Complete a request: end all of its bios, account the IO, and either
 * invoke the request's end_io callback or free it.
 */
void blk_mq_complete_request(struct request *rq, int error)
{
	struct bio *bio = rq->bio;
	unsigned int bytes = 0;

	trace_block_rq_complete(rq->q, rq);

	while (bio) {
		struct bio *next = bio->bi_next;

		/* unlink before ending; bi_next is reused by the bio layer */
		bio->bi_next = NULL;
		bytes += bio->bi_size;
		blk_mq_bio_endio(rq, bio, error);
		bio = next;
	}

	blk_account_io_completion(rq, bytes);

	if (rq->end_io)
		rq->end_io(rq, error);
	else
		blk_mq_free_request(rq);

	blk_account_io_done(rq);
}
| 315 | |||
/* Complete @rq exactly once; the atomic mark guards against double completion. */
void __blk_mq_end_io(struct request *rq, int error)
{
	if (!blk_mark_rq_complete(rq))
		blk_mq_complete_request(rq, error);
}
| 321 | |||
| 322 | #if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) | ||
| 323 | |||
/*
 * Called with interrupts disabled.
 *
 * IPI handler: drain this CPU's lockless list of requests and complete
 * each one locally.
 */
static void ipi_end_io(void *data)
{
	struct llist_head *list = &per_cpu(ipi_lists, smp_processor_id());
	struct llist_node *entry, *next;
	struct request *rq;

	/* atomically take over the whole pending list */
	entry = llist_del_all(list);

	while (entry) {
		/* save next before __blk_mq_end_io() may recycle the rq */
		next = entry->next;
		rq = llist_entry(entry, struct request, ll_list);
		__blk_mq_end_io(rq, rq->errors);
		entry = next;
	}
}
| 342 | |||
/*
 * Queue @rq for completion on the submitting CPU (ctx->cpu) and kick an
 * IPI if one isn't already in flight. Always returns true here; the
 * @cpu argument (the current CPU) is unused in this variant.
 */
static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
			  struct request *rq, const int error)
{
	struct call_single_data *data = &rq->csd;

	rq->errors = error;
	rq->ll_list.next = NULL;

	/*
	 * If the list is non-empty, an existing IPI must already
	 * be "in flight". If that is the case, we need not schedule
	 * a new one.
	 */
	if (llist_add(&rq->ll_list, &per_cpu(ipi_lists, ctx->cpu))) {
		data->func = ipi_end_io;
		data->flags = 0;
		__smp_call_function_single(ctx->cpu, data, 0);
	}

	return true;
}
| 364 | #else /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ | ||
/* !SMP fallback: never redirect; caller completes the request locally. */
static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
			  struct request *rq, const int error)
{
	return false;
}
| 370 | #endif | ||
| 371 | |||
/*
 * End IO on this request on a multiqueue enabled driver. We'll either do
 * it directly inline, or punt to a local IPI handler on the matching
 * remote CPU.
 */
void blk_mq_end_io(struct request *rq, int error)
{
	struct blk_mq_ctx *ctx = rq->mq_ctx;
	int cpu;

	if (!ctx->ipi_redirect)
		return __blk_mq_end_io(rq, error);

	cpu = get_cpu();

	/*
	 * Complete inline when we're already on the submitting CPU, the
	 * target CPU is offline, or the IPI path declines.
	 */
	if (cpu == ctx->cpu || !cpu_online(ctx->cpu) ||
	    !ipi_remote_cpu(ctx, cpu, rq, error))
		__blk_mq_end_io(rq, error);

	put_cpu();
}
EXPORT_SYMBOL(blk_mq_end_io);
| 394 | |||
/* Mark @rq as issued to the driver and arm its timeout deadline. */
static void blk_mq_start_request(struct request *rq)
{
	struct request_queue *q = rq->q;

	trace_block_rq_issue(q, rq);

	/*
	 * Just mark start time and set the started bit. Due to memory
	 * ordering, we know we'll see the correct deadline as long as
	 * REQ_ATOMIC_STARTED is seen.
	 */
	rq->deadline = jiffies + q->rq_timeout;
	set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
}
| 409 | |||
/* Undo blk_mq_start_request() so @rq can be dispatched again later. */
static void blk_mq_requeue_request(struct request *rq)
{
	struct request_queue *q = rq->q;

	trace_block_rq_requeue(q, rq);
	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
}
| 417 | |||
/* Carries timeout-scan state into the blk_mq_tag_busy_iter() callback. */
struct blk_mq_timeout_data {
	struct blk_mq_hw_ctx *hctx;
	unsigned long *next;		/* earliest pending deadline (out) */
	unsigned int *next_set;		/* whether *next is valid (out) */
};
| 423 | |||
/*
 * Busy-tag iterator callback: walk the zero bits of @free_tags (i.e. the
 * busy tags) and check each started request for expiry.
 */
static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
{
	struct blk_mq_timeout_data *data = __data;
	struct blk_mq_hw_ctx *hctx = data->hctx;
	unsigned int tag;

	/* It may not be in flight yet (this is where
	 * the REQ_ATOMIC_STARTED flag comes in). The requests are
	 * statically allocated, so we know it's always safe to access the
	 * memory associated with a bit offset into ->rqs[].
	 */
	tag = 0;
	do {
		struct request *rq;

		tag = find_next_zero_bit(free_tags, hctx->queue_depth, tag);
		if (tag >= hctx->queue_depth)
			break;

		rq = hctx->rqs[tag++];

		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
			continue;

		blk_rq_check_expired(rq, data->next, data->next_set);
	} while (1);
}
| 451 | |||
/* Scan one hardware queue's busy requests for timeouts. */
static void blk_mq_hw_ctx_check_timeout(struct blk_mq_hw_ctx *hctx,
					unsigned long *next,
					unsigned int *next_set)
{
	struct blk_mq_timeout_data data = {
		.hctx		= hctx,
		.next		= next,
		.next_set	= next_set,
	};

	/*
	 * Ask the tagging code to iterate busy requests, so we can
	 * check them for timeout.
	 */
	blk_mq_tag_busy_iter(hctx->tags, blk_mq_timeout_check, &data);
}
| 468 | |||
/*
 * Queue timeout timer: check every hardware queue for expired requests
 * and re-arm the timer for the earliest remaining deadline, if any.
 */
static void blk_mq_rq_timer(unsigned long data)
{
	struct request_queue *q = (struct request_queue *) data;
	struct blk_mq_hw_ctx *hctx;
	unsigned long next = 0;
	int i, next_set = 0;

	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_hw_ctx_check_timeout(hctx, &next, &next_set);

	if (next_set)
		mod_timer(&q->timeout, round_jiffies_up(next));
}
| 482 | |||
/*
 * Reverse check our software queue for entries that we could potentially
 * merge with. Currently includes a hand-wavy stop count of 8, to not spend
 * too much time checking for merges.
 *
 * Returns true if @bio was merged into an existing request. Caller is
 * expected to hold ctx->lock.
 */
static bool blk_mq_attempt_merge(struct request_queue *q,
				 struct blk_mq_ctx *ctx, struct bio *bio)
{
	struct request *rq;
	int checked = 8;

	list_for_each_entry_reverse(rq, &ctx->rq_list, queuelist) {
		int el_ret;

		if (!checked--)
			break;

		if (!blk_rq_merge_ok(rq, bio))
			continue;

		el_ret = blk_try_merge(rq, bio);
		if (el_ret == ELEVATOR_BACK_MERGE) {
			if (bio_attempt_back_merge(q, rq, bio)) {
				ctx->rq_merged++;
				return true;
			}
			break;
		} else if (el_ret == ELEVATOR_FRONT_MERGE) {
			if (bio_attempt_front_merge(q, rq, bio)) {
				ctx->rq_merged++;
				return true;
			}
			break;
		}
	}

	return false;
}
| 521 | |||
/* Arm the block-layer timeout for @rq (no custom timeout callback). */
void blk_mq_add_timer(struct request *rq)
{
	__blk_add_timer(rq, NULL);
}
| 526 | |||
| 527 | /* | ||
| 528 | * Run this hardware queue, pulling any software queues mapped to it in. | ||
| 529 | * Note that this function currently has various problems around ordering | ||
| 530 | * of IO. In particular, we'd like FIFO behaviour on handling existing | ||
| 531 | * items on the hctx->dispatch list. Ignore that for now. | ||
| 532 | */ | ||
| 533 | static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | ||
| 534 | { | ||
| 535 | struct request_queue *q = hctx->queue; | ||
| 536 | struct blk_mq_ctx *ctx; | ||
| 537 | struct request *rq; | ||
| 538 | LIST_HEAD(rq_list); | ||
| 539 | int bit, queued; | ||
| 540 | |||
| 541 | if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->flags))) | ||
| 542 | return; | ||
| 543 | |||
| 544 | hctx->run++; | ||
| 545 | |||
| 546 | /* | ||
| 547 | * Touch any software queue that has pending entries. | ||
| 548 | */ | ||
| 549 | for_each_set_bit(bit, hctx->ctx_map, hctx->nr_ctx) { | ||
| 550 | clear_bit(bit, hctx->ctx_map); | ||
| 551 | ctx = hctx->ctxs[bit]; | ||
| 552 | BUG_ON(bit != ctx->index_hw); | ||
| 553 | |||
| 554 | spin_lock(&ctx->lock); | ||
| 555 | list_splice_tail_init(&ctx->rq_list, &rq_list); | ||
| 556 | spin_unlock(&ctx->lock); | ||
| 557 | } | ||
| 558 | |||
| 559 | /* | ||
| 560 | * If we have previous entries on our dispatch list, grab them | ||
| 561 | * and stuff them at the front for more fair dispatch. | ||
| 562 | */ | ||
| 563 | if (!list_empty_careful(&hctx->dispatch)) { | ||
| 564 | spin_lock(&hctx->lock); | ||
| 565 | if (!list_empty(&hctx->dispatch)) | ||
| 566 | list_splice_init(&hctx->dispatch, &rq_list); | ||
| 567 | spin_unlock(&hctx->lock); | ||
| 568 | } | ||
| 569 | |||
| 570 | /* | ||
| 571 | * Delete and return all entries from our dispatch list | ||
| 572 | */ | ||
| 573 | queued = 0; | ||
| 574 | |||
| 575 | /* | ||
| 576 | * Now process all the entries, sending them to the driver. | ||
| 577 | */ | ||
| 578 | while (!list_empty(&rq_list)) { | ||
| 579 | int ret; | ||
| 580 | |||
| 581 | rq = list_first_entry(&rq_list, struct request, queuelist); | ||
| 582 | list_del_init(&rq->queuelist); | ||
| 583 | blk_mq_start_request(rq); | ||
| 584 | |||
| 585 | /* | ||
| 586 | * Last request in the series. Flag it as such, this | ||
| 587 | * enables drivers to know when IO should be kicked off, | ||
| 588 | * if they don't do it on a per-request basis. | ||
| 589 | * | ||
| 590 | * Note: the flag isn't the only condition drivers | ||
| 591 | * should do kick off. If drive is busy, the last | ||
| 592 | * request might not have the bit set. | ||
| 593 | */ | ||
| 594 | if (list_empty(&rq_list)) | ||
| 595 | rq->cmd_flags |= REQ_END; | ||
| 596 | |||
| 597 | ret = q->mq_ops->queue_rq(hctx, rq); | ||
| 598 | switch (ret) { | ||
| 599 | case BLK_MQ_RQ_QUEUE_OK: | ||
| 600 | queued++; | ||
| 601 | continue; | ||
| 602 | case BLK_MQ_RQ_QUEUE_BUSY: | ||
| 603 | /* | ||
| 604 | * FIXME: we should have a mechanism to stop the queue | ||
| 605 | * like blk_stop_queue, otherwise we will waste cpu | ||
| 606 | * time | ||
| 607 | */ | ||
| 608 | list_add(&rq->queuelist, &rq_list); | ||
| 609 | blk_mq_requeue_request(rq); | ||
| 610 | break; | ||
| 611 | default: | ||
| 612 | pr_err("blk-mq: bad return on queue: %d\n", ret); | ||
| 613 | rq->errors = -EIO; | ||
| 614 | case BLK_MQ_RQ_QUEUE_ERROR: | ||
| 615 | blk_mq_end_io(rq, rq->errors); | ||
| 616 | break; | ||
| 617 | } | ||
| 618 | |||
| 619 | if (ret == BLK_MQ_RQ_QUEUE_BUSY) | ||
| 620 | break; | ||
| 621 | } | ||
| 622 | |||
| 623 | if (!queued) | ||
| 624 | hctx->dispatched[0]++; | ||
| 625 | else if (queued < (1 << (BLK_MQ_MAX_DISPATCH_ORDER - 1))) | ||
| 626 | hctx->dispatched[ilog2(queued) + 1]++; | ||
| 627 | |||
| 628 | /* | ||
| 629 | * Any items that need requeuing? Stuff them into hctx->dispatch, | ||
| 630 | * that is where we will continue on next queue run. | ||
| 631 | */ | ||
| 632 | if (!list_empty(&rq_list)) { | ||
| 633 | spin_lock(&hctx->lock); | ||
| 634 | list_splice(&rq_list, &hctx->dispatch); | ||
| 635 | spin_unlock(&hctx->lock); | ||
| 636 | } | ||
| 637 | } | ||
| 638 | |||
| 639 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) | ||
| 640 | { | ||
| 641 | if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->flags))) | ||
| 642 | return; | ||
| 643 | |||
| 644 | if (!async) | ||
| 645 | __blk_mq_run_hw_queue(hctx); | ||
| 646 | else { | ||
| 647 | struct request_queue *q = hctx->queue; | ||
| 648 | |||
| 649 | kblockd_schedule_delayed_work(q, &hctx->delayed_work, 0); | ||
| 650 | } | ||
| 651 | } | ||
| 652 | |||
| 653 | void blk_mq_run_queues(struct request_queue *q, bool async) | ||
| 654 | { | ||
| 655 | struct blk_mq_hw_ctx *hctx; | ||
| 656 | int i; | ||
| 657 | |||
| 658 | queue_for_each_hw_ctx(q, hctx, i) { | ||
| 659 | if ((!blk_mq_hctx_has_pending(hctx) && | ||
| 660 | list_empty_careful(&hctx->dispatch)) || | ||
| 661 | test_bit(BLK_MQ_S_STOPPED, &hctx->flags)) | ||
| 662 | continue; | ||
| 663 | |||
| 664 | blk_mq_run_hw_queue(hctx, async); | ||
| 665 | } | ||
| 666 | } | ||
| 667 | EXPORT_SYMBOL(blk_mq_run_queues); | ||
| 668 | |||
/* Stop a hardware queue: cancel deferred runs and set the STOPPED state. */
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
{
	cancel_delayed_work(&hctx->delayed_work);
	set_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
EXPORT_SYMBOL(blk_mq_stop_hw_queue);
| 675 | |||
/* Stop all hardware queues on @q. */
void blk_mq_stop_hw_queues(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_stop_hw_queue(hctx);
}
EXPORT_SYMBOL(blk_mq_stop_hw_queues);
| 685 | |||
/* Restart a stopped hardware queue and run it inline. */
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
{
	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
	__blk_mq_run_hw_queue(hctx);
}
EXPORT_SYMBOL(blk_mq_start_hw_queue);
| 692 | |||
/* Restart every stopped hardware queue on @q, running each asynchronously. */
void blk_mq_start_stopped_hw_queues(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		if (!test_bit(BLK_MQ_S_STOPPED, &hctx->state))
			continue;

		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
		blk_mq_run_hw_queue(hctx, true);
	}
}
EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
| 707 | |||
/* Deferred-run worker scheduled by blk_mq_run_hw_queue(async=true). */
static void blk_mq_work_fn(struct work_struct *work)
{
	struct blk_mq_hw_ctx *hctx;

	hctx = container_of(work, struct blk_mq_hw_ctx, delayed_work.work);
	__blk_mq_run_hw_queue(hctx);
}
| 715 | |||
/*
 * Queue @rq on its software queue and mark the hardware queue pending.
 * Caller holds ctx->lock.
 */
static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
				    struct request *rq)
{
	struct blk_mq_ctx *ctx = rq->mq_ctx;

	list_add_tail(&rq->queuelist, &ctx->rq_list);
	blk_mq_hctx_mark_pending(hctx, ctx);

	/*
	 * We do this early, to ensure we are on the right CPU.
	 */
	blk_mq_add_timer(rq);
}
| 729 | |||
/*
 * Insert @rq for dispatch. Flush/FUA requests go through the flush
 * machinery; everything else lands on a software queue (re-homed to the
 * current CPU's ctx if the request's ctx CPU went offline). Optionally
 * runs the hardware queue inline afterward.
 */
void blk_mq_insert_request(struct request_queue *q, struct request *rq,
			   bool run_queue)
{
	struct blk_mq_hw_ctx *hctx;
	struct blk_mq_ctx *ctx, *current_ctx;

	ctx = rq->mq_ctx;
	hctx = q->mq_ops->map_queue(q, ctx->cpu);

	if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
		blk_insert_flush(rq);
	} else {
		current_ctx = blk_mq_get_ctx(q);

		/* re-home the request if its original CPU is gone */
		if (!cpu_online(ctx->cpu)) {
			ctx = current_ctx;
			hctx = q->mq_ops->map_queue(q, ctx->cpu);
			rq->mq_ctx = ctx;
		}
		spin_lock(&ctx->lock);
		__blk_mq_insert_request(hctx, rq);
		spin_unlock(&ctx->lock);

		blk_mq_put_ctx(current_ctx);
	}

	if (run_queue)
		__blk_mq_run_hw_queue(hctx);
}
EXPORT_SYMBOL(blk_mq_insert_request);
| 760 | |||
/*
 * This is a special version of blk_mq_insert_request to bypass FLUSH request
 * check. Should only be used internally.
 */
void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
{
	struct request_queue *q = rq->q;
	struct blk_mq_hw_ctx *hctx;
	struct blk_mq_ctx *ctx, *current_ctx;

	current_ctx = blk_mq_get_ctx(q);

	/* re-home the request if its original CPU is gone */
	ctx = rq->mq_ctx;
	if (!cpu_online(ctx->cpu)) {
		ctx = current_ctx;
		rq->mq_ctx = ctx;
	}
	hctx = q->mq_ops->map_queue(q, ctx->cpu);

	/* ctx->cpu might be offline */
	spin_lock(&ctx->lock);
	__blk_mq_insert_request(hctx, rq);
	spin_unlock(&ctx->lock);

	blk_mq_put_ctx(current_ctx);

	if (run_queue)
		blk_mq_run_hw_queue(hctx, async);
}
| 790 | |||
/*
 * Move a batch of plugged requests that all share software queue @ctx
 * onto that queue under a single lock round-trip, then kick the matching
 * hardware queue. @depth is only used for the unplug tracepoint.
 */
static void blk_mq_insert_requests(struct request_queue *q,
				   struct blk_mq_ctx *ctx,
				   struct list_head *list,
				   int depth,
				   bool from_schedule)

{
	struct blk_mq_hw_ctx *hctx;
	struct blk_mq_ctx *current_ctx;

	trace_block_unplug(q, depth, !from_schedule);

	current_ctx = blk_mq_get_ctx(q);

	if (!cpu_online(ctx->cpu))
		ctx = current_ctx;
	hctx = q->mq_ops->map_queue(q, ctx->cpu);

	/*
	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
	 * offline now
	 */
	spin_lock(&ctx->lock);
	while (!list_empty(list)) {
		struct request *rq;

		rq = list_first_entry(list, struct request, queuelist);
		list_del_init(&rq->queuelist);
		rq->mq_ctx = ctx;
		__blk_mq_insert_request(hctx, rq);
	}
	spin_unlock(&ctx->lock);

	blk_mq_put_ctx(current_ctx);

	blk_mq_run_hw_queue(hctx, from_schedule);
}
| 828 | |||
| 829 | static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
| 830 | { | ||
| 831 | struct request *rqa = container_of(a, struct request, queuelist); | ||
| 832 | struct request *rqb = container_of(b, struct request, queuelist); | ||
| 833 | |||
| 834 | return !(rqa->mq_ctx < rqb->mq_ctx || | ||
| 835 | (rqa->mq_ctx == rqb->mq_ctx && | ||
| 836 | blk_rq_pos(rqa) < blk_rq_pos(rqb))); | ||
| 837 | } | ||
| 838 | |||
/*
 * Flush the task plug's mq_list: sort the requests so entries with the
 * same software queue are adjacent, then dispatch each same-ctx run via
 * blk_mq_insert_requests() in one batch.
 */
void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
{
	struct blk_mq_ctx *this_ctx;
	struct request_queue *this_q;
	struct request *rq;
	LIST_HEAD(list);
	LIST_HEAD(ctx_list);
	unsigned int depth;

	list_splice_init(&plug->mq_list, &list);

	/* Group requests by ctx (and sector) so batches are maximal. */
	list_sort(NULL, &list, plug_ctx_cmp);

	this_q = NULL;
	this_ctx = NULL;
	depth = 0;

	while (!list_empty(&list)) {
		rq = list_entry_rq(list.next);
		list_del_init(&rq->queuelist);
		BUG_ON(!rq->q);
		if (rq->mq_ctx != this_ctx) {
			/* ctx changed: flush the batch collected so far. */
			if (this_ctx) {
				blk_mq_insert_requests(this_q, this_ctx,
						       &ctx_list, depth,
						       from_schedule);
			}

			this_ctx = rq->mq_ctx;
			this_q = rq->q;
			depth = 0;
		}

		depth++;
		list_add_tail(&rq->queuelist, &ctx_list);
	}

	/*
	 * If 'this_ctx' is set, we know we have entries to complete
	 * on 'ctx_list'. Do those.
	 */
	if (this_ctx) {
		blk_mq_insert_requests(this_q, this_ctx, &ctx_list, depth,
				       from_schedule);
	}
}
| 885 | |||
/* Initialize @rq from @bio and start I/O accounting for it. */
static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
{
	init_request_from_bio(rq, bio);
	blk_account_io_start(rq, 1);
}
| 891 | |||
/*
 * blk-mq entry point for bio submission (installed via
 * blk_queue_make_request() in blk_mq_init_queue()).
 *
 * Fast path: allocate a request tag, convert the bio, and either park
 * the request on the task plug, merge it, or insert it on the software
 * queue and run the hardware queue. FLUSH/FUA bios bypass all of that
 * and go straight to the flush machinery.
 */
static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
{
	struct blk_mq_hw_ctx *hctx;
	struct blk_mq_ctx *ctx;
	const int is_sync = rw_is_sync(bio->bi_rw);
	const int is_flush_fua = bio->bi_rw & (REQ_FLUSH | REQ_FUA);
	int rw = bio_data_dir(bio);
	struct request *rq;
	unsigned int use_plug, request_count = 0;

	/*
	 * If we have multiple hardware queues, just go directly to
	 * one of those for sync IO.
	 */
	use_plug = !is_flush_fua && ((q->nr_hw_queues == 1) || !is_sync);

	blk_queue_bounce(q, &bio);

	/* Try merging into an already-plugged request first. */
	if (use_plug && blk_attempt_plug_merge(q, bio, &request_count))
		return;

	if (blk_mq_queue_enter(q)) {
		bio_endio(bio, -EIO);
		return;
	}

	ctx = blk_mq_get_ctx(q);
	hctx = q->mq_ops->map_queue(q, ctx->cpu);

	trace_block_getrq(q, bio, rw);
	rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
	if (likely(rq))
		blk_mq_rq_ctx_init(ctx, rq, rw);
	else {
		/* Atomic alloc failed: drop ctx and wait for a tag. */
		blk_mq_put_ctx(ctx);
		trace_block_sleeprq(q, bio, rw);
		rq = blk_mq_alloc_request_pinned(q, rw, __GFP_WAIT|GFP_ATOMIC,
						 false);
		/*
		 * NOTE(review): rq is not checked for NULL here; presumably
		 * __GFP_WAIT guarantees the pinned alloc succeeds — verify.
		 */
		ctx = rq->mq_ctx;
		hctx = q->mq_ops->map_queue(q, ctx->cpu);
	}

	hctx->queued++;

	if (unlikely(is_flush_fua)) {
		blk_mq_bio_to_request(rq, bio);
		blk_mq_put_ctx(ctx);
		blk_insert_flush(rq);
		goto run_queue;
	}

	/*
	 * A task plug currently exists. Since this is completely lockless,
	 * utilize that to temporarily store requests until the task is
	 * either done or scheduled away.
	 */
	if (use_plug) {
		struct blk_plug *plug = current->plug;

		if (plug) {
			blk_mq_bio_to_request(rq, bio);
			if (list_empty(&plug->mq_list))
				trace_block_plug(q);
			else if (request_count >= BLK_MAX_REQUEST_COUNT) {
				/* Plug is full: flush it before adding. */
				blk_flush_plug_list(plug, false);
				trace_block_plug(q);
			}
			list_add_tail(&rq->queuelist, &plug->mq_list);
			blk_mq_put_ctx(ctx);
			return;
		}
	}

	spin_lock(&ctx->lock);

	if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
	    blk_mq_attempt_merge(q, ctx, bio))
		__blk_mq_free_request(hctx, ctx, rq);
	else {
		blk_mq_bio_to_request(rq, bio);
		__blk_mq_insert_request(hctx, rq);
	}

	spin_unlock(&ctx->lock);
	blk_mq_put_ctx(ctx);

	/*
	 * For a SYNC request, send it to the hardware immediately. For an
	 * ASYNC request, just ensure that we run it later on. The latter
	 * allows for merging opportunities and more efficient dispatching.
	 */
run_queue:
	blk_mq_run_hw_queue(hctx, !is_sync || is_flush_fua);
}
| 986 | |||
| 987 | /* | ||
| 988 | * Default mapping to a software queue, since we use one per CPU. | ||
| 989 | */ | ||
| 990 | struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu) | ||
| 991 | { | ||
| 992 | return q->queue_hw_ctx[q->mq_map[cpu]]; | ||
| 993 | } | ||
| 994 | EXPORT_SYMBOL(blk_mq_map_queue); | ||
| 995 | |||
| 996 | struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *reg, | ||
| 997 | unsigned int hctx_index) | ||
| 998 | { | ||
| 999 | return kmalloc_node(sizeof(struct blk_mq_hw_ctx), | ||
| 1000 | GFP_KERNEL | __GFP_ZERO, reg->numa_node); | ||
| 1001 | } | ||
| 1002 | EXPORT_SYMBOL(blk_mq_alloc_single_hw_queue); | ||
| 1003 | |||
/* Default hw-context destructor, counterpart of the allocator above. */
void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *hctx,
				 unsigned int hctx_index)
{
	kfree(hctx);
}
EXPORT_SYMBOL(blk_mq_free_single_hw_queue);
| 1010 | |||
/*
 * CPU hotplug callback for a hardware context: when a CPU dies, drain
 * any requests still sitting on its software queue and re-queue them on
 * the current CPU's software queue.
 */
static void blk_mq_hctx_notify(void *data, unsigned long action,
			       unsigned int cpu)
{
	struct blk_mq_hw_ctx *hctx = data;
	struct blk_mq_ctx *ctx;
	LIST_HEAD(tmp);

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return;

	/*
	 * Move ctx entries to new CPU, if this one is going away.
	 */
	ctx = __blk_mq_get_ctx(hctx->queue, cpu);

	spin_lock(&ctx->lock);
	if (!list_empty(&ctx->rq_list)) {
		/* Steal the whole list and clear the pending bit. */
		list_splice_init(&ctx->rq_list, &tmp);
		clear_bit(ctx->index_hw, hctx->ctx_map);
	}
	spin_unlock(&ctx->lock);

	if (list_empty(&tmp))
		return;

	/* Re-home everything onto the current CPU's software queue. */
	ctx = blk_mq_get_ctx(hctx->queue);
	spin_lock(&ctx->lock);

	while (!list_empty(&tmp)) {
		struct request *rq;

		rq = list_first_entry(&tmp, struct request, queuelist);
		rq->mq_ctx = ctx;
		list_move_tail(&rq->queuelist, &ctx->rq_list);
	}

	blk_mq_hctx_mark_pending(hctx, ctx);

	spin_unlock(&ctx->lock);
	blk_mq_put_ctx(ctx);
}
| 1052 | |||
| 1053 | static void blk_mq_init_hw_commands(struct blk_mq_hw_ctx *hctx, | ||
| 1054 | void (*init)(void *, struct blk_mq_hw_ctx *, | ||
| 1055 | struct request *, unsigned int), | ||
| 1056 | void *data) | ||
| 1057 | { | ||
| 1058 | unsigned int i; | ||
| 1059 | |||
| 1060 | for (i = 0; i < hctx->queue_depth; i++) { | ||
| 1061 | struct request *rq = hctx->rqs[i]; | ||
| 1062 | |||
| 1063 | init(data, hctx, rq, i); | ||
| 1064 | } | ||
| 1065 | } | ||
| 1066 | |||
/*
 * Let a driver initialize the command payload of every preallocated
 * request on every hardware queue of @q.
 */
void blk_mq_init_commands(struct request_queue *q,
			  void (*init)(void *, struct blk_mq_hw_ctx *,
				       struct request *, unsigned int),
			  void *data)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i;

	queue_for_each_hw_ctx(q, hctx, i)
		blk_mq_init_hw_commands(hctx, init, data);
}
EXPORT_SYMBOL(blk_mq_init_commands);
| 1079 | |||
| 1080 | static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx) | ||
| 1081 | { | ||
| 1082 | struct page *page; | ||
| 1083 | |||
| 1084 | while (!list_empty(&hctx->page_list)) { | ||
| 1085 | page = list_first_entry(&hctx->page_list, struct page, list); | ||
| 1086 | list_del_init(&page->list); | ||
| 1087 | __free_pages(page, page->private); | ||
| 1088 | } | ||
| 1089 | |||
| 1090 | kfree(hctx->rqs); | ||
| 1091 | |||
| 1092 | if (hctx->tags) | ||
| 1093 | blk_mq_free_tags(hctx->tags); | ||
| 1094 | } | ||
| 1095 | |||
| 1096 | static size_t order_to_size(unsigned int order) | ||
| 1097 | { | ||
| 1098 | size_t ret = PAGE_SIZE; | ||
| 1099 | |||
| 1100 | while (order--) | ||
| 1101 | ret *= 2; | ||
| 1102 | |||
| 1103 | return ret; | ||
| 1104 | } | ||
| 1105 | |||
| 1106 | static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx, | ||
| 1107 | unsigned int reserved_tags, int node) | ||
| 1108 | { | ||
| 1109 | unsigned int i, j, entries_per_page, max_order = 4; | ||
| 1110 | size_t rq_size, left; | ||
| 1111 | |||
| 1112 | INIT_LIST_HEAD(&hctx->page_list); | ||
| 1113 | |||
| 1114 | hctx->rqs = kmalloc_node(hctx->queue_depth * sizeof(struct request *), | ||
| 1115 | GFP_KERNEL, node); | ||
| 1116 | if (!hctx->rqs) | ||
| 1117 | return -ENOMEM; | ||
| 1118 | |||
| 1119 | /* | ||
| 1120 | * rq_size is the size of the request plus driver payload, rounded | ||
| 1121 | * to the cacheline size | ||
| 1122 | */ | ||
| 1123 | rq_size = round_up(sizeof(struct request) + hctx->cmd_size, | ||
| 1124 | cache_line_size()); | ||
| 1125 | left = rq_size * hctx->queue_depth; | ||
| 1126 | |||
| 1127 | for (i = 0; i < hctx->queue_depth;) { | ||
| 1128 | int this_order = max_order; | ||
| 1129 | struct page *page; | ||
| 1130 | int to_do; | ||
| 1131 | void *p; | ||
| 1132 | |||
| 1133 | while (left < order_to_size(this_order - 1) && this_order) | ||
| 1134 | this_order--; | ||
| 1135 | |||
| 1136 | do { | ||
| 1137 | page = alloc_pages_node(node, GFP_KERNEL, this_order); | ||
| 1138 | if (page) | ||
| 1139 | break; | ||
| 1140 | if (!this_order--) | ||
| 1141 | break; | ||
| 1142 | if (order_to_size(this_order) < rq_size) | ||
| 1143 | break; | ||
| 1144 | } while (1); | ||
| 1145 | |||
| 1146 | if (!page) | ||
| 1147 | break; | ||
| 1148 | |||
| 1149 | page->private = this_order; | ||
| 1150 | list_add_tail(&page->list, &hctx->page_list); | ||
| 1151 | |||
| 1152 | p = page_address(page); | ||
| 1153 | entries_per_page = order_to_size(this_order) / rq_size; | ||
| 1154 | to_do = min(entries_per_page, hctx->queue_depth - i); | ||
| 1155 | left -= to_do * rq_size; | ||
| 1156 | for (j = 0; j < to_do; j++) { | ||
| 1157 | hctx->rqs[i] = p; | ||
| 1158 | blk_mq_rq_init(hctx, hctx->rqs[i]); | ||
| 1159 | p += rq_size; | ||
| 1160 | i++; | ||
| 1161 | } | ||
| 1162 | } | ||
| 1163 | |||
| 1164 | if (i < (reserved_tags + BLK_MQ_TAG_MIN)) | ||
| 1165 | goto err_rq_map; | ||
| 1166 | else if (i != hctx->queue_depth) { | ||
| 1167 | hctx->queue_depth = i; | ||
| 1168 | pr_warn("%s: queue depth set to %u because of low memory\n", | ||
| 1169 | __func__, i); | ||
| 1170 | } | ||
| 1171 | |||
| 1172 | hctx->tags = blk_mq_init_tags(hctx->queue_depth, reserved_tags, node); | ||
| 1173 | if (!hctx->tags) { | ||
| 1174 | err_rq_map: | ||
| 1175 | blk_mq_free_rq_map(hctx); | ||
| 1176 | return -ENOMEM; | ||
| 1177 | } | ||
| 1178 | |||
| 1179 | return 0; | ||
| 1180 | } | ||
| 1181 | |||
/*
 * Initialize every hardware context of @q: work item, dispatch list,
 * request map, ctx pointer array and pending bitmap, CPU notifier, and
 * the driver's optional init_hctx() hook.
 *
 * Returns 0 on success. On failure, tears down the contexts that were
 * already initialized (those before index i) and returns 1.
 */
static int blk_mq_init_hw_queues(struct request_queue *q,
				 struct blk_mq_reg *reg, void *driver_data)
{
	struct blk_mq_hw_ctx *hctx;
	unsigned int i, j;

	/*
	 * Initialize hardware queues
	 */
	queue_for_each_hw_ctx(q, hctx, i) {
		unsigned int num_maps;
		int node;

		node = hctx->numa_node;
		if (node == NUMA_NO_NODE)
			node = hctx->numa_node = reg->numa_node;

		INIT_DELAYED_WORK(&hctx->delayed_work, blk_mq_work_fn);
		spin_lock_init(&hctx->lock);
		INIT_LIST_HEAD(&hctx->dispatch);
		hctx->queue = q;
		hctx->queue_num = i;
		hctx->flags = reg->flags;
		hctx->queue_depth = reg->queue_depth;
		hctx->cmd_size = reg->cmd_size;

		blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
					 blk_mq_hctx_notify, hctx);
		blk_mq_register_cpu_notifier(&hctx->cpu_notifier);

		if (blk_mq_init_rq_map(hctx, reg->reserved_tags, node))
			break;

		/*
		 * Allocate space for all possible cpus to avoid allocation in
		 * runtime
		 */
		hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *),
					  GFP_KERNEL, node);
		if (!hctx->ctxs)
			break;

		/* One bit per possible software queue, in longs. */
		num_maps = ALIGN(nr_cpu_ids, BITS_PER_LONG) / BITS_PER_LONG;
		hctx->ctx_map = kzalloc_node(num_maps * sizeof(unsigned long),
					     GFP_KERNEL, node);
		if (!hctx->ctx_map)
			break;

		hctx->nr_ctx_map = num_maps;
		hctx->nr_ctx = 0;

		if (reg->ops->init_hctx &&
		    reg->ops->init_hctx(hctx, driver_data, i))
			break;
	}

	if (i == q->nr_hw_queues)
		return 0;

	/*
	 * Init failed
	 */
	queue_for_each_hw_ctx(q, hctx, j) {
		if (i == j)
			break;

		if (reg->ops->exit_hctx)
			reg->ops->exit_hctx(hctx, j);

		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
		blk_mq_free_rq_map(hctx);
		kfree(hctx->ctxs);
	}

	return 1;
}
| 1258 | |||
/*
 * Initialize the per-CPU software queues and count how many software
 * queues feed each hardware queue.
 */
static void blk_mq_init_cpu_queues(struct request_queue *q,
				   unsigned int nr_hw_queues)
{
	unsigned int i;

	for_each_possible_cpu(i) {
		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
		struct blk_mq_hw_ctx *hctx;

		memset(__ctx, 0, sizeof(*__ctx));
		__ctx->cpu = i;
		spin_lock_init(&__ctx->lock);
		INIT_LIST_HEAD(&__ctx->rq_list);
		__ctx->queue = q;

		/* If the cpu isn't online, the cpu is mapped to first hctx */
		hctx = q->mq_ops->map_queue(q, i);
		hctx->nr_ctx++;

		if (!cpu_online(i))
			continue;

		/*
		 * Set local node, IFF we have more than one hw queue. If
		 * not, we remain on the home node of the device
		 */
		if (nr_hw_queues > 1 && hctx->numa_node == NUMA_NO_NODE)
			hctx->numa_node = cpu_to_node(i);
	}
}
| 1289 | |||
| 1290 | static void blk_mq_map_swqueue(struct request_queue *q) | ||
| 1291 | { | ||
| 1292 | unsigned int i; | ||
| 1293 | struct blk_mq_hw_ctx *hctx; | ||
| 1294 | struct blk_mq_ctx *ctx; | ||
| 1295 | |||
| 1296 | queue_for_each_hw_ctx(q, hctx, i) { | ||
| 1297 | hctx->nr_ctx = 0; | ||
| 1298 | } | ||
| 1299 | |||
| 1300 | /* | ||
| 1301 | * Map software to hardware queues | ||
| 1302 | */ | ||
| 1303 | queue_for_each_ctx(q, ctx, i) { | ||
| 1304 | /* If the cpu isn't online, the cpu is mapped to first hctx */ | ||
| 1305 | hctx = q->mq_ops->map_queue(q, i); | ||
| 1306 | ctx->index_hw = hctx->nr_ctx; | ||
| 1307 | hctx->ctxs[hctx->nr_ctx++] = ctx; | ||
| 1308 | } | ||
| 1309 | } | ||
| 1310 | |||
/*
 * Allocate and set up a blk-mq request queue from the driver's
 * registration data: per-CPU software queues, hardware contexts,
 * CPU->queue map, timeout timer, and flush machinery.
 *
 * Returns the new queue, or ERR_PTR(-EINVAL/-ENOMEM) on failure.
 */
struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
					void *driver_data)
{
	struct blk_mq_hw_ctx **hctxs;
	struct blk_mq_ctx *ctx;
	struct request_queue *q;
	int i;

	if (!reg->nr_hw_queues ||
	    !reg->ops->queue_rq || !reg->ops->map_queue ||
	    !reg->ops->alloc_hctx || !reg->ops->free_hctx)
		return ERR_PTR(-EINVAL);

	if (!reg->queue_depth)
		reg->queue_depth = BLK_MQ_MAX_DEPTH;
	else if (reg->queue_depth > BLK_MQ_MAX_DEPTH) {
		pr_err("blk-mq: queuedepth too large (%u)\n", reg->queue_depth);
		reg->queue_depth = BLK_MQ_MAX_DEPTH;
	}

	/*
	 * Set aside a tag for flush requests. It will only be used while
	 * another flush request is in progress but outside the driver.
	 *
	 * TODO: only allocate if flushes are supported
	 */
	reg->queue_depth++;
	reg->reserved_tags++;

	if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
		return ERR_PTR(-EINVAL);

	ctx = alloc_percpu(struct blk_mq_ctx);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	hctxs = kmalloc_node(reg->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
			     reg->numa_node);

	if (!hctxs)
		goto err_percpu;

	for (i = 0; i < reg->nr_hw_queues; i++) {
		hctxs[i] = reg->ops->alloc_hctx(reg, i);
		if (!hctxs[i])
			goto err_hctxs;

		hctxs[i]->numa_node = NUMA_NO_NODE;
		hctxs[i]->queue_num = i;
	}

	q = blk_alloc_queue_node(GFP_KERNEL, reg->numa_node);
	if (!q)
		goto err_hctxs;

	q->mq_map = blk_mq_make_queue_map(reg);
	if (!q->mq_map)
		goto err_map;

	/* Default 30s request timeout unless the driver overrides below. */
	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
	blk_queue_rq_timeout(q, 30000);

	q->nr_queues = nr_cpu_ids;
	q->nr_hw_queues = reg->nr_hw_queues;

	q->queue_ctx = ctx;
	q->queue_hw_ctx = hctxs;

	q->mq_ops = reg->ops;

	blk_queue_make_request(q, blk_mq_make_request);
	blk_queue_rq_timed_out(q, reg->ops->timeout);
	if (reg->timeout)
		blk_queue_rq_timeout(q, reg->timeout);

	blk_mq_init_flush(q);
	blk_mq_init_cpu_queues(q, reg->nr_hw_queues);

	if (blk_mq_init_hw_queues(q, reg, driver_data))
		goto err_hw;

	blk_mq_map_swqueue(q);

	/* Register for CPU-hotplug driven remapping. */
	mutex_lock(&all_q_mutex);
	list_add_tail(&q->all_q_node, &all_q_list);
	mutex_unlock(&all_q_mutex);

	return q;
err_hw:
	/*
	 * NOTE(review): mq_map is freed here, but q->mq_ops is already set,
	 * so blk_cleanup_queue() may reach blk_mq_free_queue() and free
	 * q->mq_map (and hctxs) again — verify the unwind ordering.
	 */
	kfree(q->mq_map);
err_map:
	blk_cleanup_queue(q);
err_hctxs:
	for (i = 0; i < reg->nr_hw_queues; i++) {
		if (!hctxs[i])
			break;
		reg->ops->free_hctx(hctxs[i], i);
	}
	kfree(hctxs);
err_percpu:
	free_percpu(ctx);
	return ERR_PTR(-ENOMEM);
}
EXPORT_SYMBOL(blk_mq_init_queue);
| 1415 | |||
/*
 * Tear down everything blk_mq_init_queue() created: per-hctx resources
 * (work, maps, request pool, notifier, driver hooks), the per-CPU
 * contexts, and the hotplug registration.
 */
void blk_mq_free_queue(struct request_queue *q)
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		cancel_delayed_work_sync(&hctx->delayed_work);
		kfree(hctx->ctx_map);
		kfree(hctx->ctxs);
		blk_mq_free_rq_map(hctx);
		blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
		if (q->mq_ops->exit_hctx)
			q->mq_ops->exit_hctx(hctx, i);
		q->mq_ops->free_hctx(hctx, i);
	}

	free_percpu(q->queue_ctx);
	kfree(q->queue_hw_ctx);
	kfree(q->mq_map);

	/* Clear the pointers so a stale queue can't be misused. */
	q->queue_ctx = NULL;
	q->queue_hw_ctx = NULL;
	q->mq_map = NULL;

	mutex_lock(&all_q_mutex);
	list_del_init(&q->all_q_node);
	mutex_unlock(&all_q_mutex);
}
EXPORT_SYMBOL(blk_mq_free_queue);
| 1445 | |||
/* Basically redo blk_mq_init_queue with queue frozen */
static void __cpuinit blk_mq_queue_reinit(struct request_queue *q)
{
	/* Block new I/O and wait out in-flight requests first. */
	blk_mq_freeze_queue(q);

	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);

	/*
	 * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
	 * we should change hctx numa_node according to new topology (this
	 * involves free and re-allocate memory, worthy doing?)
	 */

	blk_mq_map_swqueue(q);

	blk_mq_unfreeze_queue(q);
}
| 1463 | |||
/*
 * CPU hotplug notifier: on CPU death or arrival, remap every registered
 * blk-mq queue to match the new topology.
 */
static int __cpuinit blk_mq_queue_reinit_notify(struct notifier_block *nb,
						unsigned long action, void *hcpu)
{
	struct request_queue *q;

	/*
	 * Before new mapping is established, hotadded cpu might already start
	 * handling requests. This doesn't break anything as we map offline
	 * CPUs to first hardware queue. We will re-init queue below to get
	 * optimal settings.
	 */
	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
	    action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
		return NOTIFY_OK;

	mutex_lock(&all_q_mutex);
	list_for_each_entry(q, &all_q_list, all_q_node)
		blk_mq_queue_reinit(q);
	mutex_unlock(&all_q_mutex);
	return NOTIFY_OK;
}
| 1485 | |||
/*
 * Boot-time init: set up the per-CPU IPI completion lists, the blk-mq
 * CPU notifier machinery, and the queue-remap hotplug notifier.
 */
static int __init blk_mq_init(void)
{
	unsigned int i;

	for_each_possible_cpu(i)
		init_llist_head(&per_cpu(ipi_lists, i));

	blk_mq_cpu_init();

	/* Must be called after percpu_counter_hotcpu_callback() */
	hotcpu_notifier(blk_mq_queue_reinit_notify, -10);

	return 0;
}
subsys_initcall(blk_mq_init);
diff --git a/block/blk-mq.h b/block/blk-mq.h new file mode 100644 index 000000000000..52bf1f96a2c2 --- /dev/null +++ b/block/blk-mq.h | |||
| @@ -0,0 +1,52 @@ | |||
| 1 | #ifndef INT_BLK_MQ_H | ||
| 2 | #define INT_BLK_MQ_H | ||
| 3 | |||
| 4 | struct blk_mq_ctx { | ||
| 5 | struct { | ||
| 6 | spinlock_t lock; | ||
| 7 | struct list_head rq_list; | ||
| 8 | } ____cacheline_aligned_in_smp; | ||
| 9 | |||
| 10 | unsigned int cpu; | ||
| 11 | unsigned int index_hw; | ||
| 12 | unsigned int ipi_redirect; | ||
| 13 | |||
| 14 | /* incremented at dispatch time */ | ||
| 15 | unsigned long rq_dispatched[2]; | ||
| 16 | unsigned long rq_merged; | ||
| 17 | |||
| 18 | /* incremented at completion time */ | ||
| 19 | unsigned long ____cacheline_aligned_in_smp rq_completed[2]; | ||
| 20 | |||
| 21 | struct request_queue *queue; | ||
| 22 | struct kobject kobj; | ||
| 23 | }; | ||
| 24 | |||
| 25 | void __blk_mq_end_io(struct request *rq, int error); | ||
| 26 | void blk_mq_complete_request(struct request *rq, int error); | ||
| 27 | void blk_mq_run_request(struct request *rq, bool run_queue, bool async); | ||
| 28 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); | ||
| 29 | void blk_mq_init_flush(struct request_queue *q); | ||
| 30 | |||
| 31 | /* | ||
| 32 | * CPU hotplug helpers | ||
| 33 | */ | ||
| 34 | struct blk_mq_cpu_notifier; | ||
| 35 | void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, | ||
| 36 | void (*fn)(void *, unsigned long, unsigned int), | ||
| 37 | void *data); | ||
| 38 | void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier); | ||
| 39 | void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier); | ||
| 40 | void blk_mq_cpu_init(void); | ||
| 41 | DECLARE_PER_CPU(struct llist_head, ipi_lists); | ||
| 42 | |||
| 43 | /* | ||
| 44 | * CPU -> queue mappings | ||
| 45 | */ | ||
| 46 | struct blk_mq_reg; | ||
| 47 | extern unsigned int *blk_mq_make_queue_map(struct blk_mq_reg *reg); | ||
| 48 | extern int blk_mq_update_queue_map(unsigned int *map, unsigned int nr_queues); | ||
| 49 | |||
| 50 | void blk_mq_add_timer(struct request *rq); | ||
| 51 | |||
| 52 | #endif | ||
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 3aa5b195f4dd..4f8c4d90ec73 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <linux/bio.h> | 7 | #include <linux/bio.h> |
| 8 | #include <linux/blkdev.h> | 8 | #include <linux/blkdev.h> |
| 9 | #include <linux/blktrace_api.h> | 9 | #include <linux/blktrace_api.h> |
| 10 | #include <linux/blk-mq.h> | ||
| 10 | 11 | ||
| 11 | #include "blk.h" | 12 | #include "blk.h" |
| 12 | #include "blk-cgroup.h" | 13 | #include "blk-cgroup.h" |
| @@ -542,6 +543,11 @@ static void blk_release_queue(struct kobject *kobj) | |||
| 542 | if (q->queue_tags) | 543 | if (q->queue_tags) |
| 543 | __blk_queue_free_tags(q); | 544 | __blk_queue_free_tags(q); |
| 544 | 545 | ||
| 546 | percpu_counter_destroy(&q->mq_usage_counter); | ||
| 547 | |||
| 548 | if (q->mq_ops) | ||
| 549 | blk_mq_free_queue(q); | ||
| 550 | |||
| 545 | blk_trace_shutdown(q); | 551 | blk_trace_shutdown(q); |
| 546 | 552 | ||
| 547 | bdi_destroy(&q->backing_dev_info); | 553 | bdi_destroy(&q->backing_dev_info); |
| @@ -575,6 +581,7 @@ int blk_register_queue(struct gendisk *disk) | |||
| 575 | * bypass from queue allocation. | 581 | * bypass from queue allocation. |
| 576 | */ | 582 | */ |
| 577 | blk_queue_bypass_end(q); | 583 | blk_queue_bypass_end(q); |
| 584 | queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); | ||
| 578 | 585 | ||
| 579 | ret = blk_trace_init_sysfs(dev); | 586 | ret = blk_trace_init_sysfs(dev); |
| 580 | if (ret) | 587 | if (ret) |
| @@ -588,6 +595,9 @@ int blk_register_queue(struct gendisk *disk) | |||
| 588 | 595 | ||
| 589 | kobject_uevent(&q->kobj, KOBJ_ADD); | 596 | kobject_uevent(&q->kobj, KOBJ_ADD); |
| 590 | 597 | ||
| 598 | if (q->mq_ops) | ||
| 599 | blk_mq_register_disk(disk); | ||
| 600 | |||
| 591 | if (!q->request_fn) | 601 | if (!q->request_fn) |
| 592 | return 0; | 602 | return 0; |
| 593 | 603 | ||
| @@ -610,6 +620,9 @@ void blk_unregister_queue(struct gendisk *disk) | |||
| 610 | if (WARN_ON(!q)) | 620 | if (WARN_ON(!q)) |
| 611 | return; | 621 | return; |
| 612 | 622 | ||
| 623 | if (q->mq_ops) | ||
| 624 | blk_mq_unregister_disk(disk); | ||
| 625 | |||
| 613 | if (q->request_fn) | 626 | if (q->request_fn) |
| 614 | elv_unregister_queue(q); | 627 | elv_unregister_queue(q); |
| 615 | 628 | ||
diff --git a/block/blk-timeout.c b/block/blk-timeout.c index abf725c655fc..bba81c9348e1 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c | |||
| @@ -7,6 +7,7 @@ | |||
| 7 | #include <linux/fault-inject.h> | 7 | #include <linux/fault-inject.h> |
| 8 | 8 | ||
| 9 | #include "blk.h" | 9 | #include "blk.h" |
| 10 | #include "blk-mq.h" | ||
| 10 | 11 | ||
| 11 | #ifdef CONFIG_FAIL_IO_TIMEOUT | 12 | #ifdef CONFIG_FAIL_IO_TIMEOUT |
| 12 | 13 | ||
| @@ -88,10 +89,18 @@ static void blk_rq_timed_out(struct request *req) | |||
| 88 | ret = q->rq_timed_out_fn(req); | 89 | ret = q->rq_timed_out_fn(req); |
| 89 | switch (ret) { | 90 | switch (ret) { |
| 90 | case BLK_EH_HANDLED: | 91 | case BLK_EH_HANDLED: |
| 91 | __blk_complete_request(req); | 92 | /* Can we use req->errors here? */ |
| 93 | if (q->mq_ops) | ||
| 94 | blk_mq_complete_request(req, req->errors); | ||
| 95 | else | ||
| 96 | __blk_complete_request(req); | ||
| 92 | break; | 97 | break; |
| 93 | case BLK_EH_RESET_TIMER: | 98 | case BLK_EH_RESET_TIMER: |
| 94 | blk_add_timer(req); | 99 | if (q->mq_ops) |
| 100 | blk_mq_add_timer(req); | ||
| 101 | else | ||
| 102 | blk_add_timer(req); | ||
| 103 | |||
| 95 | blk_clear_rq_complete(req); | 104 | blk_clear_rq_complete(req); |
| 96 | break; | 105 | break; |
| 97 | case BLK_EH_NOT_HANDLED: | 106 | case BLK_EH_NOT_HANDLED: |
| @@ -108,6 +117,23 @@ static void blk_rq_timed_out(struct request *req) | |||
| 108 | } | 117 | } |
| 109 | } | 118 | } |
| 110 | 119 | ||
| 120 | void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, | ||
| 121 | unsigned int *next_set) | ||
| 122 | { | ||
| 123 | if (time_after_eq(jiffies, rq->deadline)) { | ||
| 124 | list_del_init(&rq->timeout_list); | ||
| 125 | |||
| 126 | /* | ||
| 127 | * Check if we raced with end io completion | ||
| 128 | */ | ||
| 129 | if (!blk_mark_rq_complete(rq)) | ||
| 130 | blk_rq_timed_out(rq); | ||
| 131 | } else if (!*next_set || time_after(*next_timeout, rq->deadline)) { | ||
| 132 | *next_timeout = rq->deadline; | ||
| 133 | *next_set = 1; | ||
| 134 | } | ||
| 135 | } | ||
| 136 | |||
| 111 | void blk_rq_timed_out_timer(unsigned long data) | 137 | void blk_rq_timed_out_timer(unsigned long data) |
| 112 | { | 138 | { |
| 113 | struct request_queue *q = (struct request_queue *) data; | 139 | struct request_queue *q = (struct request_queue *) data; |
| @@ -117,21 +143,8 @@ void blk_rq_timed_out_timer(unsigned long data) | |||
| 117 | 143 | ||
| 118 | spin_lock_irqsave(q->queue_lock, flags); | 144 | spin_lock_irqsave(q->queue_lock, flags); |
| 119 | 145 | ||
| 120 | list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) { | 146 | list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) |
| 121 | if (time_after_eq(jiffies, rq->deadline)) { | 147 | blk_rq_check_expired(rq, &next, &next_set); |
| 122 | list_del_init(&rq->timeout_list); | ||
| 123 | |||
| 124 | /* | ||
| 125 | * Check if we raced with end io completion | ||
| 126 | */ | ||
| 127 | if (blk_mark_rq_complete(rq)) | ||
| 128 | continue; | ||
| 129 | blk_rq_timed_out(rq); | ||
| 130 | } else if (!next_set || time_after(next, rq->deadline)) { | ||
| 131 | next = rq->deadline; | ||
| 132 | next_set = 1; | ||
| 133 | } | ||
| 134 | } | ||
| 135 | 148 | ||
| 136 | if (next_set) | 149 | if (next_set) |
| 137 | mod_timer(&q->timeout, round_jiffies_up(next)); | 150 | mod_timer(&q->timeout, round_jiffies_up(next)); |
| @@ -157,15 +170,7 @@ void blk_abort_request(struct request *req) | |||
| 157 | } | 170 | } |
| 158 | EXPORT_SYMBOL_GPL(blk_abort_request); | 171 | EXPORT_SYMBOL_GPL(blk_abort_request); |
| 159 | 172 | ||
| 160 | /** | 173 | void __blk_add_timer(struct request *req, struct list_head *timeout_list) |
| 161 | * blk_add_timer - Start timeout timer for a single request | ||
| 162 | * @req: request that is about to start running. | ||
| 163 | * | ||
| 164 | * Notes: | ||
| 165 | * Each request has its own timer, and as it is added to the queue, we | ||
| 166 | * set up the timer. When the request completes, we cancel the timer. | ||
| 167 | */ | ||
| 168 | void blk_add_timer(struct request *req) | ||
| 169 | { | 174 | { |
| 170 | struct request_queue *q = req->q; | 175 | struct request_queue *q = req->q; |
| 171 | unsigned long expiry; | 176 | unsigned long expiry; |
| @@ -183,7 +188,8 @@ void blk_add_timer(struct request *req) | |||
| 183 | req->timeout = q->rq_timeout; | 188 | req->timeout = q->rq_timeout; |
| 184 | 189 | ||
| 185 | req->deadline = jiffies + req->timeout; | 190 | req->deadline = jiffies + req->timeout; |
| 186 | list_add_tail(&req->timeout_list, &q->timeout_list); | 191 | if (timeout_list) |
| 192 | list_add_tail(&req->timeout_list, timeout_list); | ||
| 187 | 193 | ||
| 188 | /* | 194 | /* |
| 189 | * If the timer isn't already pending or this timeout is earlier | 195 | * If the timer isn't already pending or this timeout is earlier |
| @@ -195,5 +201,19 @@ void blk_add_timer(struct request *req) | |||
| 195 | if (!timer_pending(&q->timeout) || | 201 | if (!timer_pending(&q->timeout) || |
| 196 | time_before(expiry, q->timeout.expires)) | 202 | time_before(expiry, q->timeout.expires)) |
| 197 | mod_timer(&q->timeout, expiry); | 203 | mod_timer(&q->timeout, expiry); |
| 204 | |||
| 205 | } | ||
| 206 | |||
| 207 | /** | ||
| 208 | * blk_add_timer - Start timeout timer for a single request | ||
| 209 | * @req: request that is about to start running. | ||
| 210 | * | ||
| 211 | * Notes: | ||
| 212 | * Each request has its own timer, and as it is added to the queue, we | ||
| 213 | * set up the timer. When the request completes, we cancel the timer. | ||
| 214 | */ | ||
| 215 | void blk_add_timer(struct request *req) | ||
| 216 | { | ||
| 217 | __blk_add_timer(req, &req->q->timeout_list); | ||
| 198 | } | 218 | } |
| 199 | 219 | ||
diff --git a/block/blk.h b/block/blk.h index e837b8f619b7..c90e1d8f7a2b 100644 --- a/block/blk.h +++ b/block/blk.h | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | #define BLK_BATCH_REQ 32 | 10 | #define BLK_BATCH_REQ 32 |
| 11 | 11 | ||
| 12 | extern struct kmem_cache *blk_requestq_cachep; | 12 | extern struct kmem_cache *blk_requestq_cachep; |
| 13 | extern struct kmem_cache *request_cachep; | ||
| 13 | extern struct kobj_type blk_queue_ktype; | 14 | extern struct kobj_type blk_queue_ktype; |
| 14 | extern struct ida blk_queue_ida; | 15 | extern struct ida blk_queue_ida; |
| 15 | 16 | ||
| @@ -34,14 +35,30 @@ bool __blk_end_bidi_request(struct request *rq, int error, | |||
| 34 | unsigned int nr_bytes, unsigned int bidi_bytes); | 35 | unsigned int nr_bytes, unsigned int bidi_bytes); |
| 35 | 36 | ||
| 36 | void blk_rq_timed_out_timer(unsigned long data); | 37 | void blk_rq_timed_out_timer(unsigned long data); |
| 38 | void blk_rq_check_expired(struct request *rq, unsigned long *next_timeout, | ||
| 39 | unsigned int *next_set); | ||
| 40 | void __blk_add_timer(struct request *req, struct list_head *timeout_list); | ||
| 37 | void blk_delete_timer(struct request *); | 41 | void blk_delete_timer(struct request *); |
| 38 | void blk_add_timer(struct request *); | 42 | void blk_add_timer(struct request *); |
| 39 | 43 | ||
| 44 | |||
| 45 | bool bio_attempt_front_merge(struct request_queue *q, struct request *req, | ||
| 46 | struct bio *bio); | ||
| 47 | bool bio_attempt_back_merge(struct request_queue *q, struct request *req, | ||
| 48 | struct bio *bio); | ||
| 49 | bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, | ||
| 50 | unsigned int *request_count); | ||
| 51 | |||
| 52 | void blk_account_io_start(struct request *req, bool new_io); | ||
| 53 | void blk_account_io_completion(struct request *req, unsigned int bytes); | ||
| 54 | void blk_account_io_done(struct request *req); | ||
| 55 | |||
| 40 | /* | 56 | /* |
| 41 | * Internal atomic flags for request handling | 57 | * Internal atomic flags for request handling |
| 42 | */ | 58 | */ |
| 43 | enum rq_atomic_flags { | 59 | enum rq_atomic_flags { |
| 44 | REQ_ATOM_COMPLETE = 0, | 60 | REQ_ATOM_COMPLETE = 0, |
| 61 | REQ_ATOM_STARTED, | ||
| 45 | }; | 62 | }; |
| 46 | 63 | ||
| 47 | /* | 64 | /* |
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index e07a5fd58ad7..4682546c5da7 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig | |||
| @@ -15,6 +15,9 @@ menuconfig BLK_DEV | |||
| 15 | 15 | ||
| 16 | if BLK_DEV | 16 | if BLK_DEV |
| 17 | 17 | ||
| 18 | config BLK_DEV_NULL_BLK | ||
| 19 | tristate "Null test block driver" | ||
| 20 | |||
| 18 | config BLK_DEV_FD | 21 | config BLK_DEV_FD |
| 19 | tristate "Normal floppy disk support" | 22 | tristate "Normal floppy disk support" |
| 20 | depends on ARCH_MAY_HAVE_PC_FDC | 23 | depends on ARCH_MAY_HAVE_PC_FDC |
diff --git a/drivers/block/Makefile b/drivers/block/Makefile index ca07399a8d99..03b3b4a2bd8a 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile | |||
| @@ -41,6 +41,7 @@ obj-$(CONFIG_BLK_DEV_RBD) += rbd.o | |||
| 41 | obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ | 41 | obj-$(CONFIG_BLK_DEV_PCIESSD_MTIP32XX) += mtip32xx/ |
| 42 | 42 | ||
| 43 | obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ | 43 | obj-$(CONFIG_BLK_DEV_RSXX) += rsxx/ |
| 44 | obj-$(CONFIG_BLK_DEV_NULL_BLK) += null_blk.o | ||
| 44 | 45 | ||
| 45 | nvme-y := nvme-core.o nvme-scsi.o | 46 | nvme-y := nvme-core.o nvme-scsi.o |
| 46 | swim_mod-y := swim.o swim_asm.o | 47 | swim_mod-y := swim.o swim_asm.o |
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index 04ceb7e2fadd..000abe2f105c 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c | |||
| @@ -2886,9 +2886,9 @@ static void do_fd_request(struct request_queue *q) | |||
| 2886 | return; | 2886 | return; |
| 2887 | 2887 | ||
| 2888 | if (WARN(atomic_read(&usage_count) == 0, | 2888 | if (WARN(atomic_read(&usage_count) == 0, |
| 2889 | "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%x\n", | 2889 | "warning: usage count=0, current_req=%p sect=%ld type=%x flags=%llx\n", |
| 2890 | current_req, (long)blk_rq_pos(current_req), current_req->cmd_type, | 2890 | current_req, (long)blk_rq_pos(current_req), current_req->cmd_type, |
| 2891 | current_req->cmd_flags)) | 2891 | (unsigned long long) current_req->cmd_flags)) |
| 2892 | return; | 2892 | return; |
| 2893 | 2893 | ||
| 2894 | if (test_and_set_bit(0, &fdc_busy)) { | 2894 | if (test_and_set_bit(0, &fdc_busy)) { |
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c new file mode 100644 index 000000000000..b5d842370cc9 --- /dev/null +++ b/drivers/block/null_blk.c | |||
| @@ -0,0 +1,635 @@ | |||
| 1 | #include <linux/module.h> | ||
| 2 | #include <linux/moduleparam.h> | ||
| 3 | #include <linux/sched.h> | ||
| 4 | #include <linux/fs.h> | ||
| 5 | #include <linux/blkdev.h> | ||
| 6 | #include <linux/init.h> | ||
| 7 | #include <linux/slab.h> | ||
| 8 | #include <linux/blk-mq.h> | ||
| 9 | #include <linux/hrtimer.h> | ||
| 10 | |||
| 11 | struct nullb_cmd { | ||
| 12 | struct list_head list; | ||
| 13 | struct llist_node ll_list; | ||
| 14 | struct call_single_data csd; | ||
| 15 | struct request *rq; | ||
| 16 | struct bio *bio; | ||
| 17 | unsigned int tag; | ||
| 18 | struct nullb_queue *nq; | ||
| 19 | }; | ||
| 20 | |||
| 21 | struct nullb_queue { | ||
| 22 | unsigned long *tag_map; | ||
| 23 | wait_queue_head_t wait; | ||
| 24 | unsigned int queue_depth; | ||
| 25 | |||
| 26 | struct nullb_cmd *cmds; | ||
| 27 | }; | ||
| 28 | |||
| 29 | struct nullb { | ||
| 30 | struct list_head list; | ||
| 31 | unsigned int index; | ||
| 32 | struct request_queue *q; | ||
| 33 | struct gendisk *disk; | ||
| 34 | struct hrtimer timer; | ||
| 35 | unsigned int queue_depth; | ||
| 36 | spinlock_t lock; | ||
| 37 | |||
| 38 | struct nullb_queue *queues; | ||
| 39 | unsigned int nr_queues; | ||
| 40 | }; | ||
| 41 | |||
| 42 | static LIST_HEAD(nullb_list); | ||
| 43 | static struct mutex lock; | ||
| 44 | static int null_major; | ||
| 45 | static int nullb_indexes; | ||
| 46 | |||
| 47 | struct completion_queue { | ||
| 48 | struct llist_head list; | ||
| 49 | struct hrtimer timer; | ||
| 50 | }; | ||
| 51 | |||
| 52 | /* | ||
| 53 | * These are per-cpu for now, they will need to be configured by the | ||
| 54 | * complete_queues parameter and appropriately mapped. | ||
| 55 | */ | ||
| 56 | static DEFINE_PER_CPU(struct completion_queue, completion_queues); | ||
| 57 | |||
| 58 | enum { | ||
| 59 | NULL_IRQ_NONE = 0, | ||
| 60 | NULL_IRQ_SOFTIRQ = 1, | ||
| 61 | NULL_IRQ_TIMER = 2, | ||
| 62 | |||
| 63 | NULL_Q_BIO = 0, | ||
| 64 | NULL_Q_RQ = 1, | ||
| 65 | NULL_Q_MQ = 2, | ||
| 66 | }; | ||
| 67 | |||
| 68 | static int submit_queues = 1; | ||
| 69 | module_param(submit_queues, int, S_IRUGO); | ||
| 70 | MODULE_PARM_DESC(submit_queues, "Number of submission queues"); | ||
| 71 | |||
| 72 | static int home_node = NUMA_NO_NODE; | ||
| 73 | module_param(home_node, int, S_IRUGO); | ||
| 74 | MODULE_PARM_DESC(home_node, "Home node for the device"); | ||
| 75 | |||
| 76 | static int queue_mode = NULL_Q_MQ; | ||
| 77 | module_param(queue_mode, int, S_IRUGO); | ||
| 78 | MODULE_PARM_DESC(use_mq, "Use blk-mq interface (0=bio,1=rq,2=multiqueue)"); | ||
| 79 | |||
| 80 | static int gb = 250; | ||
| 81 | module_param(gb, int, S_IRUGO); | ||
| 82 | MODULE_PARM_DESC(gb, "Size in GB"); | ||
| 83 | |||
| 84 | static int bs = 512; | ||
| 85 | module_param(bs, int, S_IRUGO); | ||
| 86 | MODULE_PARM_DESC(bs, "Block size (in bytes)"); | ||
| 87 | |||
| 88 | static int nr_devices = 2; | ||
| 89 | module_param(nr_devices, int, S_IRUGO); | ||
| 90 | MODULE_PARM_DESC(nr_devices, "Number of devices to register"); | ||
| 91 | |||
| 92 | static int irqmode = NULL_IRQ_SOFTIRQ; | ||
| 93 | module_param(irqmode, int, S_IRUGO); | ||
| 94 | MODULE_PARM_DESC(irqmode, "IRQ completion handler. 0-none, 1-softirq, 2-timer"); | ||
| 95 | |||
| 96 | static int completion_nsec = 10000; | ||
| 97 | module_param(completion_nsec, int, S_IRUGO); | ||
| 98 | MODULE_PARM_DESC(completion_nsec, "Time in ns to complete a request in hardware. Default: 10,000ns"); | ||
| 99 | |||
| 100 | static int hw_queue_depth = 64; | ||
| 101 | module_param(hw_queue_depth, int, S_IRUGO); | ||
| 102 | MODULE_PARM_DESC(hw_queue_depth, "Queue depth for each hardware queue. Default: 64"); | ||
| 103 | |||
| 104 | static bool use_per_node_hctx = true; | ||
| 105 | module_param(use_per_node_hctx, bool, S_IRUGO); | ||
| 106 | MODULE_PARM_DESC(use_per_node_hctx, "Use per-node allocation for hardware context queues. Default: true"); | ||
| 107 | |||
| 108 | static void put_tag(struct nullb_queue *nq, unsigned int tag) | ||
| 109 | { | ||
| 110 | clear_bit_unlock(tag, nq->tag_map); | ||
| 111 | |||
| 112 | if (waitqueue_active(&nq->wait)) | ||
| 113 | wake_up(&nq->wait); | ||
| 114 | } | ||
| 115 | |||
| 116 | static unsigned int get_tag(struct nullb_queue *nq) | ||
| 117 | { | ||
| 118 | unsigned int tag; | ||
| 119 | |||
| 120 | do { | ||
| 121 | tag = find_first_zero_bit(nq->tag_map, nq->queue_depth); | ||
| 122 | if (tag >= nq->queue_depth) | ||
| 123 | return -1U; | ||
| 124 | } while (test_and_set_bit_lock(tag, nq->tag_map)); | ||
| 125 | |||
| 126 | return tag; | ||
| 127 | } | ||
| 128 | |||
| 129 | static void free_cmd(struct nullb_cmd *cmd) | ||
| 130 | { | ||
| 131 | put_tag(cmd->nq, cmd->tag); | ||
| 132 | } | ||
| 133 | |||
| 134 | static struct nullb_cmd *__alloc_cmd(struct nullb_queue *nq) | ||
| 135 | { | ||
| 136 | struct nullb_cmd *cmd; | ||
| 137 | unsigned int tag; | ||
| 138 | |||
| 139 | tag = get_tag(nq); | ||
| 140 | if (tag != -1U) { | ||
| 141 | cmd = &nq->cmds[tag]; | ||
| 142 | cmd->tag = tag; | ||
| 143 | cmd->nq = nq; | ||
| 144 | return cmd; | ||
| 145 | } | ||
| 146 | |||
| 147 | return NULL; | ||
| 148 | } | ||
| 149 | |||
| 150 | static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait) | ||
| 151 | { | ||
| 152 | struct nullb_cmd *cmd; | ||
| 153 | DEFINE_WAIT(wait); | ||
| 154 | |||
| 155 | cmd = __alloc_cmd(nq); | ||
| 156 | if (cmd || !can_wait) | ||
| 157 | return cmd; | ||
| 158 | |||
| 159 | do { | ||
| 160 | prepare_to_wait(&nq->wait, &wait, TASK_UNINTERRUPTIBLE); | ||
| 161 | cmd = __alloc_cmd(nq); | ||
| 162 | if (cmd) | ||
| 163 | break; | ||
| 164 | |||
| 165 | io_schedule(); | ||
| 166 | } while (1); | ||
| 167 | |||
| 168 | finish_wait(&nq->wait, &wait); | ||
| 169 | return cmd; | ||
| 170 | } | ||
| 171 | |||
| 172 | static void end_cmd(struct nullb_cmd *cmd) | ||
| 173 | { | ||
| 174 | if (cmd->rq) { | ||
| 175 | if (queue_mode == NULL_Q_MQ) | ||
| 176 | blk_mq_end_io(cmd->rq, 0); | ||
| 177 | else { | ||
| 178 | INIT_LIST_HEAD(&cmd->rq->queuelist); | ||
| 179 | blk_end_request_all(cmd->rq, 0); | ||
| 180 | } | ||
| 181 | } else if (cmd->bio) | ||
| 182 | bio_endio(cmd->bio, 0); | ||
| 183 | |||
| 184 | if (queue_mode != NULL_Q_MQ) | ||
| 185 | free_cmd(cmd); | ||
| 186 | } | ||
| 187 | |||
| 188 | static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) | ||
| 189 | { | ||
| 190 | struct completion_queue *cq; | ||
| 191 | struct llist_node *entry; | ||
| 192 | struct nullb_cmd *cmd; | ||
| 193 | |||
| 194 | cq = &per_cpu(completion_queues, smp_processor_id()); | ||
| 195 | |||
| 196 | while ((entry = llist_del_all(&cq->list)) != NULL) { | ||
| 197 | do { | ||
| 198 | cmd = container_of(entry, struct nullb_cmd, ll_list); | ||
| 199 | end_cmd(cmd); | ||
| 200 | entry = entry->next; | ||
| 201 | } while (entry); | ||
| 202 | } | ||
| 203 | |||
| 204 | return HRTIMER_NORESTART; | ||
| 205 | } | ||
| 206 | |||
| 207 | static void null_cmd_end_timer(struct nullb_cmd *cmd) | ||
| 208 | { | ||
| 209 | struct completion_queue *cq = &per_cpu(completion_queues, get_cpu()); | ||
| 210 | |||
| 211 | cmd->ll_list.next = NULL; | ||
| 212 | if (llist_add(&cmd->ll_list, &cq->list)) { | ||
| 213 | ktime_t kt = ktime_set(0, completion_nsec); | ||
| 214 | |||
| 215 | hrtimer_start(&cq->timer, kt, HRTIMER_MODE_REL); | ||
| 216 | } | ||
| 217 | |||
| 218 | put_cpu(); | ||
| 219 | } | ||
| 220 | |||
| 221 | static void null_softirq_done_fn(struct request *rq) | ||
| 222 | { | ||
| 223 | blk_end_request_all(rq, 0); | ||
| 224 | } | ||
| 225 | |||
| 226 | #if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) | ||
| 227 | |||
| 228 | static void null_ipi_cmd_end_io(void *data) | ||
| 229 | { | ||
| 230 | struct completion_queue *cq; | ||
| 231 | struct llist_node *entry, *next; | ||
| 232 | struct nullb_cmd *cmd; | ||
| 233 | |||
| 234 | cq = &per_cpu(completion_queues, smp_processor_id()); | ||
| 235 | |||
| 236 | entry = llist_del_all(&cq->list); | ||
| 237 | |||
| 238 | while (entry) { | ||
| 239 | next = entry->next; | ||
| 240 | cmd = llist_entry(entry, struct nullb_cmd, ll_list); | ||
| 241 | end_cmd(cmd); | ||
| 242 | entry = next; | ||
| 243 | } | ||
| 244 | } | ||
| 245 | |||
| 246 | static void null_cmd_end_ipi(struct nullb_cmd *cmd) | ||
| 247 | { | ||
| 248 | struct call_single_data *data = &cmd->csd; | ||
| 249 | int cpu = get_cpu(); | ||
| 250 | struct completion_queue *cq = &per_cpu(completion_queues, cpu); | ||
| 251 | |||
| 252 | cmd->ll_list.next = NULL; | ||
| 253 | |||
| 254 | if (llist_add(&cmd->ll_list, &cq->list)) { | ||
| 255 | data->func = null_ipi_cmd_end_io; | ||
| 256 | data->flags = 0; | ||
| 257 | __smp_call_function_single(cpu, data, 0); | ||
| 258 | } | ||
| 259 | |||
| 260 | put_cpu(); | ||
| 261 | } | ||
| 262 | |||
| 263 | #endif /* CONFIG_SMP && CONFIG_USE_GENERIC_SMP_HELPERS */ | ||
| 264 | |||
| 265 | static inline void null_handle_cmd(struct nullb_cmd *cmd) | ||
| 266 | { | ||
| 267 | /* Complete IO by inline, softirq or timer */ | ||
| 268 | switch (irqmode) { | ||
| 269 | case NULL_IRQ_NONE: | ||
| 270 | end_cmd(cmd); | ||
| 271 | break; | ||
| 272 | case NULL_IRQ_SOFTIRQ: | ||
| 273 | #if defined(CONFIG_SMP) && defined(CONFIG_USE_GENERIC_SMP_HELPERS) | ||
| 274 | null_cmd_end_ipi(cmd); | ||
| 275 | #else | ||
| 276 | end_cmd(cmd); | ||
| 277 | #endif | ||
| 278 | break; | ||
| 279 | case NULL_IRQ_TIMER: | ||
| 280 | null_cmd_end_timer(cmd); | ||
| 281 | break; | ||
| 282 | } | ||
| 283 | } | ||
| 284 | |||
| 285 | static struct nullb_queue *nullb_to_queue(struct nullb *nullb) | ||
| 286 | { | ||
| 287 | int index = 0; | ||
| 288 | |||
| 289 | if (nullb->nr_queues != 1) | ||
| 290 | index = raw_smp_processor_id() / ((nr_cpu_ids + nullb->nr_queues - 1) / nullb->nr_queues); | ||
| 291 | |||
| 292 | return &nullb->queues[index]; | ||
| 293 | } | ||
| 294 | |||
| 295 | static void null_queue_bio(struct request_queue *q, struct bio *bio) | ||
| 296 | { | ||
| 297 | struct nullb *nullb = q->queuedata; | ||
| 298 | struct nullb_queue *nq = nullb_to_queue(nullb); | ||
| 299 | struct nullb_cmd *cmd; | ||
| 300 | |||
| 301 | cmd = alloc_cmd(nq, 1); | ||
| 302 | cmd->bio = bio; | ||
| 303 | |||
| 304 | null_handle_cmd(cmd); | ||
| 305 | } | ||
| 306 | |||
| 307 | static int null_rq_prep_fn(struct request_queue *q, struct request *req) | ||
| 308 | { | ||
| 309 | struct nullb *nullb = q->queuedata; | ||
| 310 | struct nullb_queue *nq = nullb_to_queue(nullb); | ||
| 311 | struct nullb_cmd *cmd; | ||
| 312 | |||
| 313 | cmd = alloc_cmd(nq, 0); | ||
| 314 | if (cmd) { | ||
| 315 | cmd->rq = req; | ||
| 316 | req->special = cmd; | ||
| 317 | return BLKPREP_OK; | ||
| 318 | } | ||
| 319 | |||
| 320 | return BLKPREP_DEFER; | ||
| 321 | } | ||
| 322 | |||
| 323 | static void null_request_fn(struct request_queue *q) | ||
| 324 | { | ||
| 325 | struct request *rq; | ||
| 326 | |||
| 327 | while ((rq = blk_fetch_request(q)) != NULL) { | ||
| 328 | struct nullb_cmd *cmd = rq->special; | ||
| 329 | |||
| 330 | spin_unlock_irq(q->queue_lock); | ||
| 331 | null_handle_cmd(cmd); | ||
| 332 | spin_lock_irq(q->queue_lock); | ||
| 333 | } | ||
| 334 | } | ||
| 335 | |||
| 336 | static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq) | ||
| 337 | { | ||
| 338 | struct nullb_cmd *cmd = rq->special; | ||
| 339 | |||
| 340 | cmd->rq = rq; | ||
| 341 | cmd->nq = hctx->driver_data; | ||
| 342 | |||
| 343 | null_handle_cmd(cmd); | ||
| 344 | return BLK_MQ_RQ_QUEUE_OK; | ||
| 345 | } | ||
| 346 | |||
| 347 | static struct blk_mq_hw_ctx *null_alloc_hctx(struct blk_mq_reg *reg, unsigned int hctx_index) | ||
| 348 | { | ||
| 349 | return kzalloc_node(sizeof(struct blk_mq_hw_ctx), GFP_KERNEL, | ||
| 350 | hctx_index); | ||
| 351 | } | ||
| 352 | |||
| 353 | static void null_free_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_index) | ||
| 354 | { | ||
| 355 | kfree(hctx); | ||
| 356 | } | ||
| 357 | |||
| 358 | static int null_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, | ||
| 359 | unsigned int index) | ||
| 360 | { | ||
| 361 | struct nullb *nullb = data; | ||
| 362 | struct nullb_queue *nq = &nullb->queues[index]; | ||
| 363 | |||
| 364 | init_waitqueue_head(&nq->wait); | ||
| 365 | nq->queue_depth = nullb->queue_depth; | ||
| 366 | nullb->nr_queues++; | ||
| 367 | hctx->driver_data = nq; | ||
| 368 | |||
| 369 | return 0; | ||
| 370 | } | ||
| 371 | |||
| 372 | static struct blk_mq_ops null_mq_ops = { | ||
| 373 | .queue_rq = null_queue_rq, | ||
| 374 | .map_queue = blk_mq_map_queue, | ||
| 375 | .init_hctx = null_init_hctx, | ||
| 376 | }; | ||
| 377 | |||
| 378 | static struct blk_mq_reg null_mq_reg = { | ||
| 379 | .ops = &null_mq_ops, | ||
| 380 | .queue_depth = 64, | ||
| 381 | .cmd_size = sizeof(struct nullb_cmd), | ||
| 382 | .flags = BLK_MQ_F_SHOULD_MERGE, | ||
| 383 | }; | ||
| 384 | |||
| 385 | static void null_del_dev(struct nullb *nullb) | ||
| 386 | { | ||
| 387 | list_del_init(&nullb->list); | ||
| 388 | |||
| 389 | del_gendisk(nullb->disk); | ||
| 390 | if (queue_mode == NULL_Q_MQ) | ||
| 391 | blk_mq_free_queue(nullb->q); | ||
| 392 | else | ||
| 393 | blk_cleanup_queue(nullb->q); | ||
| 394 | put_disk(nullb->disk); | ||
| 395 | kfree(nullb); | ||
| 396 | } | ||
| 397 | |||
| 398 | static int null_open(struct block_device *bdev, fmode_t mode) | ||
| 399 | { | ||
| 400 | return 0; | ||
| 401 | } | ||
| 402 | |||
| 403 | static void null_release(struct gendisk *disk, fmode_t mode) | ||
| 404 | { | ||
| 405 | } | ||
| 406 | |||
| 407 | static const struct block_device_operations null_fops = { | ||
| 408 | .owner = THIS_MODULE, | ||
| 409 | .open = null_open, | ||
| 410 | .release = null_release, | ||
| 411 | }; | ||
| 412 | |||
| 413 | static int setup_commands(struct nullb_queue *nq) | ||
| 414 | { | ||
| 415 | struct nullb_cmd *cmd; | ||
| 416 | int i, tag_size; | ||
| 417 | |||
| 418 | nq->cmds = kzalloc(nq->queue_depth * sizeof(*cmd), GFP_KERNEL); | ||
| 419 | if (!nq->cmds) | ||
| 420 | return 1; | ||
| 421 | |||
| 422 | tag_size = ALIGN(nq->queue_depth, BITS_PER_LONG) / BITS_PER_LONG; | ||
| 423 | nq->tag_map = kzalloc(tag_size * sizeof(unsigned long), GFP_KERNEL); | ||
| 424 | if (!nq->tag_map) { | ||
| 425 | kfree(nq->cmds); | ||
| 426 | return 1; | ||
| 427 | } | ||
| 428 | |||
| 429 | for (i = 0; i < nq->queue_depth; i++) { | ||
| 430 | cmd = &nq->cmds[i]; | ||
| 431 | INIT_LIST_HEAD(&cmd->list); | ||
| 432 | cmd->ll_list.next = NULL; | ||
| 433 | cmd->tag = -1U; | ||
| 434 | } | ||
| 435 | |||
| 436 | return 0; | ||
| 437 | } | ||
| 438 | |||
| 439 | static void cleanup_queue(struct nullb_queue *nq) | ||
| 440 | { | ||
| 441 | kfree(nq->tag_map); | ||
| 442 | kfree(nq->cmds); | ||
| 443 | } | ||
| 444 | |||
| 445 | static void cleanup_queues(struct nullb *nullb) | ||
| 446 | { | ||
| 447 | int i; | ||
| 448 | |||
| 449 | for (i = 0; i < nullb->nr_queues; i++) | ||
| 450 | cleanup_queue(&nullb->queues[i]); | ||
| 451 | |||
| 452 | kfree(nullb->queues); | ||
| 453 | } | ||
| 454 | |||
| 455 | static int setup_queues(struct nullb *nullb) | ||
| 456 | { | ||
| 457 | struct nullb_queue *nq; | ||
| 458 | int i; | ||
| 459 | |||
| 460 | nullb->queues = kzalloc(submit_queues * sizeof(*nq), GFP_KERNEL); | ||
| 461 | if (!nullb->queues) | ||
| 462 | return 1; | ||
| 463 | |||
| 464 | nullb->nr_queues = 0; | ||
| 465 | nullb->queue_depth = hw_queue_depth; | ||
| 466 | |||
| 467 | if (queue_mode == NULL_Q_MQ) | ||
| 468 | return 0; | ||
| 469 | |||
| 470 | for (i = 0; i < submit_queues; i++) { | ||
| 471 | nq = &nullb->queues[i]; | ||
| 472 | init_waitqueue_head(&nq->wait); | ||
| 473 | nq->queue_depth = hw_queue_depth; | ||
| 474 | if (setup_commands(nq)) | ||
| 475 | break; | ||
| 476 | nullb->nr_queues++; | ||
| 477 | } | ||
| 478 | |||
| 479 | if (i == submit_queues) | ||
| 480 | return 0; | ||
| 481 | |||
| 482 | cleanup_queues(nullb); | ||
| 483 | return 1; | ||
| 484 | } | ||
| 485 | |||
| 486 | static int null_add_dev(void) | ||
| 487 | { | ||
| 488 | struct gendisk *disk; | ||
| 489 | struct nullb *nullb; | ||
| 490 | sector_t size; | ||
| 491 | |||
| 492 | nullb = kzalloc_node(sizeof(*nullb), GFP_KERNEL, home_node); | ||
| 493 | if (!nullb) | ||
| 494 | return -ENOMEM; | ||
| 495 | |||
| 496 | spin_lock_init(&nullb->lock); | ||
| 497 | |||
| 498 | if (setup_queues(nullb)) | ||
| 499 | goto err; | ||
| 500 | |||
| 501 | if (queue_mode == NULL_Q_MQ) { | ||
| 502 | null_mq_reg.numa_node = home_node; | ||
| 503 | null_mq_reg.queue_depth = hw_queue_depth; | ||
| 504 | |||
| 505 | if (use_per_node_hctx) { | ||
| 506 | null_mq_reg.ops->alloc_hctx = null_alloc_hctx; | ||
| 507 | null_mq_reg.ops->free_hctx = null_free_hctx; | ||
| 508 | |||
| 509 | null_mq_reg.nr_hw_queues = nr_online_nodes; | ||
| 510 | } else { | ||
| 511 | null_mq_reg.ops->alloc_hctx = blk_mq_alloc_single_hw_queue; | ||
| 512 | null_mq_reg.ops->free_hctx = blk_mq_free_single_hw_queue; | ||
| 513 | |||
| 514 | null_mq_reg.nr_hw_queues = submit_queues; | ||
| 515 | } | ||
| 516 | |||
| 517 | nullb->q = blk_mq_init_queue(&null_mq_reg, nullb); | ||
| 518 | } else if (queue_mode == NULL_Q_BIO) { | ||
| 519 | nullb->q = blk_alloc_queue_node(GFP_KERNEL, home_node); | ||
| 520 | blk_queue_make_request(nullb->q, null_queue_bio); | ||
| 521 | } else { | ||
| 522 | nullb->q = blk_init_queue_node(null_request_fn, &nullb->lock, home_node); | ||
| 523 | blk_queue_prep_rq(nullb->q, null_rq_prep_fn); | ||
| 524 | if (nullb->q) | ||
| 525 | blk_queue_softirq_done(nullb->q, null_softirq_done_fn); | ||
| 526 | } | ||
| 527 | |||
| 528 | if (!nullb->q) | ||
| 529 | goto queue_fail; | ||
| 530 | |||
| 531 | nullb->q->queuedata = nullb; | ||
| 532 | queue_flag_set_unlocked(QUEUE_FLAG_NONROT, nullb->q); | ||
| 533 | |||
| 534 | disk = nullb->disk = alloc_disk_node(1, home_node); | ||
| 535 | if (!disk) { | ||
| 536 | queue_fail: | ||
| 537 | if (queue_mode == NULL_Q_MQ) | ||
| 538 | blk_mq_free_queue(nullb->q); | ||
| 539 | else | ||
| 540 | blk_cleanup_queue(nullb->q); | ||
| 541 | cleanup_queues(nullb); | ||
| 542 | err: | ||
| 543 | kfree(nullb); | ||
| 544 | return -ENOMEM; | ||
| 545 | } | ||
| 546 | |||
| 547 | mutex_lock(&lock); | ||
| 548 | list_add_tail(&nullb->list, &nullb_list); | ||
| 549 | nullb->index = nullb_indexes++; | ||
| 550 | mutex_unlock(&lock); | ||
| 551 | |||
| 552 | blk_queue_logical_block_size(nullb->q, bs); | ||
| 553 | blk_queue_physical_block_size(nullb->q, bs); | ||
| 554 | |||
| 555 | size = gb * 1024 * 1024 * 1024ULL; | ||
| 556 | sector_div(size, bs); | ||
| 557 | set_capacity(disk, size); | ||
| 558 | |||
| 559 | disk->flags |= GENHD_FL_EXT_DEVT; | ||
| 560 | disk->major = null_major; | ||
| 561 | disk->first_minor = nullb->index; | ||
| 562 | disk->fops = &null_fops; | ||
| 563 | disk->private_data = nullb; | ||
| 564 | disk->queue = nullb->q; | ||
| 565 | sprintf(disk->disk_name, "nullb%d", nullb->index); | ||
| 566 | add_disk(disk); | ||
| 567 | return 0; | ||
| 568 | } | ||
| 569 | |||
| 570 | static int __init null_init(void) | ||
| 571 | { | ||
| 572 | unsigned int i; | ||
| 573 | |||
| 574 | #if !defined(CONFIG_SMP) || !defined(CONFIG_USE_GENERIC_SMP_HELPERS) | ||
| 575 | if (irqmode == NULL_IRQ_SOFTIRQ) { | ||
| 576 | pr_warn("null_blk: softirq completions not available.\n"); | ||
| 577 | pr_warn("null_blk: using direct completions.\n"); | ||
| 578 | irqmode = NULL_IRQ_NONE; | ||
| 579 | } | ||
| 580 | #endif | ||
| 581 | |||
| 582 | if (submit_queues > nr_cpu_ids) | ||
| 583 | submit_queues = nr_cpu_ids; | ||
| 584 | else if (!submit_queues) | ||
| 585 | submit_queues = 1; | ||
| 586 | |||
| 587 | mutex_init(&lock); | ||
| 588 | |||
| 589 | /* Initialize a separate list for each CPU for issuing softirqs */ | ||
| 590 | for_each_possible_cpu(i) { | ||
| 591 | struct completion_queue *cq = &per_cpu(completion_queues, i); | ||
| 592 | |||
| 593 | init_llist_head(&cq->list); | ||
| 594 | |||
| 595 | if (irqmode != NULL_IRQ_TIMER) | ||
| 596 | continue; | ||
| 597 | |||
| 598 | hrtimer_init(&cq->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
| 599 | cq->timer.function = null_cmd_timer_expired; | ||
| 600 | } | ||
| 601 | |||
| 602 | null_major = register_blkdev(0, "nullb"); | ||
| 603 | if (null_major < 0) | ||
| 604 | return null_major; | ||
| 605 | |||
| 606 | for (i = 0; i < nr_devices; i++) { | ||
| 607 | if (null_add_dev()) { | ||
| 608 | unregister_blkdev(null_major, "nullb"); | ||
| 609 | return -EINVAL; | ||
| 610 | } | ||
| 611 | } | ||
| 612 | |||
| 613 | pr_info("null: module loaded\n"); | ||
| 614 | return 0; | ||
| 615 | } | ||
| 616 | |||
| 617 | static void __exit null_exit(void) | ||
| 618 | { | ||
| 619 | struct nullb *nullb; | ||
| 620 | |||
| 621 | unregister_blkdev(null_major, "nullb"); | ||
| 622 | |||
| 623 | mutex_lock(&lock); | ||
| 624 | while (!list_empty(&nullb_list)) { | ||
| 625 | nullb = list_entry(nullb_list.next, struct nullb, list); | ||
| 626 | null_del_dev(nullb); | ||
| 627 | } | ||
| 628 | mutex_unlock(&lock); | ||
| 629 | } | ||
| 630 | |||
| 631 | module_init(null_init); | ||
| 632 | module_exit(null_exit); | ||
| 633 | |||
| 634 | MODULE_AUTHOR("Jens Axboe <jaxboe@fusionio.com>"); | ||
| 635 | MODULE_LICENSE("GPL"); | ||
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 5693f6d7eddb..7fe4faaa149b 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c | |||
| @@ -1002,7 +1002,7 @@ static int sd_prep_fn(struct request_queue *q, struct request *rq) | |||
| 1002 | SCpnt->cmnd[0] = READ_6; | 1002 | SCpnt->cmnd[0] = READ_6; |
| 1003 | SCpnt->sc_data_direction = DMA_FROM_DEVICE; | 1003 | SCpnt->sc_data_direction = DMA_FROM_DEVICE; |
| 1004 | } else { | 1004 | } else { |
| 1005 | scmd_printk(KERN_ERR, SCpnt, "Unknown command %x\n", rq->cmd_flags); | 1005 | scmd_printk(KERN_ERR, SCpnt, "Unknown command %llx\n", (unsigned long long) rq->cmd_flags); |
| 1006 | goto out; | 1006 | goto out; |
| 1007 | } | 1007 | } |
| 1008 | 1008 | ||
diff --git a/include/linux/bio.h b/include/linux/bio.h index 162036aca741..060ff695085c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
| @@ -420,6 +420,8 @@ static inline void bio_list_init(struct bio_list *bl) | |||
| 420 | bl->head = bl->tail = NULL; | 420 | bl->head = bl->tail = NULL; |
| 421 | } | 421 | } |
| 422 | 422 | ||
| 423 | #define BIO_EMPTY_LIST { NULL, NULL } | ||
| 424 | |||
| 423 | #define bio_list_for_each(bio, bl) \ | 425 | #define bio_list_for_each(bio, bl) \ |
| 424 | for (bio = (bl)->head; bio; bio = bio->bi_next) | 426 | for (bio = (bl)->head; bio; bio = bio->bi_next) |
| 425 | 427 | ||
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h new file mode 100644 index 000000000000..ab0e9b2025b3 --- /dev/null +++ b/include/linux/blk-mq.h | |||
| @@ -0,0 +1,183 @@ | |||
| 1 | #ifndef BLK_MQ_H | ||
| 2 | #define BLK_MQ_H | ||
| 3 | |||
| 4 | #include <linux/blkdev.h> | ||
| 5 | |||
| 6 | struct blk_mq_tags; | ||
| 7 | |||
| 8 | struct blk_mq_cpu_notifier { | ||
| 9 | struct list_head list; | ||
| 10 | void *data; | ||
| 11 | void (*notify)(void *data, unsigned long action, unsigned int cpu); | ||
| 12 | }; | ||
| 13 | |||
| 14 | struct blk_mq_hw_ctx { | ||
| 15 | struct { | ||
| 16 | spinlock_t lock; | ||
| 17 | struct list_head dispatch; | ||
| 18 | } ____cacheline_aligned_in_smp; | ||
| 19 | |||
| 20 | unsigned long state; /* BLK_MQ_S_* flags */ | ||
| 21 | struct delayed_work delayed_work; | ||
| 22 | |||
| 23 | unsigned long flags; /* BLK_MQ_F_* flags */ | ||
| 24 | |||
| 25 | struct request_queue *queue; | ||
| 26 | unsigned int queue_num; | ||
| 27 | |||
| 28 | void *driver_data; | ||
| 29 | |||
| 30 | unsigned int nr_ctx; | ||
| 31 | struct blk_mq_ctx **ctxs; | ||
| 32 | unsigned int nr_ctx_map; | ||
| 33 | unsigned long *ctx_map; | ||
| 34 | |||
| 35 | struct request **rqs; | ||
| 36 | struct list_head page_list; | ||
| 37 | struct blk_mq_tags *tags; | ||
| 38 | |||
| 39 | unsigned long queued; | ||
| 40 | unsigned long run; | ||
| 41 | #define BLK_MQ_MAX_DISPATCH_ORDER 10 | ||
| 42 | unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; | ||
| 43 | |||
| 44 | unsigned int queue_depth; | ||
| 45 | unsigned int numa_node; | ||
| 46 | unsigned int cmd_size; /* per-request extra data */ | ||
| 47 | |||
| 48 | struct blk_mq_cpu_notifier cpu_notifier; | ||
| 49 | struct kobject kobj; | ||
| 50 | }; | ||
| 51 | |||
| 52 | struct blk_mq_reg { | ||
| 53 | struct blk_mq_ops *ops; | ||
| 54 | unsigned int nr_hw_queues; | ||
| 55 | unsigned int queue_depth; | ||
| 56 | unsigned int reserved_tags; | ||
| 57 | unsigned int cmd_size; /* per-request extra data */ | ||
| 58 | int numa_node; | ||
| 59 | unsigned int timeout; | ||
| 60 | unsigned int flags; /* BLK_MQ_F_* */ | ||
| 61 | }; | ||
| 62 | |||
| 63 | typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); | ||
| 64 | typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); | ||
| 65 | typedef struct blk_mq_hw_ctx *(alloc_hctx_fn)(struct blk_mq_reg *,unsigned int); | ||
| 66 | typedef void (free_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); | ||
| 67 | typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); | ||
| 68 | typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); | ||
| 69 | |||
| 70 | struct blk_mq_ops { | ||
| 71 | /* | ||
| 72 | * Queue request | ||
| 73 | */ | ||
| 74 | queue_rq_fn *queue_rq; | ||
| 75 | |||
| 76 | /* | ||
| 77 | * Map to specific hardware queue | ||
| 78 | */ | ||
| 79 | map_queue_fn *map_queue; | ||
| 80 | |||
| 81 | /* | ||
| 82 | * Called on request timeout | ||
| 83 | */ | ||
| 84 | rq_timed_out_fn *timeout; | ||
| 85 | |||
| 86 | /* | ||
| 87 | * Override for hctx allocations (should probably go) | ||
| 88 | */ | ||
| 89 | alloc_hctx_fn *alloc_hctx; | ||
| 90 | free_hctx_fn *free_hctx; | ||
| 91 | |||
| 92 | /* | ||
| 93 | * Called when the block layer side of a hardware queue has been | ||
| 94 | * set up, allowing the driver to allocate/init matching structures. | ||
| 95 | * Ditto for exit/teardown. | ||
| 96 | */ | ||
| 97 | init_hctx_fn *init_hctx; | ||
| 98 | exit_hctx_fn *exit_hctx; | ||
| 99 | }; | ||
| 100 | |||
| 101 | enum { | ||
| 102 | BLK_MQ_RQ_QUEUE_OK = 0, /* queued fine */ | ||
| 103 | BLK_MQ_RQ_QUEUE_BUSY = 1, /* requeue IO for later */ | ||
| 104 | BLK_MQ_RQ_QUEUE_ERROR = 2, /* end IO with error */ | ||
| 105 | |||
| 106 | BLK_MQ_F_SHOULD_MERGE = 1 << 0, | ||
| 107 | BLK_MQ_F_SHOULD_SORT = 1 << 1, | ||
| 108 | BLK_MQ_F_SHOULD_IPI = 1 << 2, | ||
| 109 | |||
| 110 | BLK_MQ_S_STOPPED = 1 << 0, | ||
| 111 | |||
| 112 | BLK_MQ_MAX_DEPTH = 2048, | ||
| 113 | }; | ||
| 114 | |||
| 115 | struct request_queue *blk_mq_init_queue(struct blk_mq_reg *, void *); | ||
| 116 | void blk_mq_free_queue(struct request_queue *); | ||
| 117 | int blk_mq_register_disk(struct gendisk *); | ||
| 118 | void blk_mq_unregister_disk(struct gendisk *); | ||
| 119 | void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struct blk_mq_hw_ctx *, struct request *, unsigned int), void *data); | ||
| 120 | |||
| 121 | void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); | ||
| 122 | |||
| 123 | void blk_mq_insert_request(struct request_queue *, struct request *, bool); | ||
| 124 | void blk_mq_run_queues(struct request_queue *q, bool async); | ||
| 125 | void blk_mq_free_request(struct request *rq); | ||
| 126 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *); | ||
| 127 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); | ||
| 128 | struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); | ||
| 129 | struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); | ||
| 130 | |||
| 131 | struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); | ||
| 132 | struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_reg *, unsigned int); | ||
| 133 | void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); | ||
| 134 | |||
| 135 | void blk_mq_end_io(struct request *rq, int error); | ||
| 136 | |||
| 137 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); | ||
| 138 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); | ||
| 139 | void blk_mq_stop_hw_queues(struct request_queue *q); | ||
| 140 | void blk_mq_start_stopped_hw_queues(struct request_queue *q); | ||
| 141 | |||
| 142 | /* | ||
| 143 | * Driver command data is immediately after the request. So subtract request | ||
| 144 | * size to get back to the original request. | ||
| 145 | */ | ||
| 146 | static inline struct request *blk_mq_rq_from_pdu(void *pdu) | ||
| 147 | { | ||
| 148 | return pdu - sizeof(struct request); | ||
| 149 | } | ||
| 150 | static inline void *blk_mq_rq_to_pdu(struct request *rq) | ||
| 151 | { | ||
| 152 | return (void *) rq + sizeof(*rq); | ||
| 153 | } | ||
| 154 | |||
| 155 | static inline struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, | ||
| 156 | unsigned int tag) | ||
| 157 | { | ||
| 158 | return hctx->rqs[tag]; | ||
| 159 | } | ||
| 160 | |||
| 161 | #define queue_for_each_hw_ctx(q, hctx, i) \ | ||
| 162 | for ((i) = 0, hctx = (q)->queue_hw_ctx[0]; \ | ||
| 163 | (i) < (q)->nr_hw_queues; (i)++, hctx = (q)->queue_hw_ctx[i]) | ||
| 164 | |||
| 165 | #define queue_for_each_ctx(q, ctx, i) \ | ||
| 166 | for ((i) = 0, ctx = per_cpu_ptr((q)->queue_ctx, 0); \ | ||
| 167 | (i) < (q)->nr_queues; (i)++, ctx = per_cpu_ptr(q->queue_ctx, (i))) | ||
| 168 | |||
| 169 | #define hctx_for_each_ctx(hctx, ctx, i) \ | ||
| 170 | for ((i) = 0, ctx = (hctx)->ctxs[0]; \ | ||
| 171 | (i) < (hctx)->nr_ctx; (i)++, ctx = (hctx)->ctxs[(i)]) | ||
| 172 | |||
| 173 | #define blk_ctx_sum(q, sum) \ | ||
| 174 | ({ \ | ||
| 175 | struct blk_mq_ctx *__x; \ | ||
| 176 | unsigned int __ret = 0, __i; \ | ||
| 177 | \ | ||
| 178 | queue_for_each_ctx((q), __x, __i) \ | ||
| 179 | __ret += sum; \ | ||
| 180 | __ret; \ | ||
| 181 | }) | ||
| 182 | |||
| 183 | #endif | ||
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fa1abeb45b76..238ef0ed62f8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
| @@ -178,19 +178,20 @@ enum rq_flag_bits { | |||
| 178 | __REQ_MIXED_MERGE, /* merge of different types, fail separately */ | 178 | __REQ_MIXED_MERGE, /* merge of different types, fail separately */ |
| 179 | __REQ_KERNEL, /* direct IO to kernel pages */ | 179 | __REQ_KERNEL, /* direct IO to kernel pages */ |
| 180 | __REQ_PM, /* runtime pm request */ | 180 | __REQ_PM, /* runtime pm request */ |
| 181 | __REQ_END, /* last of chain of requests */ | ||
| 181 | __REQ_NR_BITS, /* stops here */ | 182 | __REQ_NR_BITS, /* stops here */ |
| 182 | }; | 183 | }; |
| 183 | 184 | ||
| 184 | #define REQ_WRITE (1 << __REQ_WRITE) | 185 | #define REQ_WRITE (1ULL << __REQ_WRITE) |
| 185 | #define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) | 186 | #define REQ_FAILFAST_DEV (1ULL << __REQ_FAILFAST_DEV) |
| 186 | #define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) | 187 | #define REQ_FAILFAST_TRANSPORT (1ULL << __REQ_FAILFAST_TRANSPORT) |
| 187 | #define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) | 188 | #define REQ_FAILFAST_DRIVER (1ULL << __REQ_FAILFAST_DRIVER) |
| 188 | #define REQ_SYNC (1 << __REQ_SYNC) | 189 | #define REQ_SYNC (1ULL << __REQ_SYNC) |
| 189 | #define REQ_META (1 << __REQ_META) | 190 | #define REQ_META (1ULL << __REQ_META) |
| 190 | #define REQ_PRIO (1 << __REQ_PRIO) | 191 | #define REQ_PRIO (1ULL << __REQ_PRIO) |
| 191 | #define REQ_DISCARD (1 << __REQ_DISCARD) | 192 | #define REQ_DISCARD (1ULL << __REQ_DISCARD) |
| 192 | #define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME) | 193 | #define REQ_WRITE_SAME (1ULL << __REQ_WRITE_SAME) |
| 193 | #define REQ_NOIDLE (1 << __REQ_NOIDLE) | 194 | #define REQ_NOIDLE (1ULL << __REQ_NOIDLE) |
| 194 | 195 | ||
| 195 | #define REQ_FAILFAST_MASK \ | 196 | #define REQ_FAILFAST_MASK \ |
| 196 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) | 197 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) |
| @@ -206,28 +207,29 @@ enum rq_flag_bits { | |||
| 206 | #define REQ_NOMERGE_FLAGS \ | 207 | #define REQ_NOMERGE_FLAGS \ |
| 207 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) | 208 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) |
| 208 | 209 | ||
| 209 | #define REQ_RAHEAD (1 << __REQ_RAHEAD) | 210 | #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) |
| 210 | #define REQ_THROTTLED (1 << __REQ_THROTTLED) | 211 | #define REQ_THROTTLED (1ULL << __REQ_THROTTLED) |
| 211 | 212 | ||
| 212 | #define REQ_SORTED (1 << __REQ_SORTED) | 213 | #define REQ_SORTED (1ULL << __REQ_SORTED) |
| 213 | #define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) | 214 | #define REQ_SOFTBARRIER (1ULL << __REQ_SOFTBARRIER) |
| 214 | #define REQ_FUA (1 << __REQ_FUA) | 215 | #define REQ_FUA (1ULL << __REQ_FUA) |
| 215 | #define REQ_NOMERGE (1 << __REQ_NOMERGE) | 216 | #define REQ_NOMERGE (1ULL << __REQ_NOMERGE) |
| 216 | #define REQ_STARTED (1 << __REQ_STARTED) | 217 | #define REQ_STARTED (1ULL << __REQ_STARTED) |
| 217 | #define REQ_DONTPREP (1 << __REQ_DONTPREP) | 218 | #define REQ_DONTPREP (1ULL << __REQ_DONTPREP) |
| 218 | #define REQ_QUEUED (1 << __REQ_QUEUED) | 219 | #define REQ_QUEUED (1ULL << __REQ_QUEUED) |
| 219 | #define REQ_ELVPRIV (1 << __REQ_ELVPRIV) | 220 | #define REQ_ELVPRIV (1ULL << __REQ_ELVPRIV) |
| 220 | #define REQ_FAILED (1 << __REQ_FAILED) | 221 | #define REQ_FAILED (1ULL << __REQ_FAILED) |
| 221 | #define REQ_QUIET (1 << __REQ_QUIET) | 222 | #define REQ_QUIET (1ULL << __REQ_QUIET) |
| 222 | #define REQ_PREEMPT (1 << __REQ_PREEMPT) | 223 | #define REQ_PREEMPT (1ULL << __REQ_PREEMPT) |
| 223 | #define REQ_ALLOCED (1 << __REQ_ALLOCED) | 224 | #define REQ_ALLOCED (1ULL << __REQ_ALLOCED) |
| 224 | #define REQ_COPY_USER (1 << __REQ_COPY_USER) | 225 | #define REQ_COPY_USER (1ULL << __REQ_COPY_USER) |
| 225 | #define REQ_FLUSH (1 << __REQ_FLUSH) | 226 | #define REQ_FLUSH (1ULL << __REQ_FLUSH) |
| 226 | #define REQ_FLUSH_SEQ (1 << __REQ_FLUSH_SEQ) | 227 | #define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ) |
| 227 | #define REQ_IO_STAT (1 << __REQ_IO_STAT) | 228 | #define REQ_IO_STAT (1ULL << __REQ_IO_STAT) |
| 228 | #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) | 229 | #define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE) |
| 229 | #define REQ_SECURE (1 << __REQ_SECURE) | 230 | #define REQ_SECURE (1ULL << __REQ_SECURE) |
| 230 | #define REQ_KERNEL (1 << __REQ_KERNEL) | 231 | #define REQ_KERNEL (1ULL << __REQ_KERNEL) |
| 231 | #define REQ_PM (1 << __REQ_PM) | 232 | #define REQ_PM (1ULL << __REQ_PM) |
| 233 | #define REQ_END (1ULL << __REQ_END) | ||
| 232 | 234 | ||
| 233 | #endif /* __LINUX_BLK_TYPES_H */ | 235 | #endif /* __LINUX_BLK_TYPES_H */ |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0e6f765aa1f5..f26ec20f6354 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
| @@ -8,6 +8,7 @@ | |||
| 8 | #include <linux/major.h> | 8 | #include <linux/major.h> |
| 9 | #include <linux/genhd.h> | 9 | #include <linux/genhd.h> |
| 10 | #include <linux/list.h> | 10 | #include <linux/list.h> |
| 11 | #include <linux/llist.h> | ||
| 11 | #include <linux/timer.h> | 12 | #include <linux/timer.h> |
| 12 | #include <linux/workqueue.h> | 13 | #include <linux/workqueue.h> |
| 13 | #include <linux/pagemap.h> | 14 | #include <linux/pagemap.h> |
| @@ -94,12 +95,19 @@ enum rq_cmd_type_bits { | |||
| 94 | * as well! | 95 | * as well! |
| 95 | */ | 96 | */ |
| 96 | struct request { | 97 | struct request { |
| 97 | struct list_head queuelist; | 98 | union { |
| 98 | struct call_single_data csd; | 99 | struct list_head queuelist; |
| 100 | struct llist_node ll_list; | ||
| 101 | }; | ||
| 102 | union { | ||
| 103 | struct call_single_data csd; | ||
| 104 | struct work_struct mq_flush_data; | ||
| 105 | }; | ||
| 99 | 106 | ||
| 100 | struct request_queue *q; | 107 | struct request_queue *q; |
| 108 | struct blk_mq_ctx *mq_ctx; | ||
| 101 | 109 | ||
| 102 | unsigned int cmd_flags; | 110 | u64 cmd_flags; |
| 103 | enum rq_cmd_type_bits cmd_type; | 111 | enum rq_cmd_type_bits cmd_type; |
| 104 | unsigned long atomic_flags; | 112 | unsigned long atomic_flags; |
| 105 | 113 | ||
| @@ -160,8 +168,6 @@ struct request { | |||
| 160 | 168 | ||
| 161 | unsigned short ioprio; | 169 | unsigned short ioprio; |
| 162 | 170 | ||
| 163 | int ref_count; | ||
| 164 | |||
| 165 | void *special; /* opaque pointer available for LLD use */ | 171 | void *special; /* opaque pointer available for LLD use */ |
| 166 | char *buffer; /* kaddr of the current segment if available */ | 172 | char *buffer; /* kaddr of the current segment if available */ |
| 167 | 173 | ||
| @@ -215,6 +221,8 @@ struct request_pm_state | |||
| 215 | 221 | ||
| 216 | #include <linux/elevator.h> | 222 | #include <linux/elevator.h> |
| 217 | 223 | ||
| 224 | struct blk_queue_ctx; | ||
| 225 | |||
| 218 | typedef void (request_fn_proc) (struct request_queue *q); | 226 | typedef void (request_fn_proc) (struct request_queue *q); |
| 219 | typedef void (make_request_fn) (struct request_queue *q, struct bio *bio); | 227 | typedef void (make_request_fn) (struct request_queue *q, struct bio *bio); |
| 220 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); | 228 | typedef int (prep_rq_fn) (struct request_queue *, struct request *); |
| @@ -313,6 +321,18 @@ struct request_queue { | |||
| 313 | dma_drain_needed_fn *dma_drain_needed; | 321 | dma_drain_needed_fn *dma_drain_needed; |
| 314 | lld_busy_fn *lld_busy_fn; | 322 | lld_busy_fn *lld_busy_fn; |
| 315 | 323 | ||
| 324 | struct blk_mq_ops *mq_ops; | ||
| 325 | |||
| 326 | unsigned int *mq_map; | ||
| 327 | |||
| 328 | /* sw queues */ | ||
| 329 | struct blk_mq_ctx *queue_ctx; | ||
| 330 | unsigned int nr_queues; | ||
| 331 | |||
| 332 | /* hw dispatch queues */ | ||
| 333 | struct blk_mq_hw_ctx **queue_hw_ctx; | ||
| 334 | unsigned int nr_hw_queues; | ||
| 335 | |||
| 316 | /* | 336 | /* |
| 317 | * Dispatch queue sorting | 337 | * Dispatch queue sorting |
| 318 | */ | 338 | */ |
| @@ -361,6 +381,11 @@ struct request_queue { | |||
| 361 | */ | 381 | */ |
| 362 | struct kobject kobj; | 382 | struct kobject kobj; |
| 363 | 383 | ||
| 384 | /* | ||
| 385 | * mq queue kobject | ||
| 386 | */ | ||
| 387 | struct kobject mq_kobj; | ||
| 388 | |||
| 364 | #ifdef CONFIG_PM_RUNTIME | 389 | #ifdef CONFIG_PM_RUNTIME |
| 365 | struct device *dev; | 390 | struct device *dev; |
| 366 | int rpm_status; | 391 | int rpm_status; |
| @@ -425,7 +450,13 @@ struct request_queue { | |||
| 425 | unsigned long flush_pending_since; | 450 | unsigned long flush_pending_since; |
| 426 | struct list_head flush_queue[2]; | 451 | struct list_head flush_queue[2]; |
| 427 | struct list_head flush_data_in_flight; | 452 | struct list_head flush_data_in_flight; |
| 428 | struct request flush_rq; | 453 | union { |
| 454 | struct request flush_rq; | ||
| 455 | struct { | ||
| 456 | spinlock_t mq_flush_lock; | ||
| 457 | struct work_struct mq_flush_work; | ||
| 458 | }; | ||
| 459 | }; | ||
| 429 | 460 | ||
| 430 | struct mutex sysfs_lock; | 461 | struct mutex sysfs_lock; |
| 431 | 462 | ||
| @@ -437,14 +468,14 @@ struct request_queue { | |||
| 437 | struct bsg_class_device bsg_dev; | 468 | struct bsg_class_device bsg_dev; |
| 438 | #endif | 469 | #endif |
| 439 | 470 | ||
| 440 | #ifdef CONFIG_BLK_CGROUP | ||
| 441 | struct list_head all_q_node; | ||
| 442 | #endif | ||
| 443 | #ifdef CONFIG_BLK_DEV_THROTTLING | 471 | #ifdef CONFIG_BLK_DEV_THROTTLING |
| 444 | /* Throttle data */ | 472 | /* Throttle data */ |
| 445 | struct throtl_data *td; | 473 | struct throtl_data *td; |
| 446 | #endif | 474 | #endif |
| 447 | struct rcu_head rcu_head; | 475 | struct rcu_head rcu_head; |
| 476 | wait_queue_head_t mq_freeze_wq; | ||
| 477 | struct percpu_counter mq_usage_counter; | ||
| 478 | struct list_head all_q_node; | ||
| 448 | }; | 479 | }; |
| 449 | 480 | ||
| 450 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ | 481 | #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ |
| @@ -467,6 +498,7 @@ struct request_queue { | |||
| 467 | #define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ | 498 | #define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ |
| 468 | #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ | 499 | #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ |
| 469 | #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ | 500 | #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ |
| 501 | #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ | ||
| 470 | 502 | ||
| 471 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ | 503 | #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ |
| 472 | (1 << QUEUE_FLAG_STACKABLE) | \ | 504 | (1 << QUEUE_FLAG_STACKABLE) | \ |
| @@ -539,6 +571,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | |||
| 539 | #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) | 571 | #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) |
| 540 | #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) | 572 | #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) |
| 541 | #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) | 573 | #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) |
| 574 | #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) | ||
| 542 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) | 575 | #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) |
| 543 | #define blk_queue_noxmerges(q) \ | 576 | #define blk_queue_noxmerges(q) \ |
| 544 | test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) | 577 | test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) |
| @@ -570,7 +603,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | |||
| 570 | 603 | ||
| 571 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) | 604 | #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) |
| 572 | 605 | ||
| 573 | #define rq_data_dir(rq) ((rq)->cmd_flags & 1) | 606 | #define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0) |
| 574 | 607 | ||
| 575 | static inline unsigned int blk_queue_cluster(struct request_queue *q) | 608 | static inline unsigned int blk_queue_cluster(struct request_queue *q) |
| 576 | { | 609 | { |
| @@ -1013,6 +1046,7 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {} | |||
| 1013 | struct blk_plug { | 1046 | struct blk_plug { |
| 1014 | unsigned long magic; /* detect uninitialized use-cases */ | 1047 | unsigned long magic; /* detect uninitialized use-cases */ |
| 1015 | struct list_head list; /* requests */ | 1048 | struct list_head list; /* requests */ |
| 1049 | struct list_head mq_list; /* blk-mq requests */ | ||
| 1016 | struct list_head cb_list; /* md requires an unplug callback */ | 1050 | struct list_head cb_list; /* md requires an unplug callback */ |
| 1017 | }; | 1051 | }; |
| 1018 | #define BLK_MAX_REQUEST_COUNT 16 | 1052 | #define BLK_MAX_REQUEST_COUNT 16 |
| @@ -1050,7 +1084,10 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) | |||
| 1050 | { | 1084 | { |
| 1051 | struct blk_plug *plug = tsk->plug; | 1085 | struct blk_plug *plug = tsk->plug; |
| 1052 | 1086 | ||
| 1053 | return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list)); | 1087 | return plug && |
| 1088 | (!list_empty(&plug->list) || | ||
| 1089 | !list_empty(&plug->mq_list) || | ||
| 1090 | !list_empty(&plug->cb_list)); | ||
| 1054 | } | 1091 | } |
| 1055 | 1092 | ||
| 1056 | /* | 1093 | /* |
| @@ -1325,6 +1362,7 @@ static inline void put_dev_sector(Sector p) | |||
| 1325 | 1362 | ||
| 1326 | struct work_struct; | 1363 | struct work_struct; |
| 1327 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); | 1364 | int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); |
| 1365 | int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay); | ||
| 1328 | 1366 | ||
| 1329 | #ifdef CONFIG_BLK_CGROUP | 1367 | #ifdef CONFIG_BLK_CGROUP |
| 1330 | /* | 1368 | /* |
diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h index 0b23edbee309..1900bd0fa639 100644 --- a/include/linux/percpu_ida.h +++ b/include/linux/percpu_ida.h | |||
| @@ -16,6 +16,8 @@ struct percpu_ida { | |||
| 16 | * percpu_ida_init() | 16 | * percpu_ida_init() |
| 17 | */ | 17 | */ |
| 18 | unsigned nr_tags; | 18 | unsigned nr_tags; |
| 19 | unsigned percpu_max_size; | ||
| 20 | unsigned percpu_batch_size; | ||
| 19 | 21 | ||
| 20 | struct percpu_ida_cpu __percpu *tag_cpu; | 22 | struct percpu_ida_cpu __percpu *tag_cpu; |
| 21 | 23 | ||
| @@ -51,10 +53,29 @@ struct percpu_ida { | |||
| 51 | } ____cacheline_aligned_in_smp; | 53 | } ____cacheline_aligned_in_smp; |
| 52 | }; | 54 | }; |
| 53 | 55 | ||
| 56 | /* | ||
| 57 | * Number of tags we move between the percpu freelist and the global freelist at | ||
| 58 | * a time | ||
| 59 | */ | ||
| 60 | #define IDA_DEFAULT_PCPU_BATCH_MOVE 32U | ||
| 61 | /* Max size of percpu freelist, */ | ||
| 62 | #define IDA_DEFAULT_PCPU_SIZE ((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2) | ||
| 63 | |||
| 54 | int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp); | 64 | int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp); |
| 55 | void percpu_ida_free(struct percpu_ida *pool, unsigned tag); | 65 | void percpu_ida_free(struct percpu_ida *pool, unsigned tag); |
| 56 | 66 | ||
| 57 | void percpu_ida_destroy(struct percpu_ida *pool); | 67 | void percpu_ida_destroy(struct percpu_ida *pool); |
| 58 | int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags); | 68 | int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, |
| 69 | unsigned long max_size, unsigned long batch_size); | ||
| 70 | static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) | ||
| 71 | { | ||
| 72 | return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE, | ||
| 73 | IDA_DEFAULT_PCPU_BATCH_MOVE); | ||
| 74 | } | ||
| 75 | |||
| 76 | typedef int (*percpu_ida_cb)(unsigned, void *); | ||
| 77 | int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | ||
| 78 | void *data); | ||
| 59 | 79 | ||
| 80 | unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu); | ||
| 60 | #endif /* __PERCPU_IDA_H__ */ | 81 | #endif /* __PERCPU_IDA_H__ */ |
diff --git a/kernel/smp.c b/kernel/smp.c index 0564571dcdf7..dbc3d42d2c68 100644 --- a/kernel/smp.c +++ b/kernel/smp.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS | 18 | #ifdef CONFIG_USE_GENERIC_SMP_HELPERS |
| 19 | enum { | 19 | enum { |
| 20 | CSD_FLAG_LOCK = 0x01, | 20 | CSD_FLAG_LOCK = 0x01, |
| 21 | CSD_FLAG_WAIT = 0x02, | ||
| 21 | }; | 22 | }; |
| 22 | 23 | ||
| 23 | struct call_function_data { | 24 | struct call_function_data { |
| @@ -124,7 +125,7 @@ static void csd_lock(struct call_single_data *csd) | |||
| 124 | 125 | ||
| 125 | static void csd_unlock(struct call_single_data *csd) | 126 | static void csd_unlock(struct call_single_data *csd) |
| 126 | { | 127 | { |
| 127 | WARN_ON(!(csd->flags & CSD_FLAG_LOCK)); | 128 | WARN_ON((csd->flags & CSD_FLAG_WAIT) && !(csd->flags & CSD_FLAG_LOCK)); |
| 128 | 129 | ||
| 129 | /* | 130 | /* |
| 130 | * ensure we're all done before releasing data: | 131 | * ensure we're all done before releasing data: |
| @@ -146,6 +147,9 @@ void generic_exec_single(int cpu, struct call_single_data *csd, int wait) | |||
| 146 | unsigned long flags; | 147 | unsigned long flags; |
| 147 | int ipi; | 148 | int ipi; |
| 148 | 149 | ||
| 150 | if (wait) | ||
| 151 | csd->flags |= CSD_FLAG_WAIT; | ||
| 152 | |||
| 149 | raw_spin_lock_irqsave(&dst->lock, flags); | 153 | raw_spin_lock_irqsave(&dst->lock, flags); |
| 150 | ipi = list_empty(&dst->list); | 154 | ipi = list_empty(&dst->list); |
| 151 | list_add_tail(&csd->list, &dst->list); | 155 | list_add_tail(&csd->list, &dst->list); |
| @@ -340,6 +344,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *csd, | |||
| 340 | } | 344 | } |
| 341 | put_cpu(); | 345 | put_cpu(); |
| 342 | } | 346 | } |
| 347 | EXPORT_SYMBOL_GPL(__smp_call_function_single); | ||
| 343 | 348 | ||
| 344 | /** | 349 | /** |
| 345 | * smp_call_function_many(): Run a function on a set of other CPUs. | 350 | * smp_call_function_many(): Run a function on a set of other CPUs. |
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 93c5d5ecff4e..7473ee3b4ee7 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c | |||
| @@ -60,14 +60,15 @@ static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc) | |||
| 60 | void percpu_counter_set(struct percpu_counter *fbc, s64 amount) | 60 | void percpu_counter_set(struct percpu_counter *fbc, s64 amount) |
| 61 | { | 61 | { |
| 62 | int cpu; | 62 | int cpu; |
| 63 | unsigned long flags; | ||
| 63 | 64 | ||
| 64 | raw_spin_lock(&fbc->lock); | 65 | raw_spin_lock_irqsave(&fbc->lock, flags); |
| 65 | for_each_possible_cpu(cpu) { | 66 | for_each_possible_cpu(cpu) { |
| 66 | s32 *pcount = per_cpu_ptr(fbc->counters, cpu); | 67 | s32 *pcount = per_cpu_ptr(fbc->counters, cpu); |
| 67 | *pcount = 0; | 68 | *pcount = 0; |
| 68 | } | 69 | } |
| 69 | fbc->count = amount; | 70 | fbc->count = amount; |
| 70 | raw_spin_unlock(&fbc->lock); | 71 | raw_spin_unlock_irqrestore(&fbc->lock, flags); |
| 71 | } | 72 | } |
| 72 | EXPORT_SYMBOL(percpu_counter_set); | 73 | EXPORT_SYMBOL(percpu_counter_set); |
| 73 | 74 | ||
| @@ -78,9 +79,10 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) | |||
| 78 | preempt_disable(); | 79 | preempt_disable(); |
| 79 | count = __this_cpu_read(*fbc->counters) + amount; | 80 | count = __this_cpu_read(*fbc->counters) + amount; |
| 80 | if (count >= batch || count <= -batch) { | 81 | if (count >= batch || count <= -batch) { |
| 81 | raw_spin_lock(&fbc->lock); | 82 | unsigned long flags; |
| 83 | raw_spin_lock_irqsave(&fbc->lock, flags); | ||
| 82 | fbc->count += count; | 84 | fbc->count += count; |
| 83 | raw_spin_unlock(&fbc->lock); | 85 | raw_spin_unlock_irqrestore(&fbc->lock, flags); |
| 84 | __this_cpu_write(*fbc->counters, 0); | 86 | __this_cpu_write(*fbc->counters, 0); |
| 85 | } else { | 87 | } else { |
| 86 | __this_cpu_write(*fbc->counters, count); | 88 | __this_cpu_write(*fbc->counters, count); |
| @@ -97,14 +99,15 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) | |||
| 97 | { | 99 | { |
| 98 | s64 ret; | 100 | s64 ret; |
| 99 | int cpu; | 101 | int cpu; |
| 102 | unsigned long flags; | ||
| 100 | 103 | ||
| 101 | raw_spin_lock(&fbc->lock); | 104 | raw_spin_lock_irqsave(&fbc->lock, flags); |
| 102 | ret = fbc->count; | 105 | ret = fbc->count; |
| 103 | for_each_online_cpu(cpu) { | 106 | for_each_online_cpu(cpu) { |
| 104 | s32 *pcount = per_cpu_ptr(fbc->counters, cpu); | 107 | s32 *pcount = per_cpu_ptr(fbc->counters, cpu); |
| 105 | ret += *pcount; | 108 | ret += *pcount; |
| 106 | } | 109 | } |
| 107 | raw_spin_unlock(&fbc->lock); | 110 | raw_spin_unlock_irqrestore(&fbc->lock, flags); |
| 108 | return ret; | 111 | return ret; |
| 109 | } | 112 | } |
| 110 | EXPORT_SYMBOL(__percpu_counter_sum); | 113 | EXPORT_SYMBOL(__percpu_counter_sum); |
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index bab1ba2a4c71..b0698ea972c6 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c | |||
| @@ -30,15 +30,6 @@ | |||
| 30 | #include <linux/spinlock.h> | 30 | #include <linux/spinlock.h> |
| 31 | #include <linux/percpu_ida.h> | 31 | #include <linux/percpu_ida.h> |
| 32 | 32 | ||
| 33 | /* | ||
| 34 | * Number of tags we move between the percpu freelist and the global freelist at | ||
| 35 | * a time | ||
| 36 | */ | ||
| 37 | #define IDA_PCPU_BATCH_MOVE 32U | ||
| 38 | |||
| 39 | /* Max size of percpu freelist, */ | ||
| 40 | #define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2) | ||
| 41 | |||
| 42 | struct percpu_ida_cpu { | 33 | struct percpu_ida_cpu { |
| 43 | /* | 34 | /* |
| 44 | * Even though this is percpu, we need a lock for tag stealing by remote | 35 | * Even though this is percpu, we need a lock for tag stealing by remote |
| @@ -78,7 +69,7 @@ static inline void steal_tags(struct percpu_ida *pool, | |||
| 78 | struct percpu_ida_cpu *remote; | 69 | struct percpu_ida_cpu *remote; |
| 79 | 70 | ||
| 80 | for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); | 71 | for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); |
| 81 | cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2; | 72 | cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2; |
| 82 | cpus_have_tags--) { | 73 | cpus_have_tags--) { |
| 83 | cpu = cpumask_next(cpu, &pool->cpus_have_tags); | 74 | cpu = cpumask_next(cpu, &pool->cpus_have_tags); |
| 84 | 75 | ||
| @@ -123,7 +114,7 @@ static inline void alloc_global_tags(struct percpu_ida *pool, | |||
| 123 | { | 114 | { |
| 124 | move_tags(tags->freelist, &tags->nr_free, | 115 | move_tags(tags->freelist, &tags->nr_free, |
| 125 | pool->freelist, &pool->nr_free, | 116 | pool->freelist, &pool->nr_free, |
| 126 | min(pool->nr_free, IDA_PCPU_BATCH_MOVE)); | 117 | min(pool->nr_free, pool->percpu_batch_size)); |
| 127 | } | 118 | } |
| 128 | 119 | ||
| 129 | static inline unsigned alloc_local_tag(struct percpu_ida *pool, | 120 | static inline unsigned alloc_local_tag(struct percpu_ida *pool, |
| @@ -245,17 +236,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag) | |||
| 245 | wake_up(&pool->wait); | 236 | wake_up(&pool->wait); |
| 246 | } | 237 | } |
| 247 | 238 | ||
| 248 | if (nr_free == IDA_PCPU_SIZE) { | 239 | if (nr_free == pool->percpu_max_size) { |
| 249 | spin_lock(&pool->lock); | 240 | spin_lock(&pool->lock); |
| 250 | 241 | ||
| 251 | /* | 242 | /* |
| 252 | * Global lock held and irqs disabled, don't need percpu | 243 | * Global lock held and irqs disabled, don't need percpu |
| 253 | * lock | 244 | * lock |
| 254 | */ | 245 | */ |
| 255 | if (tags->nr_free == IDA_PCPU_SIZE) { | 246 | if (tags->nr_free == pool->percpu_max_size) { |
| 256 | move_tags(pool->freelist, &pool->nr_free, | 247 | move_tags(pool->freelist, &pool->nr_free, |
| 257 | tags->freelist, &tags->nr_free, | 248 | tags->freelist, &tags->nr_free, |
| 258 | IDA_PCPU_BATCH_MOVE); | 249 | pool->percpu_batch_size); |
| 259 | 250 | ||
| 260 | wake_up(&pool->wait); | 251 | wake_up(&pool->wait); |
| 261 | } | 252 | } |
| @@ -292,7 +283,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy); | |||
| 292 | * Allocation is percpu, but sharding is limited by nr_tags - for best | 283 | * Allocation is percpu, but sharding is limited by nr_tags - for best |
| 293 | * performance, the workload should not span more cpus than nr_tags / 128. | 284 | * performance, the workload should not span more cpus than nr_tags / 128. |
| 294 | */ | 285 | */ |
| 295 | int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) | 286 | int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, |
| 287 | unsigned long max_size, unsigned long batch_size) | ||
| 296 | { | 288 | { |
| 297 | unsigned i, cpu, order; | 289 | unsigned i, cpu, order; |
| 298 | 290 | ||
| @@ -301,6 +293,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) | |||
| 301 | init_waitqueue_head(&pool->wait); | 293 | init_waitqueue_head(&pool->wait); |
| 302 | spin_lock_init(&pool->lock); | 294 | spin_lock_init(&pool->lock); |
| 303 | pool->nr_tags = nr_tags; | 295 | pool->nr_tags = nr_tags; |
| 296 | pool->percpu_max_size = max_size; | ||
| 297 | pool->percpu_batch_size = batch_size; | ||
| 304 | 298 | ||
| 305 | /* Guard against overflow */ | 299 | /* Guard against overflow */ |
| 306 | if (nr_tags > (unsigned) INT_MAX + 1) { | 300 | if (nr_tags > (unsigned) INT_MAX + 1) { |
| @@ -319,7 +313,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) | |||
| 319 | pool->nr_free = nr_tags; | 313 | pool->nr_free = nr_tags; |
| 320 | 314 | ||
| 321 | pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + | 315 | pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + |
| 322 | IDA_PCPU_SIZE * sizeof(unsigned), | 316 | pool->percpu_max_size * sizeof(unsigned), |
| 323 | sizeof(unsigned)); | 317 | sizeof(unsigned)); |
| 324 | if (!pool->tag_cpu) | 318 | if (!pool->tag_cpu) |
| 325 | goto err; | 319 | goto err; |
| @@ -332,4 +326,65 @@ err: | |||
| 332 | percpu_ida_destroy(pool); | 326 | percpu_ida_destroy(pool); |
| 333 | return -ENOMEM; | 327 | return -ENOMEM; |
| 334 | } | 328 | } |
| 335 | EXPORT_SYMBOL_GPL(percpu_ida_init); | 329 | EXPORT_SYMBOL_GPL(__percpu_ida_init); |
| 330 | |||
| 331 | /** | ||
| 332 | * percpu_ida_for_each_free - iterate free ids of a pool | ||
| 333 | * @pool: pool to iterate | ||
| 334 | * @fn: interate callback function | ||
| 335 | * @data: parameter for @fn | ||
| 336 | * | ||
| 337 | * Note, this doesn't guarantee to iterate all free ids restrictly. Some free | ||
| 338 | * ids might be missed, some might be iterated duplicated, and some might | ||
| 339 | * be iterated and not free soon. | ||
| 340 | */ | ||
| 341 | int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, | ||
| 342 | void *data) | ||
| 343 | { | ||
| 344 | unsigned long flags; | ||
| 345 | struct percpu_ida_cpu *remote; | ||
| 346 | unsigned cpu, i, err = 0; | ||
| 347 | |||
| 348 | local_irq_save(flags); | ||
| 349 | for_each_possible_cpu(cpu) { | ||
| 350 | remote = per_cpu_ptr(pool->tag_cpu, cpu); | ||
| 351 | spin_lock(&remote->lock); | ||
| 352 | for (i = 0; i < remote->nr_free; i++) { | ||
| 353 | err = fn(remote->freelist[i], data); | ||
| 354 | if (err) | ||
| 355 | break; | ||
| 356 | } | ||
| 357 | spin_unlock(&remote->lock); | ||
| 358 | if (err) | ||
| 359 | goto out; | ||
| 360 | } | ||
| 361 | |||
| 362 | spin_lock(&pool->lock); | ||
| 363 | for (i = 0; i < pool->nr_free; i++) { | ||
| 364 | err = fn(pool->freelist[i], data); | ||
| 365 | if (err) | ||
| 366 | break; | ||
| 367 | } | ||
| 368 | spin_unlock(&pool->lock); | ||
| 369 | out: | ||
| 370 | local_irq_restore(flags); | ||
| 371 | return err; | ||
| 372 | } | ||
| 373 | EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); | ||
| 374 | |||
| 375 | /** | ||
| 376 | * percpu_ida_free_tags - return free tags number of a specific cpu or global pool | ||
| 377 | * @pool: pool related | ||
| 378 | * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids | ||
| 379 | * | ||
| 380 | * Note: this just returns a snapshot of free tags number. | ||
| 381 | */ | ||
| 382 | unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu) | ||
| 383 | { | ||
| 384 | struct percpu_ida_cpu *remote; | ||
| 385 | if (cpu == nr_cpu_ids) | ||
| 386 | return pool->nr_free; | ||
| 387 | remote = per_cpu_ptr(pool->tag_cpu, cpu); | ||
| 388 | return remote->nr_free; | ||
| 389 | } | ||
| 390 | EXPORT_SYMBOL_GPL(percpu_ida_free_tags); | ||
