author		Mike Snitzer <snitzer@redhat.com>	2015-03-10 23:49:26 -0400
committer	Mike Snitzer <snitzer@redhat.com>	2015-04-15 12:10:17 -0400
commit		022333427a8aa4ccb318a9db90cea4e69ca1826b (patch)
tree		9f3fad66840616cd281b449a99df4d30bfdfd6c2
parent		bfebd1cdb497a57757c83f5fbf1a29931591e2a4 (diff)
dm: optimize dm_mq_queue_rq to _not_ use kthread if using pure blk-mq
dm_mq_queue_rq() is in atomic context so care must be taken not to sleep -- as such GFP_ATOMIC is used for the md->bs bioset allocations and dm-mpath's call to blk_get_request().  In the future the bioset allocations will hopefully go away (by removing support for partial completions of bios in a cloned request).

Also prepare for supporting DM blk-mq on top of old-style request_fn device(s) if a new dm-mod 'use_blk_mq' parameter is set.  The kthread will still be used to queue work if blk-mq is used on top of old-style request_fn device(s).

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
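[Editor's note: for readers unfamiliar with the constraint described above, here is a minimal sketch of the allocation pattern a blk-mq .queue_rq handler must follow.  It is illustrative only and not taken from dm.c; example_dev, payload_size and example_queue_rq are hypothetical names.]

#include <linux/blk-mq.h>
#include <linux/slab.h>

struct example_dev {			/* hypothetical driver state */
	size_t payload_size;
};

/*
 * Illustrative only: .queue_rq runs in atomic context, so allocations
 * must not sleep.  GFP_ATOMIC never sleeps but can fail under memory
 * pressure; returning BLK_MQ_RQ_QUEUE_BUSY asks blk-mq to re-dispatch
 * the request later instead of blocking for memory here.
 */
static int example_queue_rq(struct example_dev *dev, struct request *rq)
{
	void *buf = kmalloc(dev->payload_size, GFP_ATOMIC);

	if (!buf)
		return BLK_MQ_RQ_QUEUE_BUSY;

	/* ... set up and dispatch the request using buf ... */
	kfree(buf);
	return BLK_MQ_RQ_QUEUE_OK;
}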
-rw-r--r--	drivers/md/dm-mpath.c	2
-rw-r--r--	drivers/md/dm.c		64
2 files changed, 50 insertions, 16 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c8f07e5a9a17..63953477a07c 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -428,7 +428,7 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 	} else {
 		/* blk-mq request-based interface */
 		*__clone = blk_get_request(bdev_get_queue(bdev),
-					   rq_data_dir(rq), GFP_KERNEL);
+					   rq_data_dir(rq), GFP_ATOMIC);
 		if (IS_ERR(*__clone))
 			/* ENOMEM, requeue */
 			return r;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3a66baac76ed..55cadb1a2735 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1077,9 +1077,10 @@ static void free_rq_clone(struct request *clone)
 
 	blk_rq_unprep_clone(clone);
 
-	if (clone->q && clone->q->mq_ops)
+	if (clone->q->mq_ops)
 		tio->ti->type->release_clone_rq(clone);
-	else
+	else if (!md->queue->mq_ops)
+		/* request_fn queue stacked on request_fn queue(s) */
 		free_clone_request(md, clone);
 
 	if (!md->queue->mq_ops)
@@ -1838,15 +1839,25 @@ static int setup_clone(struct request *clone, struct request *rq,
 static struct request *clone_rq(struct request *rq, struct mapped_device *md,
 				struct dm_rq_target_io *tio, gfp_t gfp_mask)
 {
-	struct request *clone = alloc_clone_request(md, gfp_mask);
+	/*
+	 * Do not allocate a clone if tio->clone was already set
+	 * (see: dm_mq_queue_rq).
+	 */
+	bool alloc_clone = !tio->clone;
+	struct request *clone;
 
-	if (!clone)
-		return NULL;
+	if (alloc_clone) {
+		clone = alloc_clone_request(md, gfp_mask);
+		if (!clone)
+			return NULL;
+	} else
+		clone = tio->clone;
 
 	blk_rq_init(NULL, clone);
 	if (setup_clone(clone, rq, tio, gfp_mask)) {
 		/* -ENOMEM */
-		free_clone_request(md, clone);
+		if (alloc_clone)
+			free_clone_request(md, clone);
 		return NULL;
 	}
 
@@ -1864,7 +1875,8 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
 	tio->orig = rq;
 	tio->error = 0;
 	memset(&tio->info, 0, sizeof(tio->info));
-	init_kthread_work(&tio->work, map_tio_request);
+	if (md->kworker_task)
+		init_kthread_work(&tio->work, map_tio_request);
 }
 
 static struct dm_rq_target_io *prep_tio(struct request *rq,
@@ -1941,7 +1953,7 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 	}
 	if (IS_ERR(clone))
 		return DM_MAPIO_REQUEUE;
-	if (setup_clone(clone, rq, tio, GFP_NOIO)) {
+	if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
 		/* -ENOMEM */
 		ti->type->release_clone_rq(clone);
 		return DM_MAPIO_REQUEUE;
@@ -2408,7 +2420,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
 	p->bs = NULL;
 
 out:
-	/* mempool bind completed, now no need any mempools in the table */
+	/* mempool bind completed, no longer need any mempools in the table */
 	dm_table_free_md_mempools(t);
 }
 
@@ -2713,9 +2725,24 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	/* Init tio using md established in .init_request */
 	init_tio(tio, rq, md);
 
-	/* Establish tio->ti before queuing work (map_tio_request) */
+	/*
+	 * Establish tio->ti before queuing work (map_tio_request)
+	 * or making direct call to map_request().
+	 */
 	tio->ti = ti;
-	queue_kthread_work(&md->kworker, &tio->work);
+
+	/* Clone the request if underlying devices aren't blk-mq */
+	if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
+		/* clone request is allocated at the end of the pdu */
+		tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
+		if (!clone_rq(rq, md, tio, GFP_ATOMIC))
+			return BLK_MQ_RQ_QUEUE_BUSY;
+		queue_kthread_work(&md->kworker, &tio->work);
+	} else {
+		/* Direct call is fine since .queue_rq allows allocations */
+		if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
+			dm_requeue_unmapped_original_request(md, rq);
+	}
 
 	return BLK_MQ_RQ_QUEUE_OK;
 }
@@ -2729,6 +2756,7 @@ static struct blk_mq_ops dm_mq_ops = {
 
 static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
 {
+	unsigned md_type = dm_get_md_type(md);
 	struct request_queue *q;
 	int err;
 
@@ -2738,7 +2766,11 @@ static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
 	md->tag_set.numa_node = NUMA_NO_NODE;
 	md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
 	md->tag_set.nr_hw_queues = 1;
-	md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
+	if (md_type == DM_TYPE_REQUEST_BASED) {
+		/* make the memory for non-blk-mq clone part of the pdu */
+		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request);
+	} else
+		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
 	md->tag_set.driver_data = md;
 
 	err = blk_mq_alloc_tag_set(&md->tag_set);
@@ -2756,7 +2788,8 @@ static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
 	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
 	blk_mq_register_disk(md->disk);
 
-	init_rq_based_worker_thread(md);
+	if (md_type == DM_TYPE_REQUEST_BASED)
+		init_rq_based_worker_thread(md);
 
 	return 0;
 
@@ -2876,7 +2909,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
 	set_bit(DMF_FREEING, &md->flags);
 	spin_unlock(&_minor_lock);
 
-	if (dm_request_based(md))
+	if (dm_request_based(md) && md->kworker_task)
 		flush_kthread_worker(&md->kworker);
 
 	/*
@@ -3130,7 +3163,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	 */
 	if (dm_request_based(md)) {
 		stop_queue(md->queue);
-		flush_kthread_worker(&md->kworker);
+		if (md->kworker_task)
+			flush_kthread_worker(&md->kworker);
 	}
 
 	flush_workqueue(md->wq);
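[Editor's note: the dm_mq_queue_rq and cmd_size hunks above rely on blk-mq's per-request driver data ("pdu"): by reserving room for a struct request behind the dm_rq_target_io, the clone needs no separate allocation in atomic context.  Below is a minimal sketch of that layout under simplified, hypothetical names (example_tio, example_point_clone_into_pdu), not the real dm structures.]

#include <linux/blk-mq.h>

/* stand-in for struct dm_rq_target_io; the real struct has more fields */
struct example_tio {
	struct request *clone;
};

/*
 * Pdu layout when the clone is embedded:
 *
 *   [ struct example_tio | struct request (clone) ]
 *   ^ blk_mq_rq_to_pdu(rq)
 *
 * Room for both is reserved when the tag set is configured:
 *   tag_set->cmd_size = sizeof(struct example_tio) + sizeof(struct request);
 */
static void example_point_clone_into_pdu(struct request *rq)
{
	struct example_tio *tio = blk_mq_rq_to_pdu(rq);

	/* the clone starts immediately after the tio within the pdu */
	tio->clone = (struct request *)(tio + 1);
}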