author    | Mike Snitzer <snitzer@redhat.com> | 2015-03-10 23:49:26 -0400
committer | Mike Snitzer <snitzer@redhat.com> | 2015-04-15 12:10:17 -0400
commit    | 022333427a8aa4ccb318a9db90cea4e69ca1826b
tree      | 9f3fad66840616cd281b449a99df4d30bfdfd6c2
parent    | bfebd1cdb497a57757c83f5fbf1a29931591e2a4
dm: optimize dm_mq_queue_rq to _not_ use kthread if using pure blk-mq
dm_mq_queue_rq() is in atomic context, so care must be taken not to
sleep -- as such, GFP_ATOMIC is used for the md->bs bioset allocations
and dm-mpath's call to blk_get_request(). In the future the bioset
allocations will hopefully go away (by removing support for partial
completions of bios in a cloned request).
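For context, the no-sleep rule is the standard one for blk-mq drivers: .queue_rq
can run with preemption disabled, so any allocation reached from it must fail
fast rather than block. A minimal sketch of the pattern (prep_clone() is a
hypothetical helper for illustration, not code from this patch):

```c
#include <linux/blk-mq.h>
#include <linux/blkdev.h>

/*
 * Hypothetical helper: everything reached from .queue_rq runs in
 * atomic context, so allocations must use GFP_ATOMIC -- GFP_KERNEL
 * and GFP_NOIO may sleep and are therefore forbidden on this path.
 */
static struct request *prep_clone(struct request_queue *q, struct request *rq)
{
	struct request *clone;

	/* GFP_ATOMIC: fail immediately instead of sleeping for memory */
	clone = blk_get_request(q, rq_data_dir(rq), GFP_ATOMIC);
	if (IS_ERR(clone))
		return NULL;	/* caller requeues on -ENOMEM */
	return clone;
}
```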
Also prepare for supporting DM blk-mq on top of old-style request_fn
device(s) if a new dm-mod 'use_blk_mq' parameter is set. The kthread
will still be used to queue work if blk-mq is used on top of old-style
request_fn device(s).
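When blk-mq DM stacks on old-style request_fn devices, the clone request's
memory is reserved up front in the blk-mq per-request payload (pdu), so nothing
on the .queue_rq path has to allocate it; the kthread then does the actual
mapping. A sketch of the layout this patch sets up (dm_clone_in_pdu() is a
hypothetical name, and struct dm_rq_target_io is private to drivers/md/dm.c,
so this only compiles in that context):

```c
#include <linux/blk-mq.h>

/*
 * Pdu layout when DM blk-mq sits on request_fn devices, per the
 * cmd_size hunk below:
 *
 *   [ struct request | struct dm_rq_target_io | struct request (clone) ]
 *                      ^ blk_mq_rq_to_pdu(rq)   ^ tio->clone
 *
 * Hypothetical accessor: the clone immediately follows the tio.
 */
static inline struct request *dm_clone_in_pdu(struct request *rq)
{
	return (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
}
```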
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
-rw-r--r-- | drivers/md/dm-mpath.c |  2
-rw-r--r-- | drivers/md/dm.c       | 64
2 files changed, 50 insertions, 16 deletions
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c8f07e5a9a17..63953477a07c 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -428,7 +428,7 @@ static int __multipath_map(struct dm_target *ti, struct request *clone,
 	} else {
 		/* blk-mq request-based interface */
 		*__clone = blk_get_request(bdev_get_queue(bdev),
-					   rq_data_dir(rq), GFP_KERNEL);
+					   rq_data_dir(rq), GFP_ATOMIC);
 		if (IS_ERR(*__clone))
 			/* ENOMEM, requeue */
 			return r;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3a66baac76ed..55cadb1a2735 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1077,9 +1077,10 @@ static void free_rq_clone(struct request *clone)
 
 	blk_rq_unprep_clone(clone);
 
-	if (clone->q && clone->q->mq_ops)
+	if (clone->q->mq_ops)
 		tio->ti->type->release_clone_rq(clone);
-	else
+	else if (!md->queue->mq_ops)
+		/* request_fn queue stacked on request_fn queue(s) */
 		free_clone_request(md, clone);
 
 	if (!md->queue->mq_ops)
@@ -1838,15 +1839,25 @@ static int setup_clone(struct request *clone, struct request *rq,
 static struct request *clone_rq(struct request *rq, struct mapped_device *md,
 				struct dm_rq_target_io *tio, gfp_t gfp_mask)
 {
-	struct request *clone = alloc_clone_request(md, gfp_mask);
+	/*
+	 * Do not allocate a clone if tio->clone was already set
+	 * (see: dm_mq_queue_rq).
+	 */
+	bool alloc_clone = !tio->clone;
+	struct request *clone;
 
-	if (!clone)
-		return NULL;
+	if (alloc_clone) {
+		clone = alloc_clone_request(md, gfp_mask);
+		if (!clone)
+			return NULL;
+	} else
+		clone = tio->clone;
 
 	blk_rq_init(NULL, clone);
 	if (setup_clone(clone, rq, tio, gfp_mask)) {
 		/* -ENOMEM */
-		free_clone_request(md, clone);
+		if (alloc_clone)
+			free_clone_request(md, clone);
 		return NULL;
 	}
 
@@ -1864,7 +1875,8 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
 	tio->orig = rq;
 	tio->error = 0;
 	memset(&tio->info, 0, sizeof(tio->info));
-	init_kthread_work(&tio->work, map_tio_request);
+	if (md->kworker_task)
+		init_kthread_work(&tio->work, map_tio_request);
 }
 
 static struct dm_rq_target_io *prep_tio(struct request *rq,
@@ -1941,7 +1953,7 @@ static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 	}
 	if (IS_ERR(clone))
 		return DM_MAPIO_REQUEUE;
-	if (setup_clone(clone, rq, tio, GFP_NOIO)) {
+	if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
 		/* -ENOMEM */
 		ti->type->release_clone_rq(clone);
 		return DM_MAPIO_REQUEUE;
@@ -2408,7 +2420,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
 	p->bs = NULL;
 
 out:
-	/* mempool bind completed, now no need any mempools in the table */
+	/* mempool bind completed, no longer need any mempools in the table */
 	dm_table_free_md_mempools(t);
 }
 
@@ -2713,9 +2725,24 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	/* Init tio using md established in .init_request */
 	init_tio(tio, rq, md);
 
-	/* Establish tio->ti before queuing work (map_tio_request) */
+	/*
+	 * Establish tio->ti before queuing work (map_tio_request)
+	 * or making direct call to map_request().
+	 */
 	tio->ti = ti;
-	queue_kthread_work(&md->kworker, &tio->work);
+
+	/* Clone the request if underlying devices aren't blk-mq */
+	if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
+		/* clone request is allocated at the end of the pdu */
+		tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
+		if (!clone_rq(rq, md, tio, GFP_ATOMIC))
+			return BLK_MQ_RQ_QUEUE_BUSY;
+		queue_kthread_work(&md->kworker, &tio->work);
+	} else {
+		/* Direct call is fine since .queue_rq allows allocations */
+		if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
+			dm_requeue_unmapped_original_request(md, rq);
+	}
 
 	return BLK_MQ_RQ_QUEUE_OK;
 }
@@ -2729,6 +2756,7 @@ static struct blk_mq_ops dm_mq_ops = {
 
 static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
 {
+	unsigned md_type = dm_get_md_type(md);
 	struct request_queue *q;
 	int err;
 
@@ -2738,7 +2766,11 @@ static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
 	md->tag_set.numa_node = NUMA_NO_NODE;
 	md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
 	md->tag_set.nr_hw_queues = 1;
-	md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
+	if (md_type == DM_TYPE_REQUEST_BASED) {
+		/* make the memory for non-blk-mq clone part of the pdu */
+		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request);
+	} else
+		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
 	md->tag_set.driver_data = md;
 
 	err = blk_mq_alloc_tag_set(&md->tag_set);
@@ -2756,7 +2788,8 @@ static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
 	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
 	blk_mq_register_disk(md->disk);
 
-	init_rq_based_worker_thread(md);
+	if (md_type == DM_TYPE_REQUEST_BASED)
+		init_rq_based_worker_thread(md);
 
 	return 0;
 
@@ -2876,7 +2909,7 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
 	set_bit(DMF_FREEING, &md->flags);
 	spin_unlock(&_minor_lock);
 
-	if (dm_request_based(md))
+	if (dm_request_based(md) && md->kworker_task)
 		flush_kthread_worker(&md->kworker);
 
 	/*
@@ -3130,7 +3163,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	 */
 	if (dm_request_based(md)) {
 		stop_queue(md->queue);
-		flush_kthread_worker(&md->kworker);
+		if (md->kworker_task)
+			flush_kthread_worker(&md->kworker);
 	}
 
 	flush_workqueue(md->wq);