Diffstat (limited to 'block/blk-mq.c')

 block/blk-mq.c | 118
 1 file changed, 77 insertions(+), 41 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f2d67b4047a0..7785ae96267a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -393,14 +393,16 @@ void __blk_mq_complete_request(struct request *rq)
  * Ends all I/O on a request. It does not handle partial completions.
  * The actual completion happens out-of-order, through a IPI handler.
  **/
-void blk_mq_complete_request(struct request *rq)
+void blk_mq_complete_request(struct request *rq, int error)
 {
 	struct request_queue *q = rq->q;
 
 	if (unlikely(blk_should_fake_timeout(q)))
 		return;
-	if (!blk_mark_rq_complete(rq))
+	if (!blk_mark_rq_complete(rq)) {
+		rq->errors = error;
 		__blk_mq_complete_request(rq);
+	}
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
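With the extra argument, a driver hands its completion status to blk-mq instead of writing rq->errors itself; blk-mq stores the value only after winning blk_mark_rq_complete(), so the store cannot race with a concurrent completion (for example from the timeout path). A before/after sketch of a driver-side completion handler (the driver and its my_dev_complete_rq() are hypothetical, not taken from this patch):

/* Hypothetical driver completion handler, for illustration only. */

/* Old pattern: rq->errors is written before the completion is claimed,
 * so a racing completion path can observe or overwrite it. */
static void my_dev_complete_rq_old(struct request *rq, int status)
{
	rq->errors = status;
	blk_mq_complete_request(rq);
}

/* New pattern: the status travels with the call, and blk-mq assigns
 * rq->errors only on the side that wins blk_mark_rq_complete(). */
static void my_dev_complete_rq(struct request *rq, int status)
{
	blk_mq_complete_request(rq, status);
}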
@@ -616,10 +618,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
 		 * If a request wasn't started before the queue was
 		 * marked dying, kill it here or it'll go unnoticed.
 		 */
-		if (unlikely(blk_queue_dying(rq->q))) {
-			rq->errors = -EIO;
-			blk_mq_complete_request(rq);
-		}
+		if (unlikely(blk_queue_dying(rq->q)))
+			blk_mq_complete_request(rq, -EIO);
 		return;
 	}
 	if (rq->cmd_flags & REQ_NO_TIMEOUT)
@@ -641,24 +641,16 @@ static void blk_mq_rq_timer(unsigned long priv)
 		.next = 0,
 		.next_set = 0,
 	};
-	struct blk_mq_hw_ctx *hctx;
 	int i;
 
-	queue_for_each_hw_ctx(q, hctx, i) {
-		/*
-		 * If not software queues are currently mapped to this
-		 * hardware queue, there's nothing to check
-		 */
-		if (!blk_mq_hw_queue_mapped(hctx))
-			continue;
-
-		blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data);
-	}
+	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
 
 	if (data.next_set) {
 		data.next = blk_rq_timeout(round_jiffies_up(data.next));
 		mod_timer(&q->timeout, data.next);
 	} else {
+		struct blk_mq_hw_ctx *hctx;
+
 		queue_for_each_hw_ctx(q, hctx, i) {
 			/* the hctx may be unmapped, so check it here */
 			if (blk_mq_hw_queue_mapped(hctx))
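The removed per-hctx loop is essentially what the new helper does internally: blk_mq_queue_tag_busy_iter() walks every mapped hardware queue of the request_queue and invokes the callback for each busy tag. A rough sketch of that shape (the real routine lives in blk-mq-tag.c; the per-tag bitmap walk is elided here):

/* Sketch of blk_mq_queue_tag_busy_iter(); the actual implementation walks
 * each hctx's tag bitmaps rather than leaving a placeholder comment. */
void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
				void *priv)
{
	struct blk_mq_hw_ctx *hctx;
	int i;

	queue_for_each_hw_ctx(q, hctx, i) {
		/* No software queues mapped here, nothing to check. */
		if (!blk_mq_hw_queue_mapped(hctx))
			continue;

		/* ... walk this hctx's busy tags, calling
		 * fn(hctx, rq, priv, ...) for each started request ... */
	}
}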
@@ -1789,13 +1781,19 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 	}
 }
 
-static void blk_mq_map_swqueue(struct request_queue *q)
+static void blk_mq_map_swqueue(struct request_queue *q,
+			       const struct cpumask *online_mask)
 {
 	unsigned int i;
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	struct blk_mq_tag_set *set = q->tag_set;
 
+	/*
+	 * Avoid others reading imcomplete hctx->cpumask through sysfs
+	 */
+	mutex_lock(&q->sysfs_lock);
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		cpumask_clear(hctx->cpumask);
 		hctx->nr_ctx = 0;
@@ -1806,16 +1804,17 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	 */
 	queue_for_each_ctx(q, ctx, i) {
 		/* If the cpu isn't online, the cpu is mapped to first hctx */
-		if (!cpu_online(i))
+		if (!cpumask_test_cpu(i, online_mask))
 			continue;
 
 		hctx = q->mq_ops->map_queue(q, i);
 		cpumask_set_cpu(i, hctx->cpumask);
-		cpumask_set_cpu(i, hctx->tags->cpumask);
 		ctx->index_hw = hctx->nr_ctx;
 		hctx->ctxs[hctx->nr_ctx++] = ctx;
 	}
 
+	mutex_unlock(&q->sysfs_lock);
+
 	queue_for_each_hw_ctx(q, hctx, i) {
 		struct blk_mq_ctxmap *map = &hctx->ctx_map;
 
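The q->sysfs_lock section opened in the previous hunk ("Avoid others reading imcomplete hctx->cpumask through sysfs") and closed here pairs with sysfs readers of the per-hctx CPU list: any reader serializing on the same mutex can no longer land in the window between cpumask_clear() and the repopulation loop above. A hypothetical reader to make the pairing concrete (show_hctx_cpus() is illustrative, not the actual blk-mq-sysfs.c code):

/* Hypothetical sysfs show routine for an hctx CPU list.  With
 * blk_mq_map_swqueue() holding q->sysfs_lock across the rebuild, this
 * reader sees either the old mask or the new one, never a half-built one. */
static ssize_t show_hctx_cpus(struct blk_mq_hw_ctx *hctx, char *page)
{
	struct request_queue *q = hctx->queue;
	unsigned int cpu;
	ssize_t len = 0;

	mutex_lock(&q->sysfs_lock);
	for_each_cpu(cpu, hctx->cpumask)
		len += snprintf(page + len, PAGE_SIZE - len, "%u ", cpu);
	mutex_unlock(&q->sysfs_lock);

	return len;
}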
@@ -1851,6 +1850,14 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		hctx->next_cpu = cpumask_first(hctx->cpumask);
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
 	}
+
+	queue_for_each_ctx(q, ctx, i) {
+		if (!cpumask_test_cpu(i, online_mask))
+			continue;
+
+		hctx = q->mq_ops->map_queue(q, i);
+		cpumask_set_cpu(i, hctx->tags->cpumask);
+	}
 }
 
 static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
@@ -1918,6 +1925,9 @@ void blk_mq_release(struct request_queue *q)
 		kfree(hctx);
 	}
 
+	kfree(q->mq_map);
+	q->mq_map = NULL;
+
 	kfree(q->queue_hw_ctx);
 
 	/* ctx kobj stays in queue_ctx */
@@ -2027,13 +2037,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	if (blk_mq_init_hw_queues(q, set))
 		goto err_hctxs;
 
+	get_online_cpus();
 	mutex_lock(&all_q_mutex);
-	list_add_tail(&q->all_q_node, &all_q_list);
-	mutex_unlock(&all_q_mutex);
 
+	list_add_tail(&q->all_q_node, &all_q_list);
 	blk_mq_add_queue_tag_set(set, q);
+	blk_mq_map_swqueue(q, cpu_online_mask);
 
-	blk_mq_map_swqueue(q);
+	mutex_unlock(&all_q_mutex);
+	put_online_cpus();
 
 	return q;
 
@@ -2057,30 +2069,27 @@ void blk_mq_free_queue(struct request_queue *q)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
 
+	mutex_lock(&all_q_mutex);
+	list_del_init(&q->all_q_node);
+	mutex_unlock(&all_q_mutex);
+
 	blk_mq_del_queue_tag_set(q);
 
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 	blk_mq_free_hw_queues(q, set);
 
 	percpu_ref_exit(&q->mq_usage_counter);
-
-	kfree(q->mq_map);
-
-	q->mq_map = NULL;
-
-	mutex_lock(&all_q_mutex);
-	list_del_init(&q->all_q_node);
-	mutex_unlock(&all_q_mutex);
 }
 
 /* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q)
+static void blk_mq_queue_reinit(struct request_queue *q,
+				const struct cpumask *online_mask)
 {
 	WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
 
 	blk_mq_sysfs_unregister(q);
 
-	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
+	blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
 
 	/*
 	 * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
@@ -2088,7 +2097,7 @@ static void blk_mq_queue_reinit(struct request_queue *q)
 	 * involves free and re-allocate memory, worthy doing?)
 	 */
 
-	blk_mq_map_swqueue(q);
+	blk_mq_map_swqueue(q, online_mask);
 
 	blk_mq_sysfs_register(q);
 }
@@ -2097,16 +2106,43 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 				      unsigned long action, void *hcpu)
 {
 	struct request_queue *q;
+	int cpu = (unsigned long)hcpu;
+	/*
+	 * New online cpumask which is going to be set in this hotplug event.
+	 * Declare this cpumasks as global as cpu-hotplug operation is invoked
+	 * one-by-one and dynamically allocating this could result in a failure.
+	 */
+	static struct cpumask online_new;
 
 	/*
-	 * Before new mappings are established, hotadded cpu might already
-	 * start handling requests. This doesn't break anything as we map
-	 * offline CPUs to first hardware queue. We will re-init the queue
-	 * below to get optimal settings.
+	 * Before hotadded cpu starts handling requests, new mappings must
+	 * be established. Otherwise, these requests in hw queue might
+	 * never be dispatched.
+	 *
+	 * For example, there is a single hw queue (hctx) and two CPU queues
+	 * (ctx0 for CPU0, and ctx1 for CPU1).
+	 *
+	 * Now CPU1 is just onlined and a request is inserted into
+	 * ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is
+	 * still zero.
+	 *
+	 * And then while running hw queue, flush_busy_ctxs() finds bit0 is
+	 * set in pending bitmap and tries to retrieve requests in
+	 * hctx->ctxs[0]->rq_list. But htx->ctxs[0] is a pointer to ctx0,
+	 * so the request in ctx1->rq_list is ignored.
 	 */
-	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
-	    action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_DEAD:
+	case CPU_UP_CANCELED:
+		cpumask_copy(&online_new, cpu_online_mask);
+		break;
+	case CPU_UP_PREPARE:
+		cpumask_copy(&online_new, cpu_online_mask);
+		cpumask_set_cpu(cpu, &online_new);
+		break;
+	default:
 		return NOTIFY_OK;
+	}
 
 	mutex_lock(&all_q_mutex);
 
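The scenario in the comment turns on the pending-bitmap bit being trusted as an index into hctx->ctxs[]: a ctx queues a request under the bit given by ctx->index_hw, which is only assigned once blk_mq_map_swqueue() has run for the newly onlined CPU. A simplified sketch of the flush_busy_ctxs() lookup it describes (the flat bitmap and the hctx_pending_bits() accessor are stand-ins; the real code walks the segmented hctx->ctx_map):

/* Simplified flush_busy_ctxs(): the bit number is used directly as an
 * index into hctx->ctxs[].  If a just-onlined CPU queues before the new
 * mapping exists, its ctx->index_hw is still 0 and the splice below
 * drains the wrong ctx, leaving the request stranded. */
static void flush_busy_ctxs_sketch(struct blk_mq_hw_ctx *hctx,
				   struct list_head *list)
{
	unsigned long *pending = hctx_pending_bits(hctx);	/* hypothetical accessor */
	unsigned int bit;

	for_each_set_bit(bit, pending, hctx->nr_ctx) {
		struct blk_mq_ctx *ctx = hctx->ctxs[bit];

		clear_bit(bit, pending);
		spin_lock(&ctx->lock);
		list_splice_tail_init(&ctx->rq_list, list);
		spin_unlock(&ctx->lock);
	}
}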
@@ -2130,7 +2166,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
 	}
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_mq_queue_reinit(q);
+		blk_mq_queue_reinit(q, &online_new);
 
 	list_for_each_entry(q, &all_q_list, all_q_node)
 		blk_mq_unfreeze_queue(q);