path: root/block/blk-mq.c
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--  block/blk-mq.c  118
1 file changed, 77 insertions(+), 41 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f2d67b4047a0..7785ae96267a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -393,14 +393,16 @@ void __blk_mq_complete_request(struct request *rq)
  * Ends all I/O on a request. It does not handle partial completions.
  * The actual completion happens out-of-order, through an IPI handler.
  **/
-void blk_mq_complete_request(struct request *rq)
+void blk_mq_complete_request(struct request *rq, int error)
 {
         struct request_queue *q = rq->q;
 
         if (unlikely(blk_should_fake_timeout(q)))
                 return;
-        if (!blk_mark_rq_complete(rq))
+        if (!blk_mark_rq_complete(rq)) {
+                rq->errors = error;
                 __blk_mq_complete_request(rq);
+        }
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
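The signature change above means drivers no longer assign rq->errors themselves before completing; the error is handed to blk_mq_complete_request() and stored only once the request is successfully marked complete. A minimal sketch of a driver completion path under the new interface (my_complete_one and the status variable are illustrative, not part of this patch):

#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/errno.h>

/* Hypothetical driver completion helper using the new interface (sketch). */
static void my_complete_one(struct request *rq, int status)
{
        /*
         * Old pattern:  rq->errors = status; blk_mq_complete_request(rq);
         * New pattern:  pass the error in; the block layer sets rq->errors
         * only after blk_mark_rq_complete() succeeds.
         */
        blk_mq_complete_request(rq, status ? -EIO : 0);
}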
@@ -616,10 +618,8 @@ static void blk_mq_check_expired(struct blk_mq_hw_ctx *hctx,
                  * If a request wasn't started before the queue was
                  * marked dying, kill it here or it'll go unnoticed.
                  */
-                if (unlikely(blk_queue_dying(rq->q))) {
-                        rq->errors = -EIO;
-                        blk_mq_complete_request(rq);
-                }
+                if (unlikely(blk_queue_dying(rq->q)))
+                        blk_mq_complete_request(rq, -EIO);
                 return;
         }
         if (rq->cmd_flags & REQ_NO_TIMEOUT)
@@ -641,24 +641,16 @@ static void blk_mq_rq_timer(unsigned long priv)
                 .next           = 0,
                 .next_set       = 0,
         };
-        struct blk_mq_hw_ctx *hctx;
         int i;
 
-        queue_for_each_hw_ctx(q, hctx, i) {
-                /*
-                 * If no software queues are currently mapped to this
-                 * hardware queue, there's nothing to check
-                 */
-                if (!blk_mq_hw_queue_mapped(hctx))
-                        continue;
-
-                blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data);
-        }
+        blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
 
         if (data.next_set) {
                 data.next = blk_rq_timeout(round_jiffies_up(data.next));
                 mod_timer(&q->timeout, data.next);
         } else {
+                struct blk_mq_hw_ctx *hctx;
+
                 queue_for_each_hw_ctx(q, hctx, i) {
                         /* the hctx may be unmapped, so check it here */
                         if (blk_mq_hw_queue_mapped(hctx))
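blk_mq_queue_tag_busy_iter() (a block-layer-internal helper declared in block/blk-mq-tag.h) walks every started request on the queue, including those on unmapped hardware queues, and invokes a busy_iter_fn callback such as blk_mq_check_expired(). A hedged sketch of the callback shape, with illustrative names:

#include <linux/blk-mq.h>
#include "blk-mq-tag.h"        /* block-layer internal; assumes building inside block/ */

/* Illustrative busy_iter_fn-style callback (not from this patch). */
static void my_check_one_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
                            void *priv, bool reserved)
{
        /* examine a single in-flight request; priv carries caller state */
}

static void my_scan_queue(struct request_queue *q, void *state)
{
        /* replaces the old per-hctx queue_for_each_hw_ctx() loop */
        blk_mq_queue_tag_busy_iter(q, my_check_one_rq, state);
}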
@@ -1789,13 +1781,19 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
         }
 }
 
-static void blk_mq_map_swqueue(struct request_queue *q)
+static void blk_mq_map_swqueue(struct request_queue *q,
+                               const struct cpumask *online_mask)
 {
         unsigned int i;
         struct blk_mq_hw_ctx *hctx;
         struct blk_mq_ctx *ctx;
         struct blk_mq_tag_set *set = q->tag_set;
 
+        /*
+         * Avoid others reading an incomplete hctx->cpumask through sysfs
+         */
+        mutex_lock(&q->sysfs_lock);
+
         queue_for_each_hw_ctx(q, hctx, i) {
                 cpumask_clear(hctx->cpumask);
                 hctx->nr_ctx = 0;
@@ -1806,16 +1804,17 @@ static void blk_mq_map_swqueue(struct request_queue *q)
          */
         queue_for_each_ctx(q, ctx, i) {
                 /* If the cpu isn't online, the cpu is mapped to first hctx */
-                if (!cpu_online(i))
+                if (!cpumask_test_cpu(i, online_mask))
                         continue;
 
                 hctx = q->mq_ops->map_queue(q, i);
                 cpumask_set_cpu(i, hctx->cpumask);
-                cpumask_set_cpu(i, hctx->tags->cpumask);
                 ctx->index_hw = hctx->nr_ctx;
                 hctx->ctxs[hctx->nr_ctx++] = ctx;
         }
 
+        mutex_unlock(&q->sysfs_lock);
+
         queue_for_each_hw_ctx(q, hctx, i) {
                 struct blk_mq_ctxmap *map = &hctx->ctx_map;
 
@@ -1851,6 +1850,14 @@ static void blk_mq_map_swqueue(struct request_queue *q)
                 hctx->next_cpu = cpumask_first(hctx->cpumask);
                 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
         }
+
+        queue_for_each_ctx(q, ctx, i) {
+                if (!cpumask_test_cpu(i, online_mask))
+                        continue;
+
+                hctx = q->mq_ops->map_queue(q, i);
+                cpumask_set_cpu(i, hctx->tags->cpumask);
+        }
 }
 
 static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
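The q->sysfs_lock critical section exists because each hctx->cpumask is cleared and rebuilt in place; without the lock a concurrent sysfs read could observe a half-built mask. A simplified, hypothetical reader showing the pairing (the real attribute lives in blk-mq-sysfs.c; this is only a sketch):

#include <linux/blk-mq.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>

/* Hypothetical sysfs show routine for hctx->cpumask (sketch). */
static ssize_t my_hctx_cpus_show(struct blk_mq_hw_ctx *hctx, char *page)
{
        struct request_queue *q = hctx->queue;
        ssize_t len = 0;
        unsigned int cpu;

        mutex_lock(&q->sysfs_lock);     /* pairs with blk_mq_map_swqueue() */
        for_each_cpu(cpu, hctx->cpumask)
                len += scnprintf(page + len, PAGE_SIZE - len, "%u ", cpu);
        mutex_unlock(&q->sysfs_lock);

        len += scnprintf(page + len, PAGE_SIZE - len, "\n");
        return len;
}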
@@ -1918,6 +1925,9 @@ void blk_mq_release(struct request_queue *q)
                 kfree(hctx);
         }
 
+        kfree(q->mq_map);
+        q->mq_map = NULL;
+
         kfree(q->queue_hw_ctx);
 
         /* ctx kobj stays in queue_ctx */
@@ -2027,13 +2037,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
         if (blk_mq_init_hw_queues(q, set))
                 goto err_hctxs;
 
+        get_online_cpus();
         mutex_lock(&all_q_mutex);
-        list_add_tail(&q->all_q_node, &all_q_list);
-        mutex_unlock(&all_q_mutex);
 
+        list_add_tail(&q->all_q_node, &all_q_list);
         blk_mq_add_queue_tag_set(set, q);
+        blk_mq_map_swqueue(q, cpu_online_mask);
 
-        blk_mq_map_swqueue(q);
+        mutex_unlock(&all_q_mutex);
+        put_online_cpus();
 
         return q;
 
@@ -2057,30 +2069,27 @@ void blk_mq_free_queue(struct request_queue *q)
 {
         struct blk_mq_tag_set *set = q->tag_set;
 
+        mutex_lock(&all_q_mutex);
+        list_del_init(&q->all_q_node);
+        mutex_unlock(&all_q_mutex);
+
         blk_mq_del_queue_tag_set(q);
 
         blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
         blk_mq_free_hw_queues(q, set);
 
         percpu_ref_exit(&q->mq_usage_counter);
-
-        kfree(q->mq_map);
-
-        q->mq_map = NULL;
-
-        mutex_lock(&all_q_mutex);
-        list_del_init(&q->all_q_node);
-        mutex_unlock(&all_q_mutex);
 }
 
 /* Basically redo blk_mq_init_queue with queue frozen */
-static void blk_mq_queue_reinit(struct request_queue *q)
+static void blk_mq_queue_reinit(struct request_queue *q,
+                                const struct cpumask *online_mask)
 {
         WARN_ON_ONCE(!atomic_read(&q->mq_freeze_depth));
 
         blk_mq_sysfs_unregister(q);
 
-        blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues);
+        blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
 
         /*
          * redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
@@ -2088,7 +2097,7 @@ static void blk_mq_queue_reinit(struct request_queue *q)
          * involves free and re-allocate memory, worth doing?)
          */
 
-        blk_mq_map_swqueue(q);
+        blk_mq_map_swqueue(q, online_mask);
 
         blk_mq_sysfs_register(q);
 }
@@ -2097,16 +2106,43 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
                                       unsigned long action, void *hcpu)
 {
         struct request_queue *q;
+        int cpu = (unsigned long)hcpu;
+        /*
+         * New online cpumask which is going to be set in this hotplug event.
+         * Declare this cpumask as static since cpu-hotplug operations are
+         * invoked one-by-one and dynamic allocation could fail here.
+         */
+        static struct cpumask online_new;
 
         /*
-         * Before new mappings are established, hotadded cpu might already
-         * start handling requests. This doesn't break anything as we map
-         * offline CPUs to first hardware queue. We will re-init the queue
-         * below to get optimal settings.
+         * Before a hotadded cpu starts handling requests, new mappings must
+         * be established.  Otherwise, requests in the hw queue might
+         * never be dispatched.
+         *
+         * For example, there is a single hw queue (hctx) and two CPU queues
+         * (ctx0 for CPU0, and ctx1 for CPU1).
+         *
+         * Now CPU1 is just onlined and a request is inserted into
+         * ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is
+         * still zero.
+         *
+         * And then while running hw queue, flush_busy_ctxs() finds bit0 is
+         * set in pending bitmap and tries to retrieve requests in
+         * hctx->ctxs[0]->rq_list.  But hctx->ctxs[0] is a pointer to ctx0,
+         * so the request in ctx1->rq_list is ignored.
          */
-        if (action != CPU_DEAD && action != CPU_DEAD_FROZEN &&
-            action != CPU_ONLINE && action != CPU_ONLINE_FROZEN)
+        switch (action & ~CPU_TASKS_FROZEN) {
+        case CPU_DEAD:
+        case CPU_UP_CANCELED:
+                cpumask_copy(&online_new, cpu_online_mask);
+                break;
+        case CPU_UP_PREPARE:
+                cpumask_copy(&online_new, cpu_online_mask);
+                cpumask_set_cpu(cpu, &online_new);
+                break;
+        default:
                 return NOTIFY_OK;
+        }
 
         mutex_lock(&all_q_mutex);
 
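The race described in the comment above hinges on how the dispatch side turns pending-bitmap bits into software queues: each bit is an index into hctx->ctxs[], recorded from ctx->index_hw at insert time. A simplified sketch of that lookup (not the real flush_busy_ctxs(); the flat pending bitmap and the function name are illustrative, and the ctx fields assume the block-layer-internal block/blk-mq.h):

#include <linux/bitops.h>
#include <linux/blk-mq.h>
#include "blk-mq.h"     /* block-layer internal: struct blk_mq_ctx */

/* Simplified, illustrative version of the dispatch-side lookup (sketch). */
static void my_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx,
                               unsigned long *pending, struct list_head *list)
{
        unsigned int bit;

        /* each set bit is the ctx->index_hw recorded when the rq was queued */
        for_each_set_bit(bit, pending, hctx->nr_ctx) {
                struct blk_mq_ctx *ctx = hctx->ctxs[bit];

                /*
                 * If the request was inserted while ctx->index_hw was stale
                 * (still zero in the example above), the bit selects the
                 * wrong ctx here and the request is never spliced off.
                 */
                spin_lock(&ctx->lock);
                list_splice_tail_init(&ctx->rq_list, list);
                spin_unlock(&ctx->lock);
        }
}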
@@ -2130,7 +2166,7 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
         }
 
         list_for_each_entry(q, &all_q_list, all_q_node)
-                blk_mq_queue_reinit(q);
+                blk_mq_queue_reinit(q, &online_new);
 
         list_for_each_entry(q, &all_q_list, all_q_node)
                 blk_mq_unfreeze_queue(q);