aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/blk-core.c20
-rw-r--r--block/blk-exec.c2
-rw-r--r--block/blk-flush.c105
-rw-r--r--block/blk-lib.c8
-rw-r--r--block/blk-merge.c91
-rw-r--r--block/blk-mq-cpu.c14
-rw-r--r--block/blk-mq-tag.c2
-rw-r--r--block/blk-mq.c237
-rw-r--r--block/blk-mq.h5
-rw-r--r--block/blk-sysfs.c2
-rw-r--r--block/blk-timeout.c2
-rw-r--r--block/blk.h2
12 files changed, 243 insertions, 247 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index c00e0bdeab4a..853f92749202 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
693 if (!uninit_q) 693 if (!uninit_q)
694 return NULL; 694 return NULL;
695 695
696 uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
697 if (!uninit_q->flush_rq)
698 goto out_cleanup_queue;
699
696 q = blk_init_allocated_queue(uninit_q, rfn, lock); 700 q = blk_init_allocated_queue(uninit_q, rfn, lock);
697 if (!q) 701 if (!q)
698 blk_cleanup_queue(uninit_q); 702 goto out_free_flush_rq;
699
700 return q; 703 return q;
704
705out_free_flush_rq:
706 kfree(uninit_q->flush_rq);
707out_cleanup_queue:
708 blk_cleanup_queue(uninit_q);
709 return NULL;
701} 710}
702EXPORT_SYMBOL(blk_init_queue_node); 711EXPORT_SYMBOL(blk_init_queue_node);
703 712
@@ -1127,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
1127struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) 1136struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
1128{ 1137{
1129 if (q->mq_ops) 1138 if (q->mq_ops)
1130 return blk_mq_alloc_request(q, rw, gfp_mask, false); 1139 return blk_mq_alloc_request(q, rw, gfp_mask);
1131 else 1140 else
1132 return blk_old_get_request(q, rw, gfp_mask); 1141 return blk_old_get_request(q, rw, gfp_mask);
1133} 1142}
@@ -1278,6 +1287,11 @@ void __blk_put_request(struct request_queue *q, struct request *req)
1278 if (unlikely(!q)) 1287 if (unlikely(!q))
1279 return; 1288 return;
1280 1289
1290 if (q->mq_ops) {
1291 blk_mq_free_request(req);
1292 return;
1293 }
1294
1281 blk_pm_put_request(req); 1295 blk_pm_put_request(req);
1282 1296
1283 elv_completed_request(q, req); 1297 elv_completed_request(q, req);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index bbfc072a79c2..dbf4502b1d67 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
65 * be resued after dying flag is set 65 * be resued after dying flag is set
66 */ 66 */
67 if (q->mq_ops) { 67 if (q->mq_ops) {
68 blk_mq_insert_request(q, rq, true); 68 blk_mq_insert_request(rq, at_head, true, false);
69 return; 69 return;
70 } 70 }
71 71
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 9288aaf35c21..f598f794c3c6 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq)
130 blk_clear_rq_complete(rq); 130 blk_clear_rq_complete(rq);
131} 131}
132 132
133static void mq_flush_data_run(struct work_struct *work) 133static void mq_flush_run(struct work_struct *work)
134{ 134{
135 struct request *rq; 135 struct request *rq;
136 136
137 rq = container_of(work, struct request, mq_flush_data); 137 rq = container_of(work, struct request, mq_flush_work);
138 138
139 memset(&rq->csd, 0, sizeof(rq->csd)); 139 memset(&rq->csd, 0, sizeof(rq->csd));
140 blk_mq_run_request(rq, true, false); 140 blk_mq_insert_request(rq, false, true, false);
141} 141}
142 142
143static void blk_mq_flush_data_insert(struct request *rq) 143static bool blk_flush_queue_rq(struct request *rq)
144{ 144{
145 INIT_WORK(&rq->mq_flush_data, mq_flush_data_run); 145 if (rq->q->mq_ops) {
146 kblockd_schedule_work(rq->q, &rq->mq_flush_data); 146 INIT_WORK(&rq->mq_flush_work, mq_flush_run);
147 kblockd_schedule_work(rq->q, &rq->mq_flush_work);
148 return false;
149 } else {
150 list_add_tail(&rq->queuelist, &rq->q->queue_head);
151 return true;
152 }
147} 153}
148 154
149/** 155/**
@@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
187 193
188 case REQ_FSEQ_DATA: 194 case REQ_FSEQ_DATA:
189 list_move_tail(&rq->flush.list, &q->flush_data_in_flight); 195 list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
190 if (q->mq_ops) 196 queued = blk_flush_queue_rq(rq);
191 blk_mq_flush_data_insert(rq);
192 else {
193 list_add(&rq->queuelist, &q->queue_head);
194 queued = true;
195 }
196 break; 197 break;
197 198
198 case REQ_FSEQ_DONE: 199 case REQ_FSEQ_DONE:
@@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
216 } 217 }
217 218
218 kicked = blk_kick_flush(q); 219 kicked = blk_kick_flush(q);
219 /* blk_mq_run_flush will run queue */
220 if (q->mq_ops)
221 return queued;
222 return kicked | queued; 220 return kicked | queued;
223} 221}
224 222
@@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error)
230 struct request *rq, *n; 228 struct request *rq, *n;
231 unsigned long flags = 0; 229 unsigned long flags = 0;
232 230
233 if (q->mq_ops) { 231 if (q->mq_ops)
234 blk_mq_free_request(flush_rq);
235 spin_lock_irqsave(&q->mq_flush_lock, flags); 232 spin_lock_irqsave(&q->mq_flush_lock, flags);
236 } 233
237 running = &q->flush_queue[q->flush_running_idx]; 234 running = &q->flush_queue[q->flush_running_idx];
238 BUG_ON(q->flush_pending_idx == q->flush_running_idx); 235 BUG_ON(q->flush_pending_idx == q->flush_running_idx);
239 236
@@ -263,49 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error)
263 * kblockd. 260 * kblockd.
264 */ 261 */
265 if (queued || q->flush_queue_delayed) { 262 if (queued || q->flush_queue_delayed) {
266 if (!q->mq_ops) 263 WARN_ON(q->mq_ops);
267 blk_run_queue_async(q); 264 blk_run_queue_async(q);
268 else
269 /*
270 * This can be optimized to only run queues with requests
271 * queued if necessary.
272 */
273 blk_mq_run_queues(q, true);
274 } 265 }
275 q->flush_queue_delayed = 0; 266 q->flush_queue_delayed = 0;
276 if (q->mq_ops) 267 if (q->mq_ops)
277 spin_unlock_irqrestore(&q->mq_flush_lock, flags); 268 spin_unlock_irqrestore(&q->mq_flush_lock, flags);
278} 269}
279 270
280static void mq_flush_work(struct work_struct *work)
281{
282 struct request_queue *q;
283 struct request *rq;
284
285 q = container_of(work, struct request_queue, mq_flush_work);
286
287 /* We don't need set REQ_FLUSH_SEQ, it's for consistency */
288 rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
289 __GFP_WAIT|GFP_ATOMIC, true);
290 rq->cmd_type = REQ_TYPE_FS;
291 rq->end_io = flush_end_io;
292
293 blk_mq_run_request(rq, true, false);
294}
295
296/*
297 * We can't directly use q->flush_rq, because it doesn't have tag and is not in
298 * hctx->rqs[]. so we must allocate a new request, since we can't sleep here,
299 * so offload the work to workqueue.
300 *
301 * Note: we assume a flush request finished in any hardware queue will flush
302 * the whole disk cache.
303 */
304static void mq_run_flush(struct request_queue *q)
305{
306 kblockd_schedule_work(q, &q->mq_flush_work);
307}
308
309/** 271/**
310 * blk_kick_flush - consider issuing flush request 272 * blk_kick_flush - consider issuing flush request
311 * @q: request_queue being kicked 273 * @q: request_queue being kicked
@@ -340,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q)
340 * different from running_idx, which means flush is in flight. 302 * different from running_idx, which means flush is in flight.
341 */ 303 */
342 q->flush_pending_idx ^= 1; 304 q->flush_pending_idx ^= 1;
305
343 if (q->mq_ops) { 306 if (q->mq_ops) {
344 mq_run_flush(q); 307 struct blk_mq_ctx *ctx = first_rq->mq_ctx;
345 return true; 308 struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
309
310 blk_mq_rq_init(hctx, q->flush_rq);
311 q->flush_rq->mq_ctx = ctx;
312
313 /*
314 * Reuse the tag value from the fist waiting request,
315 * with blk-mq the tag is generated during request
316 * allocation and drivers can rely on it being inside
317 * the range they asked for.
318 */
319 q->flush_rq->tag = first_rq->tag;
320 } else {
321 blk_rq_init(q, q->flush_rq);
346 } 322 }
347 323
348 blk_rq_init(q, &q->flush_rq); 324 q->flush_rq->cmd_type = REQ_TYPE_FS;
349 q->flush_rq.cmd_type = REQ_TYPE_FS; 325 q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
350 q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; 326 q->flush_rq->rq_disk = first_rq->rq_disk;
351 q->flush_rq.rq_disk = first_rq->rq_disk; 327 q->flush_rq->end_io = flush_end_io;
352 q->flush_rq.end_io = flush_end_io;
353 328
354 list_add_tail(&q->flush_rq.queuelist, &q->queue_head); 329 return blk_flush_queue_rq(q->flush_rq);
355 return true;
356} 330}
357 331
358static void flush_data_end_io(struct request *rq, int error) 332static void flush_data_end_io(struct request *rq, int error)
@@ -437,7 +411,7 @@ void blk_insert_flush(struct request *rq)
437 if ((policy & REQ_FSEQ_DATA) && 411 if ((policy & REQ_FSEQ_DATA) &&
438 !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { 412 !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
439 if (q->mq_ops) { 413 if (q->mq_ops) {
440 blk_mq_run_request(rq, false, true); 414 blk_mq_insert_request(rq, false, false, true);
441 } else 415 } else
442 list_add_tail(&rq->queuelist, &q->queue_head); 416 list_add_tail(&rq->queuelist, &q->queue_head);
443 return; 417 return;
@@ -558,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush);
558void blk_mq_init_flush(struct request_queue *q) 532void blk_mq_init_flush(struct request_queue *q)
559{ 533{
560 spin_lock_init(&q->mq_flush_lock); 534 spin_lock_init(&q->mq_flush_lock);
561 INIT_WORK(&q->mq_flush_work, mq_flush_work);
562} 535}
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 2da76c999ef3..97a733cf3d5f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -119,6 +119,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
119 119
120 atomic_inc(&bb.done); 120 atomic_inc(&bb.done);
121 submit_bio(type, bio); 121 submit_bio(type, bio);
122
123 /*
124 * We can loop for a long time in here, if someone does
125 * full device discards (like mkfs). Be nice and allow
126 * us to schedule out to avoid softlocking if preempt
127 * is disabled.
128 */
129 cond_resched();
122 } 130 }
123 blk_finish_plug(&plug); 131 blk_finish_plug(&plug);
124 132
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 8f8adaa95466..6c583f9c5b65 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -21,6 +21,16 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
21 if (!bio) 21 if (!bio)
22 return 0; 22 return 0;
23 23
24 /*
25 * This should probably be returning 0, but blk_add_request_payload()
26 * (Christoph!!!!)
27 */
28 if (bio->bi_rw & REQ_DISCARD)
29 return 1;
30
31 if (bio->bi_rw & REQ_WRITE_SAME)
32 return 1;
33
24 fbio = bio; 34 fbio = bio;
25 cluster = blk_queue_cluster(q); 35 cluster = blk_queue_cluster(q);
26 seg_size = 0; 36 seg_size = 0;
@@ -161,30 +171,60 @@ new_segment:
161 *bvprv = *bvec; 171 *bvprv = *bvec;
162} 172}
163 173
164/* 174static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
165 * map a request to scatterlist, return number of sg entries setup. Caller 175 struct scatterlist *sglist,
166 * must make sure sg can hold rq->nr_phys_segments entries 176 struct scatterlist **sg)
167 */
168int blk_rq_map_sg(struct request_queue *q, struct request *rq,
169 struct scatterlist *sglist)
170{ 177{
171 struct bio_vec bvec, bvprv = { NULL }; 178 struct bio_vec bvec, bvprv = { NULL };
172 struct req_iterator iter; 179 struct bvec_iter iter;
173 struct scatterlist *sg;
174 int nsegs, cluster; 180 int nsegs, cluster;
175 181
176 nsegs = 0; 182 nsegs = 0;
177 cluster = blk_queue_cluster(q); 183 cluster = blk_queue_cluster(q);
178 184
179 /* 185 if (bio->bi_rw & REQ_DISCARD) {
180 * for each bio in rq 186 /*
181 */ 187 * This is a hack - drivers should be neither modifying the
182 sg = NULL; 188 * biovec, nor relying on bi_vcnt - but because of
183 rq_for_each_segment(bvec, rq, iter) { 189 * blk_add_request_payload(), a discard bio may or may not have
184 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, 190 * a payload we need to set up here (thank you Christoph) and
185 &nsegs, &cluster); 191 * bi_vcnt is really the only way of telling if we need to.
186 } /* segments in rq */ 192 */
193
194 if (bio->bi_vcnt)
195 goto single_segment;
196
197 return 0;
198 }
199
200 if (bio->bi_rw & REQ_WRITE_SAME) {
201single_segment:
202 *sg = sglist;
203 bvec = bio_iovec(bio);
204 sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
205 return 1;
206 }
207
208 for_each_bio(bio)
209 bio_for_each_segment(bvec, bio, iter)
210 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
211 &nsegs, &cluster);
187 212
213 return nsegs;
214}
215
216/*
217 * map a request to scatterlist, return number of sg entries setup. Caller
218 * must make sure sg can hold rq->nr_phys_segments entries
219 */
220int blk_rq_map_sg(struct request_queue *q, struct request *rq,
221 struct scatterlist *sglist)
222{
223 struct scatterlist *sg = NULL;
224 int nsegs = 0;
225
226 if (rq->bio)
227 nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
188 228
189 if (unlikely(rq->cmd_flags & REQ_COPY_USER) && 229 if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
190 (blk_rq_bytes(rq) & q->dma_pad_mask)) { 230 (blk_rq_bytes(rq) & q->dma_pad_mask)) {
@@ -230,20 +270,13 @@ EXPORT_SYMBOL(blk_rq_map_sg);
230int blk_bio_map_sg(struct request_queue *q, struct bio *bio, 270int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
231 struct scatterlist *sglist) 271 struct scatterlist *sglist)
232{ 272{
233 struct bio_vec bvec, bvprv = { NULL }; 273 struct scatterlist *sg = NULL;
234 struct scatterlist *sg; 274 int nsegs;
235 int nsegs, cluster; 275 struct bio *next = bio->bi_next;
236 struct bvec_iter iter; 276 bio->bi_next = NULL;
237
238 nsegs = 0;
239 cluster = blk_queue_cluster(q);
240
241 sg = NULL;
242 bio_for_each_segment(bvec, bio, iter) {
243 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
244 &nsegs, &cluster);
245 } /* segments in bio */
246 277
278 nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
279 bio->bi_next = next;
247 if (sg) 280 if (sg)
248 sg_mark_end(sg); 281 sg_mark_end(sg);
249 282
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index 3146befb56aa..136ef8643bba 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -11,7 +11,7 @@
11#include "blk-mq.h" 11#include "blk-mq.h"
12 12
13static LIST_HEAD(blk_mq_cpu_notify_list); 13static LIST_HEAD(blk_mq_cpu_notify_list);
14static DEFINE_SPINLOCK(blk_mq_cpu_notify_lock); 14static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
15 15
16static int blk_mq_main_cpu_notify(struct notifier_block *self, 16static int blk_mq_main_cpu_notify(struct notifier_block *self,
17 unsigned long action, void *hcpu) 17 unsigned long action, void *hcpu)
@@ -19,12 +19,12 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
19 unsigned int cpu = (unsigned long) hcpu; 19 unsigned int cpu = (unsigned long) hcpu;
20 struct blk_mq_cpu_notifier *notify; 20 struct blk_mq_cpu_notifier *notify;
21 21
22 spin_lock(&blk_mq_cpu_notify_lock); 22 raw_spin_lock(&blk_mq_cpu_notify_lock);
23 23
24 list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) 24 list_for_each_entry(notify, &blk_mq_cpu_notify_list, list)
25 notify->notify(notify->data, action, cpu); 25 notify->notify(notify->data, action, cpu);
26 26
27 spin_unlock(&blk_mq_cpu_notify_lock); 27 raw_spin_unlock(&blk_mq_cpu_notify_lock);
28 return NOTIFY_OK; 28 return NOTIFY_OK;
29} 29}
30 30
@@ -32,16 +32,16 @@ void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
32{ 32{
33 BUG_ON(!notifier->notify); 33 BUG_ON(!notifier->notify);
34 34
35 spin_lock(&blk_mq_cpu_notify_lock); 35 raw_spin_lock(&blk_mq_cpu_notify_lock);
36 list_add_tail(&notifier->list, &blk_mq_cpu_notify_list); 36 list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
37 spin_unlock(&blk_mq_cpu_notify_lock); 37 raw_spin_unlock(&blk_mq_cpu_notify_lock);
38} 38}
39 39
40void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier) 40void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
41{ 41{
42 spin_lock(&blk_mq_cpu_notify_lock); 42 raw_spin_lock(&blk_mq_cpu_notify_lock);
43 list_del(&notifier->list); 43 list_del(&notifier->list);
44 spin_unlock(&blk_mq_cpu_notify_lock); 44 raw_spin_unlock(&blk_mq_cpu_notify_lock);
45} 45}
46 46
47void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, 47void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 5d70edc9855f..83ae96c51a27 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -184,7 +184,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
184ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) 184ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
185{ 185{
186 char *orig_page = page; 186 char *orig_page = page;
187 int cpu; 187 unsigned int cpu;
188 188
189 if (!tags) 189 if (!tags)
190 return 0; 190 return 0;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 57039fcd9c93..883f72089015 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -73,8 +73,8 @@ static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
73 set_bit(ctx->index_hw, hctx->ctx_map); 73 set_bit(ctx->index_hw, hctx->ctx_map);
74} 74}
75 75
76static struct request *blk_mq_alloc_rq(struct blk_mq_hw_ctx *hctx, gfp_t gfp, 76static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
77 bool reserved) 77 gfp_t gfp, bool reserved)
78{ 78{
79 struct request *rq; 79 struct request *rq;
80 unsigned int tag; 80 unsigned int tag;
@@ -193,12 +193,6 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
193 ctx->rq_dispatched[rw_is_sync(rw_flags)]++; 193 ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
194} 194}
195 195
196static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
197 gfp_t gfp, bool reserved)
198{
199 return blk_mq_alloc_rq(hctx, gfp, reserved);
200}
201
202static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, 196static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
203 int rw, gfp_t gfp, 197 int rw, gfp_t gfp,
204 bool reserved) 198 bool reserved)
@@ -226,15 +220,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
226 return rq; 220 return rq;
227} 221}
228 222
229struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 223struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
230 gfp_t gfp, bool reserved)
231{ 224{
232 struct request *rq; 225 struct request *rq;
233 226
234 if (blk_mq_queue_enter(q)) 227 if (blk_mq_queue_enter(q))
235 return NULL; 228 return NULL;
236 229
237 rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved); 230 rq = blk_mq_alloc_request_pinned(q, rw, gfp, false);
238 if (rq) 231 if (rq)
239 blk_mq_put_ctx(rq->mq_ctx); 232 blk_mq_put_ctx(rq->mq_ctx);
240 return rq; 233 return rq;
@@ -258,7 +251,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
258/* 251/*
259 * Re-init and set pdu, if we have it 252 * Re-init and set pdu, if we have it
260 */ 253 */
261static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) 254void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq)
262{ 255{
263 blk_rq_init(hctx->queue, rq); 256 blk_rq_init(hctx->queue, rq);
264 257
@@ -290,38 +283,10 @@ void blk_mq_free_request(struct request *rq)
290 __blk_mq_free_request(hctx, ctx, rq); 283 __blk_mq_free_request(hctx, ctx, rq);
291} 284}
292 285
293static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error) 286bool blk_mq_end_io_partial(struct request *rq, int error, unsigned int nr_bytes)
294{
295 if (error)
296 clear_bit(BIO_UPTODATE, &bio->bi_flags);
297 else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
298 error = -EIO;
299
300 if (unlikely(rq->cmd_flags & REQ_QUIET))
301 set_bit(BIO_QUIET, &bio->bi_flags);
302
303 /* don't actually finish bio if it's part of flush sequence */
304 if (!(rq->cmd_flags & REQ_FLUSH_SEQ))
305 bio_endio(bio, error);
306}
307
308void blk_mq_complete_request(struct request *rq, int error)
309{ 287{
310 struct bio *bio = rq->bio; 288 if (blk_update_request(rq, error, blk_rq_bytes(rq)))
311 unsigned int bytes = 0; 289 return true;
312
313 trace_block_rq_complete(rq->q, rq);
314
315 while (bio) {
316 struct bio *next = bio->bi_next;
317
318 bio->bi_next = NULL;
319 bytes += bio->bi_iter.bi_size;
320 blk_mq_bio_endio(rq, bio, error);
321 bio = next;
322 }
323
324 blk_account_io_completion(rq, bytes);
325 290
326 blk_account_io_done(rq); 291 blk_account_io_done(rq);
327 292
@@ -329,49 +294,57 @@ void blk_mq_complete_request(struct request *rq, int error)
329 rq->end_io(rq, error); 294 rq->end_io(rq, error);
330 else 295 else
331 blk_mq_free_request(rq); 296 blk_mq_free_request(rq);
297 return false;
332} 298}
299EXPORT_SYMBOL(blk_mq_end_io_partial);
333 300
334void __blk_mq_end_io(struct request *rq, int error) 301static void __blk_mq_complete_request_remote(void *data)
335{
336 if (!blk_mark_rq_complete(rq))
337 blk_mq_complete_request(rq, error);
338}
339
340static void blk_mq_end_io_remote(void *data)
341{ 302{
342 struct request *rq = data; 303 struct request *rq = data;
343 304
344 __blk_mq_end_io(rq, rq->errors); 305 rq->q->softirq_done_fn(rq);
345} 306}
346 307
347/* 308void __blk_mq_complete_request(struct request *rq)
348 * End IO on this request on a multiqueue enabled driver. We'll either do
349 * it directly inline, or punt to a local IPI handler on the matching
350 * remote CPU.
351 */
352void blk_mq_end_io(struct request *rq, int error)
353{ 309{
354 struct blk_mq_ctx *ctx = rq->mq_ctx; 310 struct blk_mq_ctx *ctx = rq->mq_ctx;
355 int cpu; 311 int cpu;
356 312
357 if (!ctx->ipi_redirect) 313 if (!ctx->ipi_redirect) {
358 return __blk_mq_end_io(rq, error); 314 rq->q->softirq_done_fn(rq);
315 return;
316 }
359 317
360 cpu = get_cpu(); 318 cpu = get_cpu();
361 if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { 319 if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
362 rq->errors = error; 320 rq->csd.func = __blk_mq_complete_request_remote;
363 rq->csd.func = blk_mq_end_io_remote;
364 rq->csd.info = rq; 321 rq->csd.info = rq;
365 rq->csd.flags = 0; 322 rq->csd.flags = 0;
366 __smp_call_function_single(ctx->cpu, &rq->csd, 0); 323 __smp_call_function_single(ctx->cpu, &rq->csd, 0);
367 } else { 324 } else {
368 __blk_mq_end_io(rq, error); 325 rq->q->softirq_done_fn(rq);
369 } 326 }
370 put_cpu(); 327 put_cpu();
371} 328}
372EXPORT_SYMBOL(blk_mq_end_io);
373 329
374static void blk_mq_start_request(struct request *rq) 330/**
331 * blk_mq_complete_request - end I/O on a request
332 * @rq: the request being processed
333 *
334 * Description:
335 * Ends all I/O on a request. It does not handle partial completions.
336 * The actual completion happens out-of-order, through a IPI handler.
337 **/
338void blk_mq_complete_request(struct request *rq)
339{
340 if (unlikely(blk_should_fake_timeout(rq->q)))
341 return;
342 if (!blk_mark_rq_complete(rq))
343 __blk_mq_complete_request(rq);
344}
345EXPORT_SYMBOL(blk_mq_complete_request);
346
347static void blk_mq_start_request(struct request *rq, bool last)
375{ 348{
376 struct request_queue *q = rq->q; 349 struct request_queue *q = rq->q;
377 350
@@ -384,6 +357,25 @@ static void blk_mq_start_request(struct request *rq)
384 */ 357 */
385 rq->deadline = jiffies + q->rq_timeout; 358 rq->deadline = jiffies + q->rq_timeout;
386 set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 359 set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
360
361 if (q->dma_drain_size && blk_rq_bytes(rq)) {
362 /*
363 * Make sure space for the drain appears. We know we can do
364 * this because max_hw_segments has been adjusted to be one
365 * fewer than the device can handle.
366 */
367 rq->nr_phys_segments++;
368 }
369
370 /*
371 * Flag the last request in the series so that drivers know when IO
372 * should be kicked off, if they don't do it on a per-request basis.
373 *
374 * Note: the flag isn't the only condition drivers should do kick off.
375 * If drive is busy, the last request might not have the bit set.
376 */
377 if (last)
378 rq->cmd_flags |= REQ_END;
387} 379}
388 380
389static void blk_mq_requeue_request(struct request *rq) 381static void blk_mq_requeue_request(struct request *rq)
@@ -392,6 +384,11 @@ static void blk_mq_requeue_request(struct request *rq)
392 384
393 trace_block_rq_requeue(q, rq); 385 trace_block_rq_requeue(q, rq);
394 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 386 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
387
388 rq->cmd_flags &= ~REQ_END;
389
390 if (q->dma_drain_size && blk_rq_bytes(rq))
391 rq->nr_phys_segments--;
395} 392}
396 393
397struct blk_mq_timeout_data { 394struct blk_mq_timeout_data {
@@ -559,19 +556,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
559 556
560 rq = list_first_entry(&rq_list, struct request, queuelist); 557 rq = list_first_entry(&rq_list, struct request, queuelist);
561 list_del_init(&rq->queuelist); 558 list_del_init(&rq->queuelist);
562 blk_mq_start_request(rq);
563 559
564 /* 560 blk_mq_start_request(rq, list_empty(&rq_list));
565 * Last request in the series. Flag it as such, this
566 * enables drivers to know when IO should be kicked off,
567 * if they don't do it on a per-request basis.
568 *
569 * Note: the flag isn't the only condition drivers
570 * should do kick off. If drive is busy, the last
571 * request might not have the bit set.
572 */
573 if (list_empty(&rq_list))
574 rq->cmd_flags |= REQ_END;
575 561
576 ret = q->mq_ops->queue_rq(hctx, rq); 562 ret = q->mq_ops->queue_rq(hctx, rq);
577 switch (ret) { 563 switch (ret) {
@@ -589,8 +575,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
589 break; 575 break;
590 default: 576 default:
591 pr_err("blk-mq: bad return on queue: %d\n", ret); 577 pr_err("blk-mq: bad return on queue: %d\n", ret);
592 rq->errors = -EIO;
593 case BLK_MQ_RQ_QUEUE_ERROR: 578 case BLK_MQ_RQ_QUEUE_ERROR:
579 rq->errors = -EIO;
594 blk_mq_end_io(rq, rq->errors); 580 blk_mq_end_io(rq, rq->errors);
595 break; 581 break;
596 } 582 }
@@ -693,13 +679,16 @@ static void blk_mq_work_fn(struct work_struct *work)
693} 679}
694 680
695static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, 681static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
696 struct request *rq) 682 struct request *rq, bool at_head)
697{ 683{
698 struct blk_mq_ctx *ctx = rq->mq_ctx; 684 struct blk_mq_ctx *ctx = rq->mq_ctx;
699 685
700 trace_block_rq_insert(hctx->queue, rq); 686 trace_block_rq_insert(hctx->queue, rq);
701 687
702 list_add_tail(&rq->queuelist, &ctx->rq_list); 688 if (at_head)
689 list_add(&rq->queuelist, &ctx->rq_list);
690 else
691 list_add_tail(&rq->queuelist, &ctx->rq_list);
703 blk_mq_hctx_mark_pending(hctx, ctx); 692 blk_mq_hctx_mark_pending(hctx, ctx);
704 693
705 /* 694 /*
@@ -708,61 +697,28 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
708 blk_mq_add_timer(rq); 697 blk_mq_add_timer(rq);
709} 698}
710 699
711void blk_mq_insert_request(struct request_queue *q, struct request *rq, 700void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
712 bool run_queue) 701 bool async)
713{ 702{
703 struct request_queue *q = rq->q;
714 struct blk_mq_hw_ctx *hctx; 704 struct blk_mq_hw_ctx *hctx;
715 struct blk_mq_ctx *ctx, *current_ctx; 705 struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx;
706
707 current_ctx = blk_mq_get_ctx(q);
708 if (!cpu_online(ctx->cpu))
709 rq->mq_ctx = ctx = current_ctx;
716 710
717 ctx = rq->mq_ctx;
718 hctx = q->mq_ops->map_queue(q, ctx->cpu); 711 hctx = q->mq_ops->map_queue(q, ctx->cpu);
719 712
720 if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) { 713 if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) &&
714 !(rq->cmd_flags & (REQ_FLUSH_SEQ))) {
721 blk_insert_flush(rq); 715 blk_insert_flush(rq);
722 } else { 716 } else {
723 current_ctx = blk_mq_get_ctx(q);
724
725 if (!cpu_online(ctx->cpu)) {
726 ctx = current_ctx;
727 hctx = q->mq_ops->map_queue(q, ctx->cpu);
728 rq->mq_ctx = ctx;
729 }
730 spin_lock(&ctx->lock); 717 spin_lock(&ctx->lock);
731 __blk_mq_insert_request(hctx, rq); 718 __blk_mq_insert_request(hctx, rq, at_head);
732 spin_unlock(&ctx->lock); 719 spin_unlock(&ctx->lock);
733
734 blk_mq_put_ctx(current_ctx);
735 } 720 }
736 721
737 if (run_queue)
738 __blk_mq_run_hw_queue(hctx);
739}
740EXPORT_SYMBOL(blk_mq_insert_request);
741
742/*
743 * This is a special version of blk_mq_insert_request to bypass FLUSH request
744 * check. Should only be used internally.
745 */
746void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
747{
748 struct request_queue *q = rq->q;
749 struct blk_mq_hw_ctx *hctx;
750 struct blk_mq_ctx *ctx, *current_ctx;
751
752 current_ctx = blk_mq_get_ctx(q);
753
754 ctx = rq->mq_ctx;
755 if (!cpu_online(ctx->cpu)) {
756 ctx = current_ctx;
757 rq->mq_ctx = ctx;
758 }
759 hctx = q->mq_ops->map_queue(q, ctx->cpu);
760
761 /* ctx->cpu might be offline */
762 spin_lock(&ctx->lock);
763 __blk_mq_insert_request(hctx, rq);
764 spin_unlock(&ctx->lock);
765
766 blk_mq_put_ctx(current_ctx); 722 blk_mq_put_ctx(current_ctx);
767 723
768 if (run_queue) 724 if (run_queue)
@@ -798,7 +754,7 @@ static void blk_mq_insert_requests(struct request_queue *q,
798 rq = list_first_entry(list, struct request, queuelist); 754 rq = list_first_entry(list, struct request, queuelist);
799 list_del_init(&rq->queuelist); 755 list_del_init(&rq->queuelist);
800 rq->mq_ctx = ctx; 756 rq->mq_ctx = ctx;
801 __blk_mq_insert_request(hctx, rq); 757 __blk_mq_insert_request(hctx, rq, false);
802 } 758 }
803 spin_unlock(&ctx->lock); 759 spin_unlock(&ctx->lock);
804 760
@@ -888,6 +844,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
888 844
889 blk_queue_bounce(q, &bio); 845 blk_queue_bounce(q, &bio);
890 846
847 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
848 bio_endio(bio, -EIO);
849 return;
850 }
851
891 if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) 852 if (use_plug && blk_attempt_plug_merge(q, bio, &request_count))
892 return; 853 return;
893 854
@@ -899,6 +860,8 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
899 ctx = blk_mq_get_ctx(q); 860 ctx = blk_mq_get_ctx(q);
900 hctx = q->mq_ops->map_queue(q, ctx->cpu); 861 hctx = q->mq_ops->map_queue(q, ctx->cpu);
901 862
863 if (is_sync)
864 rw |= REQ_SYNC;
902 trace_block_getrq(q, bio, rw); 865 trace_block_getrq(q, bio, rw);
903 rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false); 866 rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
904 if (likely(rq)) 867 if (likely(rq))
@@ -950,7 +913,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
950 __blk_mq_free_request(hctx, ctx, rq); 913 __blk_mq_free_request(hctx, ctx, rq);
951 else { 914 else {
952 blk_mq_bio_to_request(rq, bio); 915 blk_mq_bio_to_request(rq, bio);
953 __blk_mq_insert_request(hctx, rq); 916 __blk_mq_insert_request(hctx, rq, false);
954 } 917 }
955 918
956 spin_unlock(&ctx->lock); 919 spin_unlock(&ctx->lock);
@@ -1309,15 +1272,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
1309 reg->queue_depth = BLK_MQ_MAX_DEPTH; 1272 reg->queue_depth = BLK_MQ_MAX_DEPTH;
1310 } 1273 }
1311 1274
1312 /*
1313 * Set aside a tag for flush requests. It will only be used while
1314 * another flush request is in progress but outside the driver.
1315 *
1316 * TODO: only allocate if flushes are supported
1317 */
1318 reg->queue_depth++;
1319 reg->reserved_tags++;
1320
1321 if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) 1275 if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
1322 return ERR_PTR(-EINVAL); 1276 return ERR_PTR(-EINVAL);
1323 1277
@@ -1360,17 +1314,27 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
1360 q->mq_ops = reg->ops; 1314 q->mq_ops = reg->ops;
1361 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; 1315 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
1362 1316
1317 q->sg_reserved_size = INT_MAX;
1318
1363 blk_queue_make_request(q, blk_mq_make_request); 1319 blk_queue_make_request(q, blk_mq_make_request);
1364 blk_queue_rq_timed_out(q, reg->ops->timeout); 1320 blk_queue_rq_timed_out(q, reg->ops->timeout);
1365 if (reg->timeout) 1321 if (reg->timeout)
1366 blk_queue_rq_timeout(q, reg->timeout); 1322 blk_queue_rq_timeout(q, reg->timeout);
1367 1323
1324 if (reg->ops->complete)
1325 blk_queue_softirq_done(q, reg->ops->complete);
1326
1368 blk_mq_init_flush(q); 1327 blk_mq_init_flush(q);
1369 blk_mq_init_cpu_queues(q, reg->nr_hw_queues); 1328 blk_mq_init_cpu_queues(q, reg->nr_hw_queues);
1370 1329
1371 if (blk_mq_init_hw_queues(q, reg, driver_data)) 1330 q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size,
1331 cache_line_size()), GFP_KERNEL);
1332 if (!q->flush_rq)
1372 goto err_hw; 1333 goto err_hw;
1373 1334
1335 if (blk_mq_init_hw_queues(q, reg, driver_data))
1336 goto err_flush_rq;
1337
1374 blk_mq_map_swqueue(q); 1338 blk_mq_map_swqueue(q);
1375 1339
1376 mutex_lock(&all_q_mutex); 1340 mutex_lock(&all_q_mutex);
@@ -1378,6 +1342,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
1378 mutex_unlock(&all_q_mutex); 1342 mutex_unlock(&all_q_mutex);
1379 1343
1380 return q; 1344 return q;
1345
1346err_flush_rq:
1347 kfree(q->flush_rq);
1381err_hw: 1348err_hw:
1382 kfree(q->mq_map); 1349 kfree(q->mq_map);
1383err_map: 1350err_map:
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5c3917984b00..72beba1f9d55 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -22,13 +22,12 @@ struct blk_mq_ctx {
22 struct kobject kobj; 22 struct kobject kobj;
23}; 23};
24 24
25void __blk_mq_end_io(struct request *rq, int error); 25void __blk_mq_complete_request(struct request *rq);
26void blk_mq_complete_request(struct request *rq, int error);
27void blk_mq_run_request(struct request *rq, bool run_queue, bool async);
28void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 26void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
29void blk_mq_init_flush(struct request_queue *q); 27void blk_mq_init_flush(struct request_queue *q);
30void blk_mq_drain_queue(struct request_queue *q); 28void blk_mq_drain_queue(struct request_queue *q);
31void blk_mq_free_queue(struct request_queue *q); 29void blk_mq_free_queue(struct request_queue *q);
30void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq);
32 31
33/* 32/*
34 * CPU hotplug helpers 33 * CPU hotplug helpers
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8095c4a21fc0..7500f876dae4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -549,6 +549,8 @@ static void blk_release_queue(struct kobject *kobj)
549 if (q->mq_ops) 549 if (q->mq_ops)
550 blk_mq_free_queue(q); 550 blk_mq_free_queue(q);
551 551
552 kfree(q->flush_rq);
553
552 blk_trace_shutdown(q); 554 blk_trace_shutdown(q);
553 555
554 bdi_destroy(&q->backing_dev_info); 556 bdi_destroy(&q->backing_dev_info);
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index bba81c9348e1..d96f7061c6fd 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req)
91 case BLK_EH_HANDLED: 91 case BLK_EH_HANDLED:
92 /* Can we use req->errors here? */ 92 /* Can we use req->errors here? */
93 if (q->mq_ops) 93 if (q->mq_ops)
94 blk_mq_complete_request(req, req->errors); 94 __blk_mq_complete_request(req);
95 else 95 else
96 __blk_complete_request(req); 96 __blk_complete_request(req);
97 break; 97 break;
diff --git a/block/blk.h b/block/blk.h
index c90e1d8f7a2b..d23b415b8a28 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -113,7 +113,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
113 q->flush_queue_delayed = 1; 113 q->flush_queue_delayed = 1;
114 return NULL; 114 return NULL;
115 } 115 }
116 if (unlikely(blk_queue_dying(q)) || 116 if (unlikely(blk_queue_bypass(q)) ||
117 !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) 117 !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
118 return NULL; 118 return NULL;
119 } 119 }