aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-02-14 13:45:18 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2014-02-14 13:45:18 -0500
commit5e57dc81106b942786f5db8e7ab8788bb9319933 (patch)
tree4533e01e745bba3614c77200b3fd96dd7af7e04e /block
parent0d25e3691186f5ae6feb0229717a60a5169dc5b2 (diff)
parentc8123f8c9cb517403b51aa41c3c46ff5e10b2c17 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block IO fixes from Jens Axboe: "Second round of updates and fixes for 3.14-rc2. Most of this stuff has been queued up for a while. The notable exception is the blk-mq changes, which are naturally a bit more in flux still. The pull request contains: - Two bug fixes for the new immutable vecs, causing crashes with raid or swap. From Kent. - Various blk-mq tweaks and fixes from Christoph. A fix for integrity bio's from Nic. - A few bcache fixes from Kent and Darrick Wong. - xen-blk{front,back} fixes from David Vrabel, Matt Rushton, Nicolas Swenson, and Roger Pau Monne. - Fix for a vec miscount with integrity vectors from Martin. - Minor annotations or fixes from Masanari Iida and Rashika Kheria. - Tweak to null_blk to do more normal FIFO processing of requests from Shlomo Pongratz. - Elevator switching bypass fix from Tejun. - Softlockup in blkdev_issue_discard() fix when !CONFIG_PREEMPT from me" * 'for-linus' of git://git.kernel.dk/linux-block: (31 commits) block: add cond_resched() to potentially long running ioctl discard loop xen-blkback: init persistent_purge_work work_struct blk-mq: pair blk_mq_start_request / blk_mq_requeue_request blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq block: Fix cloning of discard/write same bios block: Fix type mismatch in ssize_t_blk_mq_tag_sysfs_show blk-mq: rework flush sequencing logic null_blk: use blk_complete_request and blk_mq_complete_request virtio_blk: use blk_mq_complete_request blk-mq: rework I/O completions fs: Add prototype declaration to appropriate header file include/linux/bio.h fs: Mark function as static in fs/bio-integrity.c block/null_blk: Fix completion processing from LIFO to FIFO block: Explicitly handle discard/write same segments block: Fix nr_vecs for inline integrity vectors blk-mq: Add bio_integrity setup to blk_mq_make_request blk-mq: initialize sg_reserved_size blk-mq: handle dma_drain_size blk-mq: divert __blk_put_request for MQ ops blk-mq: support at_head 
insertions for blk_execute_rq ...
Diffstat (limited to 'block')
-rw-r--r--block/blk-core.c20
-rw-r--r--block/blk-exec.c2
-rw-r--r--block/blk-flush.c101
-rw-r--r--block/blk-lib.c8
-rw-r--r--block/blk-merge.c91
-rw-r--r--block/blk-mq-tag.c2
-rw-r--r--block/blk-mq.c143
-rw-r--r--block/blk-mq.h4
-rw-r--r--block/blk-sysfs.c2
-rw-r--r--block/blk-timeout.c2
-rw-r--r--block/blk.h2
11 files changed, 219 insertions, 158 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index c00e0bdeab4a..853f92749202 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
693 if (!uninit_q) 693 if (!uninit_q)
694 return NULL; 694 return NULL;
695 695
696 uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
697 if (!uninit_q->flush_rq)
698 goto out_cleanup_queue;
699
696 q = blk_init_allocated_queue(uninit_q, rfn, lock); 700 q = blk_init_allocated_queue(uninit_q, rfn, lock);
697 if (!q) 701 if (!q)
698 blk_cleanup_queue(uninit_q); 702 goto out_free_flush_rq;
699
700 return q; 703 return q;
704
705out_free_flush_rq:
706 kfree(uninit_q->flush_rq);
707out_cleanup_queue:
708 blk_cleanup_queue(uninit_q);
709 return NULL;
701} 710}
702EXPORT_SYMBOL(blk_init_queue_node); 711EXPORT_SYMBOL(blk_init_queue_node);
703 712
@@ -1127,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
1127struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) 1136struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
1128{ 1137{
1129 if (q->mq_ops) 1138 if (q->mq_ops)
1130 return blk_mq_alloc_request(q, rw, gfp_mask, false); 1139 return blk_mq_alloc_request(q, rw, gfp_mask);
1131 else 1140 else
1132 return blk_old_get_request(q, rw, gfp_mask); 1141 return blk_old_get_request(q, rw, gfp_mask);
1133} 1142}
@@ -1278,6 +1287,11 @@ void __blk_put_request(struct request_queue *q, struct request *req)
1278 if (unlikely(!q)) 1287 if (unlikely(!q))
1279 return; 1288 return;
1280 1289
1290 if (q->mq_ops) {
1291 blk_mq_free_request(req);
1292 return;
1293 }
1294
1281 blk_pm_put_request(req); 1295 blk_pm_put_request(req);
1282 1296
1283 elv_completed_request(q, req); 1297 elv_completed_request(q, req);
diff --git a/block/blk-exec.c b/block/blk-exec.c
index bbfc072a79c2..c68613bb4c79 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
65 * be resued after dying flag is set 65 * be resued after dying flag is set
66 */ 66 */
67 if (q->mq_ops) { 67 if (q->mq_ops) {
68 blk_mq_insert_request(q, rq, true); 68 blk_mq_insert_request(q, rq, at_head, true);
69 return; 69 return;
70 } 70 }
71 71
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 9288aaf35c21..66e2b697f5db 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq)
130 blk_clear_rq_complete(rq); 130 blk_clear_rq_complete(rq);
131} 131}
132 132
133static void mq_flush_data_run(struct work_struct *work) 133static void mq_flush_run(struct work_struct *work)
134{ 134{
135 struct request *rq; 135 struct request *rq;
136 136
137 rq = container_of(work, struct request, mq_flush_data); 137 rq = container_of(work, struct request, mq_flush_work);
138 138
139 memset(&rq->csd, 0, sizeof(rq->csd)); 139 memset(&rq->csd, 0, sizeof(rq->csd));
140 blk_mq_run_request(rq, true, false); 140 blk_mq_run_request(rq, true, false);
141} 141}
142 142
143static void blk_mq_flush_data_insert(struct request *rq) 143static bool blk_flush_queue_rq(struct request *rq)
144{ 144{
145 INIT_WORK(&rq->mq_flush_data, mq_flush_data_run); 145 if (rq->q->mq_ops) {
146 kblockd_schedule_work(rq->q, &rq->mq_flush_data); 146 INIT_WORK(&rq->mq_flush_work, mq_flush_run);
147 kblockd_schedule_work(rq->q, &rq->mq_flush_work);
148 return false;
149 } else {
150 list_add_tail(&rq->queuelist, &rq->q->queue_head);
151 return true;
152 }
147} 153}
148 154
149/** 155/**
@@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
187 193
188 case REQ_FSEQ_DATA: 194 case REQ_FSEQ_DATA:
189 list_move_tail(&rq->flush.list, &q->flush_data_in_flight); 195 list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
190 if (q->mq_ops) 196 queued = blk_flush_queue_rq(rq);
191 blk_mq_flush_data_insert(rq);
192 else {
193 list_add(&rq->queuelist, &q->queue_head);
194 queued = true;
195 }
196 break; 197 break;
197 198
198 case REQ_FSEQ_DONE: 199 case REQ_FSEQ_DONE:
@@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
216 } 217 }
217 218
218 kicked = blk_kick_flush(q); 219 kicked = blk_kick_flush(q);
219 /* blk_mq_run_flush will run queue */
220 if (q->mq_ops)
221 return queued;
222 return kicked | queued; 220 return kicked | queued;
223} 221}
224 222
@@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error)
230 struct request *rq, *n; 228 struct request *rq, *n;
231 unsigned long flags = 0; 229 unsigned long flags = 0;
232 230
233 if (q->mq_ops) { 231 if (q->mq_ops)
234 blk_mq_free_request(flush_rq);
235 spin_lock_irqsave(&q->mq_flush_lock, flags); 232 spin_lock_irqsave(&q->mq_flush_lock, flags);
236 } 233
237 running = &q->flush_queue[q->flush_running_idx]; 234 running = &q->flush_queue[q->flush_running_idx];
238 BUG_ON(q->flush_pending_idx == q->flush_running_idx); 235 BUG_ON(q->flush_pending_idx == q->flush_running_idx);
239 236
@@ -263,49 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error)
263 * kblockd. 260 * kblockd.
264 */ 261 */
265 if (queued || q->flush_queue_delayed) { 262 if (queued || q->flush_queue_delayed) {
266 if (!q->mq_ops) 263 WARN_ON(q->mq_ops);
267 blk_run_queue_async(q); 264 blk_run_queue_async(q);
268 else
269 /*
270 * This can be optimized to only run queues with requests
271 * queued if necessary.
272 */
273 blk_mq_run_queues(q, true);
274 } 265 }
275 q->flush_queue_delayed = 0; 266 q->flush_queue_delayed = 0;
276 if (q->mq_ops) 267 if (q->mq_ops)
277 spin_unlock_irqrestore(&q->mq_flush_lock, flags); 268 spin_unlock_irqrestore(&q->mq_flush_lock, flags);
278} 269}
279 270
280static void mq_flush_work(struct work_struct *work)
281{
282 struct request_queue *q;
283 struct request *rq;
284
285 q = container_of(work, struct request_queue, mq_flush_work);
286
287 /* We don't need set REQ_FLUSH_SEQ, it's for consistency */
288 rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
289 __GFP_WAIT|GFP_ATOMIC, true);
290 rq->cmd_type = REQ_TYPE_FS;
291 rq->end_io = flush_end_io;
292
293 blk_mq_run_request(rq, true, false);
294}
295
296/*
297 * We can't directly use q->flush_rq, because it doesn't have tag and is not in
298 * hctx->rqs[]. so we must allocate a new request, since we can't sleep here,
299 * so offload the work to workqueue.
300 *
301 * Note: we assume a flush request finished in any hardware queue will flush
302 * the whole disk cache.
303 */
304static void mq_run_flush(struct request_queue *q)
305{
306 kblockd_schedule_work(q, &q->mq_flush_work);
307}
308
309/** 271/**
310 * blk_kick_flush - consider issuing flush request 272 * blk_kick_flush - consider issuing flush request
311 * @q: request_queue being kicked 273 * @q: request_queue being kicked
@@ -340,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q)
340 * different from running_idx, which means flush is in flight. 302 * different from running_idx, which means flush is in flight.
341 */ 303 */
342 q->flush_pending_idx ^= 1; 304 q->flush_pending_idx ^= 1;
305
343 if (q->mq_ops) { 306 if (q->mq_ops) {
344 mq_run_flush(q); 307 struct blk_mq_ctx *ctx = first_rq->mq_ctx;
345 return true; 308 struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
309
310 blk_mq_rq_init(hctx, q->flush_rq);
311 q->flush_rq->mq_ctx = ctx;
312
313 /*
314 * Reuse the tag value from the fist waiting request,
315 * with blk-mq the tag is generated during request
316 * allocation and drivers can rely on it being inside
317 * the range they asked for.
318 */
319 q->flush_rq->tag = first_rq->tag;
320 } else {
321 blk_rq_init(q, q->flush_rq);
346 } 322 }
347 323
348 blk_rq_init(q, &q->flush_rq); 324 q->flush_rq->cmd_type = REQ_TYPE_FS;
349 q->flush_rq.cmd_type = REQ_TYPE_FS; 325 q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
350 q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; 326 q->flush_rq->rq_disk = first_rq->rq_disk;
351 q->flush_rq.rq_disk = first_rq->rq_disk; 327 q->flush_rq->end_io = flush_end_io;
352 q->flush_rq.end_io = flush_end_io;
353 328
354 list_add_tail(&q->flush_rq.queuelist, &q->queue_head); 329 return blk_flush_queue_rq(q->flush_rq);
355 return true;
356} 330}
357 331
358static void flush_data_end_io(struct request *rq, int error) 332static void flush_data_end_io(struct request *rq, int error)
@@ -558,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush);
558void blk_mq_init_flush(struct request_queue *q) 532void blk_mq_init_flush(struct request_queue *q)
559{ 533{
560 spin_lock_init(&q->mq_flush_lock); 534 spin_lock_init(&q->mq_flush_lock);
561 INIT_WORK(&q->mq_flush_work, mq_flush_work);
562} 535}
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 2da76c999ef3..97a733cf3d5f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -119,6 +119,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
119 119
120 atomic_inc(&bb.done); 120 atomic_inc(&bb.done);
121 submit_bio(type, bio); 121 submit_bio(type, bio);
122
123 /*
124 * We can loop for a long time in here, if someone does
125 * full device discards (like mkfs). Be nice and allow
126 * us to schedule out to avoid softlocking if preempt
127 * is disabled.
128 */
129 cond_resched();
122 } 130 }
123 blk_finish_plug(&plug); 131 blk_finish_plug(&plug);
124 132
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 8f8adaa95466..6c583f9c5b65 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -21,6 +21,16 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
21 if (!bio) 21 if (!bio)
22 return 0; 22 return 0;
23 23
24 /*
25 * This should probably be returning 0, but blk_add_request_payload()
26 * (Christoph!!!!)
27 */
28 if (bio->bi_rw & REQ_DISCARD)
29 return 1;
30
31 if (bio->bi_rw & REQ_WRITE_SAME)
32 return 1;
33
24 fbio = bio; 34 fbio = bio;
25 cluster = blk_queue_cluster(q); 35 cluster = blk_queue_cluster(q);
26 seg_size = 0; 36 seg_size = 0;
@@ -161,30 +171,60 @@ new_segment:
161 *bvprv = *bvec; 171 *bvprv = *bvec;
162} 172}
163 173
164/* 174static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
165 * map a request to scatterlist, return number of sg entries setup. Caller 175 struct scatterlist *sglist,
166 * must make sure sg can hold rq->nr_phys_segments entries 176 struct scatterlist **sg)
167 */
168int blk_rq_map_sg(struct request_queue *q, struct request *rq,
169 struct scatterlist *sglist)
170{ 177{
171 struct bio_vec bvec, bvprv = { NULL }; 178 struct bio_vec bvec, bvprv = { NULL };
172 struct req_iterator iter; 179 struct bvec_iter iter;
173 struct scatterlist *sg;
174 int nsegs, cluster; 180 int nsegs, cluster;
175 181
176 nsegs = 0; 182 nsegs = 0;
177 cluster = blk_queue_cluster(q); 183 cluster = blk_queue_cluster(q);
178 184
179 /* 185 if (bio->bi_rw & REQ_DISCARD) {
180 * for each bio in rq 186 /*
181 */ 187 * This is a hack - drivers should be neither modifying the
182 sg = NULL; 188 * biovec, nor relying on bi_vcnt - but because of
183 rq_for_each_segment(bvec, rq, iter) { 189 * blk_add_request_payload(), a discard bio may or may not have
184 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, 190 * a payload we need to set up here (thank you Christoph) and
185 &nsegs, &cluster); 191 * bi_vcnt is really the only way of telling if we need to.
186 } /* segments in rq */ 192 */
193
194 if (bio->bi_vcnt)
195 goto single_segment;
196
197 return 0;
198 }
199
200 if (bio->bi_rw & REQ_WRITE_SAME) {
201single_segment:
202 *sg = sglist;
203 bvec = bio_iovec(bio);
204 sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
205 return 1;
206 }
207
208 for_each_bio(bio)
209 bio_for_each_segment(bvec, bio, iter)
210 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
211 &nsegs, &cluster);
187 212
213 return nsegs;
214}
215
216/*
217 * map a request to scatterlist, return number of sg entries setup. Caller
218 * must make sure sg can hold rq->nr_phys_segments entries
219 */
220int blk_rq_map_sg(struct request_queue *q, struct request *rq,
221 struct scatterlist *sglist)
222{
223 struct scatterlist *sg = NULL;
224 int nsegs = 0;
225
226 if (rq->bio)
227 nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
188 228
189 if (unlikely(rq->cmd_flags & REQ_COPY_USER) && 229 if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
190 (blk_rq_bytes(rq) & q->dma_pad_mask)) { 230 (blk_rq_bytes(rq) & q->dma_pad_mask)) {
@@ -230,20 +270,13 @@ EXPORT_SYMBOL(blk_rq_map_sg);
230int blk_bio_map_sg(struct request_queue *q, struct bio *bio, 270int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
231 struct scatterlist *sglist) 271 struct scatterlist *sglist)
232{ 272{
233 struct bio_vec bvec, bvprv = { NULL }; 273 struct scatterlist *sg = NULL;
234 struct scatterlist *sg; 274 int nsegs;
235 int nsegs, cluster; 275 struct bio *next = bio->bi_next;
236 struct bvec_iter iter; 276 bio->bi_next = NULL;
237
238 nsegs = 0;
239 cluster = blk_queue_cluster(q);
240
241 sg = NULL;
242 bio_for_each_segment(bvec, bio, iter) {
243 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg,
244 &nsegs, &cluster);
245 } /* segments in bio */
246 277
278 nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
279 bio->bi_next = next;
247 if (sg) 280 if (sg)
248 sg_mark_end(sg); 281 sg_mark_end(sg);
249 282
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 5d70edc9855f..83ae96c51a27 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -184,7 +184,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
184ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) 184ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
185{ 185{
186 char *orig_page = page; 186 char *orig_page = page;
187 int cpu; 187 unsigned int cpu;
188 188
189 if (!tags) 189 if (!tags)
190 return 0; 190 return 0;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 57039fcd9c93..1fa9dd153fde 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -226,15 +226,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
226 return rq; 226 return rq;
227} 227}
228 228
229struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 229struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
230 gfp_t gfp, bool reserved)
231{ 230{
232 struct request *rq; 231 struct request *rq;
233 232
234 if (blk_mq_queue_enter(q)) 233 if (blk_mq_queue_enter(q))
235 return NULL; 234 return NULL;
236 235
237 rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved); 236 rq = blk_mq_alloc_request_pinned(q, rw, gfp, false);
238 if (rq) 237 if (rq)
239 blk_mq_put_ctx(rq->mq_ctx); 238 blk_mq_put_ctx(rq->mq_ctx);
240 return rq; 239 return rq;
@@ -258,7 +257,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
258/* 257/*
259 * Re-init and set pdu, if we have it 258 * Re-init and set pdu, if we have it
260 */ 259 */
261static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) 260void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq)
262{ 261{
263 blk_rq_init(hctx->queue, rq); 262 blk_rq_init(hctx->queue, rq);
264 263
@@ -305,7 +304,7 @@ static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error)
305 bio_endio(bio, error); 304 bio_endio(bio, error);
306} 305}
307 306
308void blk_mq_complete_request(struct request *rq, int error) 307void blk_mq_end_io(struct request *rq, int error)
309{ 308{
310 struct bio *bio = rq->bio; 309 struct bio *bio = rq->bio;
311 unsigned int bytes = 0; 310 unsigned int bytes = 0;
@@ -330,48 +329,55 @@ void blk_mq_complete_request(struct request *rq, int error)
330 else 329 else
331 blk_mq_free_request(rq); 330 blk_mq_free_request(rq);
332} 331}
332EXPORT_SYMBOL(blk_mq_end_io);
333 333
334void __blk_mq_end_io(struct request *rq, int error) 334static void __blk_mq_complete_request_remote(void *data)
335{
336 if (!blk_mark_rq_complete(rq))
337 blk_mq_complete_request(rq, error);
338}
339
340static void blk_mq_end_io_remote(void *data)
341{ 335{
342 struct request *rq = data; 336 struct request *rq = data;
343 337
344 __blk_mq_end_io(rq, rq->errors); 338 rq->q->softirq_done_fn(rq);
345} 339}
346 340
347/* 341void __blk_mq_complete_request(struct request *rq)
348 * End IO on this request on a multiqueue enabled driver. We'll either do
349 * it directly inline, or punt to a local IPI handler on the matching
350 * remote CPU.
351 */
352void blk_mq_end_io(struct request *rq, int error)
353{ 342{
354 struct blk_mq_ctx *ctx = rq->mq_ctx; 343 struct blk_mq_ctx *ctx = rq->mq_ctx;
355 int cpu; 344 int cpu;
356 345
357 if (!ctx->ipi_redirect) 346 if (!ctx->ipi_redirect) {
358 return __blk_mq_end_io(rq, error); 347 rq->q->softirq_done_fn(rq);
348 return;
349 }
359 350
360 cpu = get_cpu(); 351 cpu = get_cpu();
361 if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { 352 if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
362 rq->errors = error; 353 rq->csd.func = __blk_mq_complete_request_remote;
363 rq->csd.func = blk_mq_end_io_remote;
364 rq->csd.info = rq; 354 rq->csd.info = rq;
365 rq->csd.flags = 0; 355 rq->csd.flags = 0;
366 __smp_call_function_single(ctx->cpu, &rq->csd, 0); 356 __smp_call_function_single(ctx->cpu, &rq->csd, 0);
367 } else { 357 } else {
368 __blk_mq_end_io(rq, error); 358 rq->q->softirq_done_fn(rq);
369 } 359 }
370 put_cpu(); 360 put_cpu();
371} 361}
372EXPORT_SYMBOL(blk_mq_end_io);
373 362
374static void blk_mq_start_request(struct request *rq) 363/**
364 * blk_mq_complete_request - end I/O on a request
365 * @rq: the request being processed
366 *
367 * Description:
368 * Ends all I/O on a request. It does not handle partial completions.
369 * The actual completion happens out-of-order, through a IPI handler.
370 **/
371void blk_mq_complete_request(struct request *rq)
372{
373 if (unlikely(blk_should_fake_timeout(rq->q)))
374 return;
375 if (!blk_mark_rq_complete(rq))
376 __blk_mq_complete_request(rq);
377}
378EXPORT_SYMBOL(blk_mq_complete_request);
379
380static void blk_mq_start_request(struct request *rq, bool last)
375{ 381{
376 struct request_queue *q = rq->q; 382 struct request_queue *q = rq->q;
377 383
@@ -384,6 +390,25 @@ static void blk_mq_start_request(struct request *rq)
384 */ 390 */
385 rq->deadline = jiffies + q->rq_timeout; 391 rq->deadline = jiffies + q->rq_timeout;
386 set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 392 set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
393
394 if (q->dma_drain_size && blk_rq_bytes(rq)) {
395 /*
396 * Make sure space for the drain appears. We know we can do
397 * this because max_hw_segments has been adjusted to be one
398 * fewer than the device can handle.
399 */
400 rq->nr_phys_segments++;
401 }
402
403 /*
404 * Flag the last request in the series so that drivers know when IO
405 * should be kicked off, if they don't do it on a per-request basis.
406 *
407 * Note: the flag isn't the only condition drivers should do kick off.
408 * If drive is busy, the last request might not have the bit set.
409 */
410 if (last)
411 rq->cmd_flags |= REQ_END;
387} 412}
388 413
389static void blk_mq_requeue_request(struct request *rq) 414static void blk_mq_requeue_request(struct request *rq)
@@ -392,6 +417,11 @@ static void blk_mq_requeue_request(struct request *rq)
392 417
393 trace_block_rq_requeue(q, rq); 418 trace_block_rq_requeue(q, rq);
394 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 419 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
420
421 rq->cmd_flags &= ~REQ_END;
422
423 if (q->dma_drain_size && blk_rq_bytes(rq))
424 rq->nr_phys_segments--;
395} 425}
396 426
397struct blk_mq_timeout_data { 427struct blk_mq_timeout_data {
@@ -559,19 +589,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
559 589
560 rq = list_first_entry(&rq_list, struct request, queuelist); 590 rq = list_first_entry(&rq_list, struct request, queuelist);
561 list_del_init(&rq->queuelist); 591 list_del_init(&rq->queuelist);
562 blk_mq_start_request(rq);
563 592
564 /* 593 blk_mq_start_request(rq, list_empty(&rq_list));
565 * Last request in the series. Flag it as such, this
566 * enables drivers to know when IO should be kicked off,
567 * if they don't do it on a per-request basis.
568 *
569 * Note: the flag isn't the only condition drivers
570 * should do kick off. If drive is busy, the last
571 * request might not have the bit set.
572 */
573 if (list_empty(&rq_list))
574 rq->cmd_flags |= REQ_END;
575 594
576 ret = q->mq_ops->queue_rq(hctx, rq); 595 ret = q->mq_ops->queue_rq(hctx, rq);
577 switch (ret) { 596 switch (ret) {
@@ -589,8 +608,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
589 break; 608 break;
590 default: 609 default:
591 pr_err("blk-mq: bad return on queue: %d\n", ret); 610 pr_err("blk-mq: bad return on queue: %d\n", ret);
592 rq->errors = -EIO;
593 case BLK_MQ_RQ_QUEUE_ERROR: 611 case BLK_MQ_RQ_QUEUE_ERROR:
612 rq->errors = -EIO;
594 blk_mq_end_io(rq, rq->errors); 613 blk_mq_end_io(rq, rq->errors);
595 break; 614 break;
596 } 615 }
@@ -693,13 +712,16 @@ static void blk_mq_work_fn(struct work_struct *work)
693} 712}
694 713
695static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, 714static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
696 struct request *rq) 715 struct request *rq, bool at_head)
697{ 716{
698 struct blk_mq_ctx *ctx = rq->mq_ctx; 717 struct blk_mq_ctx *ctx = rq->mq_ctx;
699 718
700 trace_block_rq_insert(hctx->queue, rq); 719 trace_block_rq_insert(hctx->queue, rq);
701 720
702 list_add_tail(&rq->queuelist, &ctx->rq_list); 721 if (at_head)
722 list_add(&rq->queuelist, &ctx->rq_list);
723 else
724 list_add_tail(&rq->queuelist, &ctx->rq_list);
703 blk_mq_hctx_mark_pending(hctx, ctx); 725 blk_mq_hctx_mark_pending(hctx, ctx);
704 726
705 /* 727 /*
@@ -709,7 +731,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
709} 731}
710 732
711void blk_mq_insert_request(struct request_queue *q, struct request *rq, 733void blk_mq_insert_request(struct request_queue *q, struct request *rq,
712 bool run_queue) 734 bool at_head, bool run_queue)
713{ 735{
714 struct blk_mq_hw_ctx *hctx; 736 struct blk_mq_hw_ctx *hctx;
715 struct blk_mq_ctx *ctx, *current_ctx; 737 struct blk_mq_ctx *ctx, *current_ctx;
@@ -728,7 +750,7 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq,
728 rq->mq_ctx = ctx; 750 rq->mq_ctx = ctx;
729 } 751 }
730 spin_lock(&ctx->lock); 752 spin_lock(&ctx->lock);
731 __blk_mq_insert_request(hctx, rq); 753 __blk_mq_insert_request(hctx, rq, at_head);
732 spin_unlock(&ctx->lock); 754 spin_unlock(&ctx->lock);
733 755
734 blk_mq_put_ctx(current_ctx); 756 blk_mq_put_ctx(current_ctx);
@@ -760,7 +782,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
760 782
761 /* ctx->cpu might be offline */ 783 /* ctx->cpu might be offline */
762 spin_lock(&ctx->lock); 784 spin_lock(&ctx->lock);
763 __blk_mq_insert_request(hctx, rq); 785 __blk_mq_insert_request(hctx, rq, false);
764 spin_unlock(&ctx->lock); 786 spin_unlock(&ctx->lock);
765 787
766 blk_mq_put_ctx(current_ctx); 788 blk_mq_put_ctx(current_ctx);
@@ -798,7 +820,7 @@ static void blk_mq_insert_requests(struct request_queue *q,
798 rq = list_first_entry(list, struct request, queuelist); 820 rq = list_first_entry(list, struct request, queuelist);
799 list_del_init(&rq->queuelist); 821 list_del_init(&rq->queuelist);
800 rq->mq_ctx = ctx; 822 rq->mq_ctx = ctx;
801 __blk_mq_insert_request(hctx, rq); 823 __blk_mq_insert_request(hctx, rq, false);
802 } 824 }
803 spin_unlock(&ctx->lock); 825 spin_unlock(&ctx->lock);
804 826
@@ -888,6 +910,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
888 910
889 blk_queue_bounce(q, &bio); 911 blk_queue_bounce(q, &bio);
890 912
913 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
914 bio_endio(bio, -EIO);
915 return;
916 }
917
891 if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) 918 if (use_plug && blk_attempt_plug_merge(q, bio, &request_count))
892 return; 919 return;
893 920
@@ -950,7 +977,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
950 __blk_mq_free_request(hctx, ctx, rq); 977 __blk_mq_free_request(hctx, ctx, rq);
951 else { 978 else {
952 blk_mq_bio_to_request(rq, bio); 979 blk_mq_bio_to_request(rq, bio);
953 __blk_mq_insert_request(hctx, rq); 980 __blk_mq_insert_request(hctx, rq, false);
954 } 981 }
955 982
956 spin_unlock(&ctx->lock); 983 spin_unlock(&ctx->lock);
@@ -1309,15 +1336,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
1309 reg->queue_depth = BLK_MQ_MAX_DEPTH; 1336 reg->queue_depth = BLK_MQ_MAX_DEPTH;
1310 } 1337 }
1311 1338
1312 /*
1313 * Set aside a tag for flush requests. It will only be used while
1314 * another flush request is in progress but outside the driver.
1315 *
1316 * TODO: only allocate if flushes are supported
1317 */
1318 reg->queue_depth++;
1319 reg->reserved_tags++;
1320
1321 if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) 1339 if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
1322 return ERR_PTR(-EINVAL); 1340 return ERR_PTR(-EINVAL);
1323 1341
@@ -1360,17 +1378,27 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
1360 q->mq_ops = reg->ops; 1378 q->mq_ops = reg->ops;
1361 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; 1379 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
1362 1380
1381 q->sg_reserved_size = INT_MAX;
1382
1363 blk_queue_make_request(q, blk_mq_make_request); 1383 blk_queue_make_request(q, blk_mq_make_request);
1364 blk_queue_rq_timed_out(q, reg->ops->timeout); 1384 blk_queue_rq_timed_out(q, reg->ops->timeout);
1365 if (reg->timeout) 1385 if (reg->timeout)
1366 blk_queue_rq_timeout(q, reg->timeout); 1386 blk_queue_rq_timeout(q, reg->timeout);
1367 1387
1388 if (reg->ops->complete)
1389 blk_queue_softirq_done(q, reg->ops->complete);
1390
1368 blk_mq_init_flush(q); 1391 blk_mq_init_flush(q);
1369 blk_mq_init_cpu_queues(q, reg->nr_hw_queues); 1392 blk_mq_init_cpu_queues(q, reg->nr_hw_queues);
1370 1393
1371 if (blk_mq_init_hw_queues(q, reg, driver_data)) 1394 q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size,
1395 cache_line_size()), GFP_KERNEL);
1396 if (!q->flush_rq)
1372 goto err_hw; 1397 goto err_hw;
1373 1398
1399 if (blk_mq_init_hw_queues(q, reg, driver_data))
1400 goto err_flush_rq;
1401
1374 blk_mq_map_swqueue(q); 1402 blk_mq_map_swqueue(q);
1375 1403
1376 mutex_lock(&all_q_mutex); 1404 mutex_lock(&all_q_mutex);
@@ -1378,6 +1406,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
1378 mutex_unlock(&all_q_mutex); 1406 mutex_unlock(&all_q_mutex);
1379 1407
1380 return q; 1408 return q;
1409
1410err_flush_rq:
1411 kfree(q->flush_rq);
1381err_hw: 1412err_hw:
1382 kfree(q->mq_map); 1413 kfree(q->mq_map);
1383err_map: 1414err_map:
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5c3917984b00..ed0035cd458e 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -22,13 +22,13 @@ struct blk_mq_ctx {
22 struct kobject kobj; 22 struct kobject kobj;
23}; 23};
24 24
25void __blk_mq_end_io(struct request *rq, int error); 25void __blk_mq_complete_request(struct request *rq);
26void blk_mq_complete_request(struct request *rq, int error);
27void blk_mq_run_request(struct request *rq, bool run_queue, bool async); 26void blk_mq_run_request(struct request *rq, bool run_queue, bool async);
28void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 27void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
29void blk_mq_init_flush(struct request_queue *q); 28void blk_mq_init_flush(struct request_queue *q);
30void blk_mq_drain_queue(struct request_queue *q); 29void blk_mq_drain_queue(struct request_queue *q);
31void blk_mq_free_queue(struct request_queue *q); 30void blk_mq_free_queue(struct request_queue *q);
31void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq);
32 32
33/* 33/*
34 * CPU hotplug helpers 34 * CPU hotplug helpers
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8095c4a21fc0..7500f876dae4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -549,6 +549,8 @@ static void blk_release_queue(struct kobject *kobj)
549 if (q->mq_ops) 549 if (q->mq_ops)
550 blk_mq_free_queue(q); 550 blk_mq_free_queue(q);
551 551
552 kfree(q->flush_rq);
553
552 blk_trace_shutdown(q); 554 blk_trace_shutdown(q);
553 555
554 bdi_destroy(&q->backing_dev_info); 556 bdi_destroy(&q->backing_dev_info);
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index bba81c9348e1..d96f7061c6fd 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req)
91 case BLK_EH_HANDLED: 91 case BLK_EH_HANDLED:
92 /* Can we use req->errors here? */ 92 /* Can we use req->errors here? */
93 if (q->mq_ops) 93 if (q->mq_ops)
94 blk_mq_complete_request(req, req->errors); 94 __blk_mq_complete_request(req);
95 else 95 else
96 __blk_complete_request(req); 96 __blk_complete_request(req);
97 break; 97 break;
diff --git a/block/blk.h b/block/blk.h
index c90e1d8f7a2b..d23b415b8a28 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -113,7 +113,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
113 q->flush_queue_delayed = 1; 113 q->flush_queue_delayed = 1;
114 return NULL; 114 return NULL;
115 } 115 }
116 if (unlikely(blk_queue_dying(q)) || 116 if (unlikely(blk_queue_bypass(q)) ||
117 !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) 117 !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
118 return NULL; 118 return NULL;
119 } 119 }