diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-02-14 13:45:18 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-02-14 13:45:18 -0500 |
commit | 5e57dc81106b942786f5db8e7ab8788bb9319933 (patch) | |
tree | 4533e01e745bba3614c77200b3fd96dd7af7e04e /block | |
parent | 0d25e3691186f5ae6feb0229717a60a5169dc5b2 (diff) | |
parent | c8123f8c9cb517403b51aa41c3c46ff5e10b2c17 (diff) |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block IO fixes from Jens Axboe:
"Second round of updates and fixes for 3.14-rc2. Most of this stuff
has been queued up for a while. The notable exception is the blk-mq
changes, which are naturally a bit more in flux still.
The pull request contains:
- Two bug fixes for the new immutable vecs, causing crashes with raid
or swap. From Kent.
- Various blk-mq tweaks and fixes from Christoph. A fix for
integrity bio's from Nic.
- A few bcache fixes from Kent and Darrick Wong.
- xen-blk{front,back} fixes from David Vrabel, Matt Rushton, Nicolas
Swenson, and Roger Pau Monne.
- Fix for a vec miscount with integrity vectors from Martin.
- Minor annotations or fixes from Masanari Iida and Rashika Kheria.
- Tweak to null_blk to do more normal FIFO processing of requests
from Shlomo Pongratz.
- Elevator switching bypass fix from Tejun.
- Softlockup in blkdev_issue_discard() fix when !CONFIG_PREEMPT from
me"
* 'for-linus' of git://git.kernel.dk/linux-block: (31 commits)
block: add cond_resched() to potentially long running ioctl discard loop
xen-blkback: init persistent_purge_work work_struct
blk-mq: pair blk_mq_start_request / blk_mq_requeue_request
blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq
block: Fix cloning of discard/write same bios
block: Fix type mismatch in ssize_t_blk_mq_tag_sysfs_show
blk-mq: rework flush sequencing logic
null_blk: use blk_complete_request and blk_mq_complete_request
virtio_blk: use blk_mq_complete_request
blk-mq: rework I/O completions
fs: Add prototype declaration to appropriate header file include/linux/bio.h
fs: Mark function as static in fs/bio-integrity.c
block/null_blk: Fix completion processing from LIFO to FIFO
block: Explicitly handle discard/write same segments
block: Fix nr_vecs for inline integrity vectors
blk-mq: Add bio_integrity setup to blk_mq_make_request
blk-mq: initialize sg_reserved_size
blk-mq: handle dma_drain_size
blk-mq: divert __blk_put_request for MQ ops
blk-mq: support at_head inserations for blk_execute_rq
...
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-core.c | 20 | ||||
-rw-r--r-- | block/blk-exec.c | 2 | ||||
-rw-r--r-- | block/blk-flush.c | 101 | ||||
-rw-r--r-- | block/blk-lib.c | 8 | ||||
-rw-r--r-- | block/blk-merge.c | 91 | ||||
-rw-r--r-- | block/blk-mq-tag.c | 2 | ||||
-rw-r--r-- | block/blk-mq.c | 143 | ||||
-rw-r--r-- | block/blk-mq.h | 4 | ||||
-rw-r--r-- | block/blk-sysfs.c | 2 | ||||
-rw-r--r-- | block/blk-timeout.c | 2 | ||||
-rw-r--r-- | block/blk.h | 2 |
11 files changed, 219 insertions, 158 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index c00e0bdeab4a..853f92749202 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) | |||
693 | if (!uninit_q) | 693 | if (!uninit_q) |
694 | return NULL; | 694 | return NULL; |
695 | 695 | ||
696 | uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); | ||
697 | if (!uninit_q->flush_rq) | ||
698 | goto out_cleanup_queue; | ||
699 | |||
696 | q = blk_init_allocated_queue(uninit_q, rfn, lock); | 700 | q = blk_init_allocated_queue(uninit_q, rfn, lock); |
697 | if (!q) | 701 | if (!q) |
698 | blk_cleanup_queue(uninit_q); | 702 | goto out_free_flush_rq; |
699 | |||
700 | return q; | 703 | return q; |
704 | |||
705 | out_free_flush_rq: | ||
706 | kfree(uninit_q->flush_rq); | ||
707 | out_cleanup_queue: | ||
708 | blk_cleanup_queue(uninit_q); | ||
709 | return NULL; | ||
701 | } | 710 | } |
702 | EXPORT_SYMBOL(blk_init_queue_node); | 711 | EXPORT_SYMBOL(blk_init_queue_node); |
703 | 712 | ||
@@ -1127,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw, | |||
1127 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 1136 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
1128 | { | 1137 | { |
1129 | if (q->mq_ops) | 1138 | if (q->mq_ops) |
1130 | return blk_mq_alloc_request(q, rw, gfp_mask, false); | 1139 | return blk_mq_alloc_request(q, rw, gfp_mask); |
1131 | else | 1140 | else |
1132 | return blk_old_get_request(q, rw, gfp_mask); | 1141 | return blk_old_get_request(q, rw, gfp_mask); |
1133 | } | 1142 | } |
@@ -1278,6 +1287,11 @@ void __blk_put_request(struct request_queue *q, struct request *req) | |||
1278 | if (unlikely(!q)) | 1287 | if (unlikely(!q)) |
1279 | return; | 1288 | return; |
1280 | 1289 | ||
1290 | if (q->mq_ops) { | ||
1291 | blk_mq_free_request(req); | ||
1292 | return; | ||
1293 | } | ||
1294 | |||
1281 | blk_pm_put_request(req); | 1295 | blk_pm_put_request(req); |
1282 | 1296 | ||
1283 | elv_completed_request(q, req); | 1297 | elv_completed_request(q, req); |
diff --git a/block/blk-exec.c b/block/blk-exec.c index bbfc072a79c2..c68613bb4c79 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c | |||
@@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | |||
65 | * be resued after dying flag is set | 65 | * be resued after dying flag is set |
66 | */ | 66 | */ |
67 | if (q->mq_ops) { | 67 | if (q->mq_ops) { |
68 | blk_mq_insert_request(q, rq, true); | 68 | blk_mq_insert_request(q, rq, at_head, true); |
69 | return; | 69 | return; |
70 | } | 70 | } |
71 | 71 | ||
diff --git a/block/blk-flush.c b/block/blk-flush.c index 9288aaf35c21..66e2b697f5db 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c | |||
@@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq) | |||
130 | blk_clear_rq_complete(rq); | 130 | blk_clear_rq_complete(rq); |
131 | } | 131 | } |
132 | 132 | ||
133 | static void mq_flush_data_run(struct work_struct *work) | 133 | static void mq_flush_run(struct work_struct *work) |
134 | { | 134 | { |
135 | struct request *rq; | 135 | struct request *rq; |
136 | 136 | ||
137 | rq = container_of(work, struct request, mq_flush_data); | 137 | rq = container_of(work, struct request, mq_flush_work); |
138 | 138 | ||
139 | memset(&rq->csd, 0, sizeof(rq->csd)); | 139 | memset(&rq->csd, 0, sizeof(rq->csd)); |
140 | blk_mq_run_request(rq, true, false); | 140 | blk_mq_run_request(rq, true, false); |
141 | } | 141 | } |
142 | 142 | ||
143 | static void blk_mq_flush_data_insert(struct request *rq) | 143 | static bool blk_flush_queue_rq(struct request *rq) |
144 | { | 144 | { |
145 | INIT_WORK(&rq->mq_flush_data, mq_flush_data_run); | 145 | if (rq->q->mq_ops) { |
146 | kblockd_schedule_work(rq->q, &rq->mq_flush_data); | 146 | INIT_WORK(&rq->mq_flush_work, mq_flush_run); |
147 | kblockd_schedule_work(rq->q, &rq->mq_flush_work); | ||
148 | return false; | ||
149 | } else { | ||
150 | list_add_tail(&rq->queuelist, &rq->q->queue_head); | ||
151 | return true; | ||
152 | } | ||
147 | } | 153 | } |
148 | 154 | ||
149 | /** | 155 | /** |
@@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, | |||
187 | 193 | ||
188 | case REQ_FSEQ_DATA: | 194 | case REQ_FSEQ_DATA: |
189 | list_move_tail(&rq->flush.list, &q->flush_data_in_flight); | 195 | list_move_tail(&rq->flush.list, &q->flush_data_in_flight); |
190 | if (q->mq_ops) | 196 | queued = blk_flush_queue_rq(rq); |
191 | blk_mq_flush_data_insert(rq); | ||
192 | else { | ||
193 | list_add(&rq->queuelist, &q->queue_head); | ||
194 | queued = true; | ||
195 | } | ||
196 | break; | 197 | break; |
197 | 198 | ||
198 | case REQ_FSEQ_DONE: | 199 | case REQ_FSEQ_DONE: |
@@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, | |||
216 | } | 217 | } |
217 | 218 | ||
218 | kicked = blk_kick_flush(q); | 219 | kicked = blk_kick_flush(q); |
219 | /* blk_mq_run_flush will run queue */ | ||
220 | if (q->mq_ops) | ||
221 | return queued; | ||
222 | return kicked | queued; | 220 | return kicked | queued; |
223 | } | 221 | } |
224 | 222 | ||
@@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error) | |||
230 | struct request *rq, *n; | 228 | struct request *rq, *n; |
231 | unsigned long flags = 0; | 229 | unsigned long flags = 0; |
232 | 230 | ||
233 | if (q->mq_ops) { | 231 | if (q->mq_ops) |
234 | blk_mq_free_request(flush_rq); | ||
235 | spin_lock_irqsave(&q->mq_flush_lock, flags); | 232 | spin_lock_irqsave(&q->mq_flush_lock, flags); |
236 | } | 233 | |
237 | running = &q->flush_queue[q->flush_running_idx]; | 234 | running = &q->flush_queue[q->flush_running_idx]; |
238 | BUG_ON(q->flush_pending_idx == q->flush_running_idx); | 235 | BUG_ON(q->flush_pending_idx == q->flush_running_idx); |
239 | 236 | ||
@@ -263,49 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error) | |||
263 | * kblockd. | 260 | * kblockd. |
264 | */ | 261 | */ |
265 | if (queued || q->flush_queue_delayed) { | 262 | if (queued || q->flush_queue_delayed) { |
266 | if (!q->mq_ops) | 263 | WARN_ON(q->mq_ops); |
267 | blk_run_queue_async(q); | 264 | blk_run_queue_async(q); |
268 | else | ||
269 | /* | ||
270 | * This can be optimized to only run queues with requests | ||
271 | * queued if necessary. | ||
272 | */ | ||
273 | blk_mq_run_queues(q, true); | ||
274 | } | 265 | } |
275 | q->flush_queue_delayed = 0; | 266 | q->flush_queue_delayed = 0; |
276 | if (q->mq_ops) | 267 | if (q->mq_ops) |
277 | spin_unlock_irqrestore(&q->mq_flush_lock, flags); | 268 | spin_unlock_irqrestore(&q->mq_flush_lock, flags); |
278 | } | 269 | } |
279 | 270 | ||
280 | static void mq_flush_work(struct work_struct *work) | ||
281 | { | ||
282 | struct request_queue *q; | ||
283 | struct request *rq; | ||
284 | |||
285 | q = container_of(work, struct request_queue, mq_flush_work); | ||
286 | |||
287 | /* We don't need set REQ_FLUSH_SEQ, it's for consistency */ | ||
288 | rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ, | ||
289 | __GFP_WAIT|GFP_ATOMIC, true); | ||
290 | rq->cmd_type = REQ_TYPE_FS; | ||
291 | rq->end_io = flush_end_io; | ||
292 | |||
293 | blk_mq_run_request(rq, true, false); | ||
294 | } | ||
295 | |||
296 | /* | ||
297 | * We can't directly use q->flush_rq, because it doesn't have tag and is not in | ||
298 | * hctx->rqs[]. so we must allocate a new request, since we can't sleep here, | ||
299 | * so offload the work to workqueue. | ||
300 | * | ||
301 | * Note: we assume a flush request finished in any hardware queue will flush | ||
302 | * the whole disk cache. | ||
303 | */ | ||
304 | static void mq_run_flush(struct request_queue *q) | ||
305 | { | ||
306 | kblockd_schedule_work(q, &q->mq_flush_work); | ||
307 | } | ||
308 | |||
309 | /** | 271 | /** |
310 | * blk_kick_flush - consider issuing flush request | 272 | * blk_kick_flush - consider issuing flush request |
311 | * @q: request_queue being kicked | 273 | * @q: request_queue being kicked |
@@ -340,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q) | |||
340 | * different from running_idx, which means flush is in flight. | 302 | * different from running_idx, which means flush is in flight. |
341 | */ | 303 | */ |
342 | q->flush_pending_idx ^= 1; | 304 | q->flush_pending_idx ^= 1; |
305 | |||
343 | if (q->mq_ops) { | 306 | if (q->mq_ops) { |
344 | mq_run_flush(q); | 307 | struct blk_mq_ctx *ctx = first_rq->mq_ctx; |
345 | return true; | 308 | struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); |
309 | |||
310 | blk_mq_rq_init(hctx, q->flush_rq); | ||
311 | q->flush_rq->mq_ctx = ctx; | ||
312 | |||
313 | /* | ||
314 | * Reuse the tag value from the fist waiting request, | ||
315 | * with blk-mq the tag is generated during request | ||
316 | * allocation and drivers can rely on it being inside | ||
317 | * the range they asked for. | ||
318 | */ | ||
319 | q->flush_rq->tag = first_rq->tag; | ||
320 | } else { | ||
321 | blk_rq_init(q, q->flush_rq); | ||
346 | } | 322 | } |
347 | 323 | ||
348 | blk_rq_init(q, &q->flush_rq); | 324 | q->flush_rq->cmd_type = REQ_TYPE_FS; |
349 | q->flush_rq.cmd_type = REQ_TYPE_FS; | 325 | q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; |
350 | q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; | 326 | q->flush_rq->rq_disk = first_rq->rq_disk; |
351 | q->flush_rq.rq_disk = first_rq->rq_disk; | 327 | q->flush_rq->end_io = flush_end_io; |
352 | q->flush_rq.end_io = flush_end_io; | ||
353 | 328 | ||
354 | list_add_tail(&q->flush_rq.queuelist, &q->queue_head); | 329 | return blk_flush_queue_rq(q->flush_rq); |
355 | return true; | ||
356 | } | 330 | } |
357 | 331 | ||
358 | static void flush_data_end_io(struct request *rq, int error) | 332 | static void flush_data_end_io(struct request *rq, int error) |
@@ -558,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush); | |||
558 | void blk_mq_init_flush(struct request_queue *q) | 532 | void blk_mq_init_flush(struct request_queue *q) |
559 | { | 533 | { |
560 | spin_lock_init(&q->mq_flush_lock); | 534 | spin_lock_init(&q->mq_flush_lock); |
561 | INIT_WORK(&q->mq_flush_work, mq_flush_work); | ||
562 | } | 535 | } |
diff --git a/block/blk-lib.c b/block/blk-lib.c index 2da76c999ef3..97a733cf3d5f 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c | |||
@@ -119,6 +119,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
119 | 119 | ||
120 | atomic_inc(&bb.done); | 120 | atomic_inc(&bb.done); |
121 | submit_bio(type, bio); | 121 | submit_bio(type, bio); |
122 | |||
123 | /* | ||
124 | * We can loop for a long time in here, if someone does | ||
125 | * full device discards (like mkfs). Be nice and allow | ||
126 | * us to schedule out to avoid softlocking if preempt | ||
127 | * is disabled. | ||
128 | */ | ||
129 | cond_resched(); | ||
122 | } | 130 | } |
123 | blk_finish_plug(&plug); | 131 | blk_finish_plug(&plug); |
124 | 132 | ||
diff --git a/block/blk-merge.c b/block/blk-merge.c index 8f8adaa95466..6c583f9c5b65 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
@@ -21,6 +21,16 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, | |||
21 | if (!bio) | 21 | if (!bio) |
22 | return 0; | 22 | return 0; |
23 | 23 | ||
24 | /* | ||
25 | * This should probably be returning 0, but blk_add_request_payload() | ||
26 | * (Christoph!!!!) | ||
27 | */ | ||
28 | if (bio->bi_rw & REQ_DISCARD) | ||
29 | return 1; | ||
30 | |||
31 | if (bio->bi_rw & REQ_WRITE_SAME) | ||
32 | return 1; | ||
33 | |||
24 | fbio = bio; | 34 | fbio = bio; |
25 | cluster = blk_queue_cluster(q); | 35 | cluster = blk_queue_cluster(q); |
26 | seg_size = 0; | 36 | seg_size = 0; |
@@ -161,30 +171,60 @@ new_segment: | |||
161 | *bvprv = *bvec; | 171 | *bvprv = *bvec; |
162 | } | 172 | } |
163 | 173 | ||
164 | /* | 174 | static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, |
165 | * map a request to scatterlist, return number of sg entries setup. Caller | 175 | struct scatterlist *sglist, |
166 | * must make sure sg can hold rq->nr_phys_segments entries | 176 | struct scatterlist **sg) |
167 | */ | ||
168 | int blk_rq_map_sg(struct request_queue *q, struct request *rq, | ||
169 | struct scatterlist *sglist) | ||
170 | { | 177 | { |
171 | struct bio_vec bvec, bvprv = { NULL }; | 178 | struct bio_vec bvec, bvprv = { NULL }; |
172 | struct req_iterator iter; | 179 | struct bvec_iter iter; |
173 | struct scatterlist *sg; | ||
174 | int nsegs, cluster; | 180 | int nsegs, cluster; |
175 | 181 | ||
176 | nsegs = 0; | 182 | nsegs = 0; |
177 | cluster = blk_queue_cluster(q); | 183 | cluster = blk_queue_cluster(q); |
178 | 184 | ||
179 | /* | 185 | if (bio->bi_rw & REQ_DISCARD) { |
180 | * for each bio in rq | 186 | /* |
181 | */ | 187 | * This is a hack - drivers should be neither modifying the |
182 | sg = NULL; | 188 | * biovec, nor relying on bi_vcnt - but because of |
183 | rq_for_each_segment(bvec, rq, iter) { | 189 | * blk_add_request_payload(), a discard bio may or may not have |
184 | __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, | 190 | * a payload we need to set up here (thank you Christoph) and |
185 | &nsegs, &cluster); | 191 | * bi_vcnt is really the only way of telling if we need to. |
186 | } /* segments in rq */ | 192 | */ |
193 | |||
194 | if (bio->bi_vcnt) | ||
195 | goto single_segment; | ||
196 | |||
197 | return 0; | ||
198 | } | ||
199 | |||
200 | if (bio->bi_rw & REQ_WRITE_SAME) { | ||
201 | single_segment: | ||
202 | *sg = sglist; | ||
203 | bvec = bio_iovec(bio); | ||
204 | sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); | ||
205 | return 1; | ||
206 | } | ||
207 | |||
208 | for_each_bio(bio) | ||
209 | bio_for_each_segment(bvec, bio, iter) | ||
210 | __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg, | ||
211 | &nsegs, &cluster); | ||
187 | 212 | ||
213 | return nsegs; | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * map a request to scatterlist, return number of sg entries setup. Caller | ||
218 | * must make sure sg can hold rq->nr_phys_segments entries | ||
219 | */ | ||
220 | int blk_rq_map_sg(struct request_queue *q, struct request *rq, | ||
221 | struct scatterlist *sglist) | ||
222 | { | ||
223 | struct scatterlist *sg = NULL; | ||
224 | int nsegs = 0; | ||
225 | |||
226 | if (rq->bio) | ||
227 | nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); | ||
188 | 228 | ||
189 | if (unlikely(rq->cmd_flags & REQ_COPY_USER) && | 229 | if (unlikely(rq->cmd_flags & REQ_COPY_USER) && |
190 | (blk_rq_bytes(rq) & q->dma_pad_mask)) { | 230 | (blk_rq_bytes(rq) & q->dma_pad_mask)) { |
@@ -230,20 +270,13 @@ EXPORT_SYMBOL(blk_rq_map_sg); | |||
230 | int blk_bio_map_sg(struct request_queue *q, struct bio *bio, | 270 | int blk_bio_map_sg(struct request_queue *q, struct bio *bio, |
231 | struct scatterlist *sglist) | 271 | struct scatterlist *sglist) |
232 | { | 272 | { |
233 | struct bio_vec bvec, bvprv = { NULL }; | 273 | struct scatterlist *sg = NULL; |
234 | struct scatterlist *sg; | 274 | int nsegs; |
235 | int nsegs, cluster; | 275 | struct bio *next = bio->bi_next; |
236 | struct bvec_iter iter; | 276 | bio->bi_next = NULL; |
237 | |||
238 | nsegs = 0; | ||
239 | cluster = blk_queue_cluster(q); | ||
240 | |||
241 | sg = NULL; | ||
242 | bio_for_each_segment(bvec, bio, iter) { | ||
243 | __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, | ||
244 | &nsegs, &cluster); | ||
245 | } /* segments in bio */ | ||
246 | 277 | ||
278 | nsegs = __blk_bios_map_sg(q, bio, sglist, &sg); | ||
279 | bio->bi_next = next; | ||
247 | if (sg) | 280 | if (sg) |
248 | sg_mark_end(sg); | 281 | sg_mark_end(sg); |
249 | 282 | ||
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index 5d70edc9855f..83ae96c51a27 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c | |||
@@ -184,7 +184,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags) | |||
184 | ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) | 184 | ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) |
185 | { | 185 | { |
186 | char *orig_page = page; | 186 | char *orig_page = page; |
187 | int cpu; | 187 | unsigned int cpu; |
188 | 188 | ||
189 | if (!tags) | 189 | if (!tags) |
190 | return 0; | 190 | return 0; |
diff --git a/block/blk-mq.c b/block/blk-mq.c index 57039fcd9c93..1fa9dd153fde 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -226,15 +226,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q, | |||
226 | return rq; | 226 | return rq; |
227 | } | 227 | } |
228 | 228 | ||
229 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, | 229 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp) |
230 | gfp_t gfp, bool reserved) | ||
231 | { | 230 | { |
232 | struct request *rq; | 231 | struct request *rq; |
233 | 232 | ||
234 | if (blk_mq_queue_enter(q)) | 233 | if (blk_mq_queue_enter(q)) |
235 | return NULL; | 234 | return NULL; |
236 | 235 | ||
237 | rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved); | 236 | rq = blk_mq_alloc_request_pinned(q, rw, gfp, false); |
238 | if (rq) | 237 | if (rq) |
239 | blk_mq_put_ctx(rq->mq_ctx); | 238 | blk_mq_put_ctx(rq->mq_ctx); |
240 | return rq; | 239 | return rq; |
@@ -258,7 +257,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request); | |||
258 | /* | 257 | /* |
259 | * Re-init and set pdu, if we have it | 258 | * Re-init and set pdu, if we have it |
260 | */ | 259 | */ |
261 | static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) | 260 | void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) |
262 | { | 261 | { |
263 | blk_rq_init(hctx->queue, rq); | 262 | blk_rq_init(hctx->queue, rq); |
264 | 263 | ||
@@ -305,7 +304,7 @@ static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error) | |||
305 | bio_endio(bio, error); | 304 | bio_endio(bio, error); |
306 | } | 305 | } |
307 | 306 | ||
308 | void blk_mq_complete_request(struct request *rq, int error) | 307 | void blk_mq_end_io(struct request *rq, int error) |
309 | { | 308 | { |
310 | struct bio *bio = rq->bio; | 309 | struct bio *bio = rq->bio; |
311 | unsigned int bytes = 0; | 310 | unsigned int bytes = 0; |
@@ -330,48 +329,55 @@ void blk_mq_complete_request(struct request *rq, int error) | |||
330 | else | 329 | else |
331 | blk_mq_free_request(rq); | 330 | blk_mq_free_request(rq); |
332 | } | 331 | } |
332 | EXPORT_SYMBOL(blk_mq_end_io); | ||
333 | 333 | ||
334 | void __blk_mq_end_io(struct request *rq, int error) | 334 | static void __blk_mq_complete_request_remote(void *data) |
335 | { | ||
336 | if (!blk_mark_rq_complete(rq)) | ||
337 | blk_mq_complete_request(rq, error); | ||
338 | } | ||
339 | |||
340 | static void blk_mq_end_io_remote(void *data) | ||
341 | { | 335 | { |
342 | struct request *rq = data; | 336 | struct request *rq = data; |
343 | 337 | ||
344 | __blk_mq_end_io(rq, rq->errors); | 338 | rq->q->softirq_done_fn(rq); |
345 | } | 339 | } |
346 | 340 | ||
347 | /* | 341 | void __blk_mq_complete_request(struct request *rq) |
348 | * End IO on this request on a multiqueue enabled driver. We'll either do | ||
349 | * it directly inline, or punt to a local IPI handler on the matching | ||
350 | * remote CPU. | ||
351 | */ | ||
352 | void blk_mq_end_io(struct request *rq, int error) | ||
353 | { | 342 | { |
354 | struct blk_mq_ctx *ctx = rq->mq_ctx; | 343 | struct blk_mq_ctx *ctx = rq->mq_ctx; |
355 | int cpu; | 344 | int cpu; |
356 | 345 | ||
357 | if (!ctx->ipi_redirect) | 346 | if (!ctx->ipi_redirect) { |
358 | return __blk_mq_end_io(rq, error); | 347 | rq->q->softirq_done_fn(rq); |
348 | return; | ||
349 | } | ||
359 | 350 | ||
360 | cpu = get_cpu(); | 351 | cpu = get_cpu(); |
361 | if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { | 352 | if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { |
362 | rq->errors = error; | 353 | rq->csd.func = __blk_mq_complete_request_remote; |
363 | rq->csd.func = blk_mq_end_io_remote; | ||
364 | rq->csd.info = rq; | 354 | rq->csd.info = rq; |
365 | rq->csd.flags = 0; | 355 | rq->csd.flags = 0; |
366 | __smp_call_function_single(ctx->cpu, &rq->csd, 0); | 356 | __smp_call_function_single(ctx->cpu, &rq->csd, 0); |
367 | } else { | 357 | } else { |
368 | __blk_mq_end_io(rq, error); | 358 | rq->q->softirq_done_fn(rq); |
369 | } | 359 | } |
370 | put_cpu(); | 360 | put_cpu(); |
371 | } | 361 | } |
372 | EXPORT_SYMBOL(blk_mq_end_io); | ||
373 | 362 | ||
374 | static void blk_mq_start_request(struct request *rq) | 363 | /** |
364 | * blk_mq_complete_request - end I/O on a request | ||
365 | * @rq: the request being processed | ||
366 | * | ||
367 | * Description: | ||
368 | * Ends all I/O on a request. It does not handle partial completions. | ||
369 | * The actual completion happens out-of-order, through a IPI handler. | ||
370 | **/ | ||
371 | void blk_mq_complete_request(struct request *rq) | ||
372 | { | ||
373 | if (unlikely(blk_should_fake_timeout(rq->q))) | ||
374 | return; | ||
375 | if (!blk_mark_rq_complete(rq)) | ||
376 | __blk_mq_complete_request(rq); | ||
377 | } | ||
378 | EXPORT_SYMBOL(blk_mq_complete_request); | ||
379 | |||
380 | static void blk_mq_start_request(struct request *rq, bool last) | ||
375 | { | 381 | { |
376 | struct request_queue *q = rq->q; | 382 | struct request_queue *q = rq->q; |
377 | 383 | ||
@@ -384,6 +390,25 @@ static void blk_mq_start_request(struct request *rq) | |||
384 | */ | 390 | */ |
385 | rq->deadline = jiffies + q->rq_timeout; | 391 | rq->deadline = jiffies + q->rq_timeout; |
386 | set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); | 392 | set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); |
393 | |||
394 | if (q->dma_drain_size && blk_rq_bytes(rq)) { | ||
395 | /* | ||
396 | * Make sure space for the drain appears. We know we can do | ||
397 | * this because max_hw_segments has been adjusted to be one | ||
398 | * fewer than the device can handle. | ||
399 | */ | ||
400 | rq->nr_phys_segments++; | ||
401 | } | ||
402 | |||
403 | /* | ||
404 | * Flag the last request in the series so that drivers know when IO | ||
405 | * should be kicked off, if they don't do it on a per-request basis. | ||
406 | * | ||
407 | * Note: the flag isn't the only condition drivers should do kick off. | ||
408 | * If drive is busy, the last request might not have the bit set. | ||
409 | */ | ||
410 | if (last) | ||
411 | rq->cmd_flags |= REQ_END; | ||
387 | } | 412 | } |
388 | 413 | ||
389 | static void blk_mq_requeue_request(struct request *rq) | 414 | static void blk_mq_requeue_request(struct request *rq) |
@@ -392,6 +417,11 @@ static void blk_mq_requeue_request(struct request *rq) | |||
392 | 417 | ||
393 | trace_block_rq_requeue(q, rq); | 418 | trace_block_rq_requeue(q, rq); |
394 | clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); | 419 | clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); |
420 | |||
421 | rq->cmd_flags &= ~REQ_END; | ||
422 | |||
423 | if (q->dma_drain_size && blk_rq_bytes(rq)) | ||
424 | rq->nr_phys_segments--; | ||
395 | } | 425 | } |
396 | 426 | ||
397 | struct blk_mq_timeout_data { | 427 | struct blk_mq_timeout_data { |
@@ -559,19 +589,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
559 | 589 | ||
560 | rq = list_first_entry(&rq_list, struct request, queuelist); | 590 | rq = list_first_entry(&rq_list, struct request, queuelist); |
561 | list_del_init(&rq->queuelist); | 591 | list_del_init(&rq->queuelist); |
562 | blk_mq_start_request(rq); | ||
563 | 592 | ||
564 | /* | 593 | blk_mq_start_request(rq, list_empty(&rq_list)); |
565 | * Last request in the series. Flag it as such, this | ||
566 | * enables drivers to know when IO should be kicked off, | ||
567 | * if they don't do it on a per-request basis. | ||
568 | * | ||
569 | * Note: the flag isn't the only condition drivers | ||
570 | * should do kick off. If drive is busy, the last | ||
571 | * request might not have the bit set. | ||
572 | */ | ||
573 | if (list_empty(&rq_list)) | ||
574 | rq->cmd_flags |= REQ_END; | ||
575 | 594 | ||
576 | ret = q->mq_ops->queue_rq(hctx, rq); | 595 | ret = q->mq_ops->queue_rq(hctx, rq); |
577 | switch (ret) { | 596 | switch (ret) { |
@@ -589,8 +608,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx) | |||
589 | break; | 608 | break; |
590 | default: | 609 | default: |
591 | pr_err("blk-mq: bad return on queue: %d\n", ret); | 610 | pr_err("blk-mq: bad return on queue: %d\n", ret); |
592 | rq->errors = -EIO; | ||
593 | case BLK_MQ_RQ_QUEUE_ERROR: | 611 | case BLK_MQ_RQ_QUEUE_ERROR: |
612 | rq->errors = -EIO; | ||
594 | blk_mq_end_io(rq, rq->errors); | 613 | blk_mq_end_io(rq, rq->errors); |
595 | break; | 614 | break; |
596 | } | 615 | } |
@@ -693,13 +712,16 @@ static void blk_mq_work_fn(struct work_struct *work) | |||
693 | } | 712 | } |
694 | 713 | ||
695 | static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, | 714 | static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, |
696 | struct request *rq) | 715 | struct request *rq, bool at_head) |
697 | { | 716 | { |
698 | struct blk_mq_ctx *ctx = rq->mq_ctx; | 717 | struct blk_mq_ctx *ctx = rq->mq_ctx; |
699 | 718 | ||
700 | trace_block_rq_insert(hctx->queue, rq); | 719 | trace_block_rq_insert(hctx->queue, rq); |
701 | 720 | ||
702 | list_add_tail(&rq->queuelist, &ctx->rq_list); | 721 | if (at_head) |
722 | list_add(&rq->queuelist, &ctx->rq_list); | ||
723 | else | ||
724 | list_add_tail(&rq->queuelist, &ctx->rq_list); | ||
703 | blk_mq_hctx_mark_pending(hctx, ctx); | 725 | blk_mq_hctx_mark_pending(hctx, ctx); |
704 | 726 | ||
705 | /* | 727 | /* |
@@ -709,7 +731,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, | |||
709 | } | 731 | } |
710 | 732 | ||
711 | void blk_mq_insert_request(struct request_queue *q, struct request *rq, | 733 | void blk_mq_insert_request(struct request_queue *q, struct request *rq, |
712 | bool run_queue) | 734 | bool at_head, bool run_queue) |
713 | { | 735 | { |
714 | struct blk_mq_hw_ctx *hctx; | 736 | struct blk_mq_hw_ctx *hctx; |
715 | struct blk_mq_ctx *ctx, *current_ctx; | 737 | struct blk_mq_ctx *ctx, *current_ctx; |
@@ -728,7 +750,7 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq, | |||
728 | rq->mq_ctx = ctx; | 750 | rq->mq_ctx = ctx; |
729 | } | 751 | } |
730 | spin_lock(&ctx->lock); | 752 | spin_lock(&ctx->lock); |
731 | __blk_mq_insert_request(hctx, rq); | 753 | __blk_mq_insert_request(hctx, rq, at_head); |
732 | spin_unlock(&ctx->lock); | 754 | spin_unlock(&ctx->lock); |
733 | 755 | ||
734 | blk_mq_put_ctx(current_ctx); | 756 | blk_mq_put_ctx(current_ctx); |
@@ -760,7 +782,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async) | |||
760 | 782 | ||
761 | /* ctx->cpu might be offline */ | 783 | /* ctx->cpu might be offline */ |
762 | spin_lock(&ctx->lock); | 784 | spin_lock(&ctx->lock); |
763 | __blk_mq_insert_request(hctx, rq); | 785 | __blk_mq_insert_request(hctx, rq, false); |
764 | spin_unlock(&ctx->lock); | 786 | spin_unlock(&ctx->lock); |
765 | 787 | ||
766 | blk_mq_put_ctx(current_ctx); | 788 | blk_mq_put_ctx(current_ctx); |
@@ -798,7 +820,7 @@ static void blk_mq_insert_requests(struct request_queue *q, | |||
798 | rq = list_first_entry(list, struct request, queuelist); | 820 | rq = list_first_entry(list, struct request, queuelist); |
799 | list_del_init(&rq->queuelist); | 821 | list_del_init(&rq->queuelist); |
800 | rq->mq_ctx = ctx; | 822 | rq->mq_ctx = ctx; |
801 | __blk_mq_insert_request(hctx, rq); | 823 | __blk_mq_insert_request(hctx, rq, false); |
802 | } | 824 | } |
803 | spin_unlock(&ctx->lock); | 825 | spin_unlock(&ctx->lock); |
804 | 826 | ||
@@ -888,6 +910,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
888 | 910 | ||
889 | blk_queue_bounce(q, &bio); | 911 | blk_queue_bounce(q, &bio); |
890 | 912 | ||
913 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { | ||
914 | bio_endio(bio, -EIO); | ||
915 | return; | ||
916 | } | ||
917 | |||
891 | if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) | 918 | if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) |
892 | return; | 919 | return; |
893 | 920 | ||
@@ -950,7 +977,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
950 | __blk_mq_free_request(hctx, ctx, rq); | 977 | __blk_mq_free_request(hctx, ctx, rq); |
951 | else { | 978 | else { |
952 | blk_mq_bio_to_request(rq, bio); | 979 | blk_mq_bio_to_request(rq, bio); |
953 | __blk_mq_insert_request(hctx, rq); | 980 | __blk_mq_insert_request(hctx, rq, false); |
954 | } | 981 | } |
955 | 982 | ||
956 | spin_unlock(&ctx->lock); | 983 | spin_unlock(&ctx->lock); |
@@ -1309,15 +1336,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, | |||
1309 | reg->queue_depth = BLK_MQ_MAX_DEPTH; | 1336 | reg->queue_depth = BLK_MQ_MAX_DEPTH; |
1310 | } | 1337 | } |
1311 | 1338 | ||
1312 | /* | ||
1313 | * Set aside a tag for flush requests. It will only be used while | ||
1314 | * another flush request is in progress but outside the driver. | ||
1315 | * | ||
1316 | * TODO: only allocate if flushes are supported | ||
1317 | */ | ||
1318 | reg->queue_depth++; | ||
1319 | reg->reserved_tags++; | ||
1320 | |||
1321 | if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) | 1339 | if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) |
1322 | return ERR_PTR(-EINVAL); | 1340 | return ERR_PTR(-EINVAL); |
1323 | 1341 | ||
@@ -1360,17 +1378,27 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, | |||
1360 | q->mq_ops = reg->ops; | 1378 | q->mq_ops = reg->ops; |
1361 | q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; | 1379 | q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; |
1362 | 1380 | ||
1381 | q->sg_reserved_size = INT_MAX; | ||
1382 | |||
1363 | blk_queue_make_request(q, blk_mq_make_request); | 1383 | blk_queue_make_request(q, blk_mq_make_request); |
1364 | blk_queue_rq_timed_out(q, reg->ops->timeout); | 1384 | blk_queue_rq_timed_out(q, reg->ops->timeout); |
1365 | if (reg->timeout) | 1385 | if (reg->timeout) |
1366 | blk_queue_rq_timeout(q, reg->timeout); | 1386 | blk_queue_rq_timeout(q, reg->timeout); |
1367 | 1387 | ||
1388 | if (reg->ops->complete) | ||
1389 | blk_queue_softirq_done(q, reg->ops->complete); | ||
1390 | |||
1368 | blk_mq_init_flush(q); | 1391 | blk_mq_init_flush(q); |
1369 | blk_mq_init_cpu_queues(q, reg->nr_hw_queues); | 1392 | blk_mq_init_cpu_queues(q, reg->nr_hw_queues); |
1370 | 1393 | ||
1371 | if (blk_mq_init_hw_queues(q, reg, driver_data)) | 1394 | q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size, |
1395 | cache_line_size()), GFP_KERNEL); | ||
1396 | if (!q->flush_rq) | ||
1372 | goto err_hw; | 1397 | goto err_hw; |
1373 | 1398 | ||
1399 | if (blk_mq_init_hw_queues(q, reg, driver_data)) | ||
1400 | goto err_flush_rq; | ||
1401 | |||
1374 | blk_mq_map_swqueue(q); | 1402 | blk_mq_map_swqueue(q); |
1375 | 1403 | ||
1376 | mutex_lock(&all_q_mutex); | 1404 | mutex_lock(&all_q_mutex); |
@@ -1378,6 +1406,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg, | |||
1378 | mutex_unlock(&all_q_mutex); | 1406 | mutex_unlock(&all_q_mutex); |
1379 | 1407 | ||
1380 | return q; | 1408 | return q; |
1409 | |||
1410 | err_flush_rq: | ||
1411 | kfree(q->flush_rq); | ||
1381 | err_hw: | 1412 | err_hw: |
1382 | kfree(q->mq_map); | 1413 | kfree(q->mq_map); |
1383 | err_map: | 1414 | err_map: |
diff --git a/block/blk-mq.h b/block/blk-mq.h index 5c3917984b00..ed0035cd458e 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h | |||
@@ -22,13 +22,13 @@ struct blk_mq_ctx { | |||
22 | struct kobject kobj; | 22 | struct kobject kobj; |
23 | }; | 23 | }; |
24 | 24 | ||
25 | void __blk_mq_end_io(struct request *rq, int error); | 25 | void __blk_mq_complete_request(struct request *rq); |
26 | void blk_mq_complete_request(struct request *rq, int error); | ||
27 | void blk_mq_run_request(struct request *rq, bool run_queue, bool async); | 26 | void blk_mq_run_request(struct request *rq, bool run_queue, bool async); |
28 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); | 27 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); |
29 | void blk_mq_init_flush(struct request_queue *q); | 28 | void blk_mq_init_flush(struct request_queue *q); |
30 | void blk_mq_drain_queue(struct request_queue *q); | 29 | void blk_mq_drain_queue(struct request_queue *q); |
31 | void blk_mq_free_queue(struct request_queue *q); | 30 | void blk_mq_free_queue(struct request_queue *q); |
31 | void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq); | ||
32 | 32 | ||
33 | /* | 33 | /* |
34 | * CPU hotplug helpers | 34 | * CPU hotplug helpers |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 8095c4a21fc0..7500f876dae4 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -549,6 +549,8 @@ static void blk_release_queue(struct kobject *kobj) | |||
549 | if (q->mq_ops) | 549 | if (q->mq_ops) |
550 | blk_mq_free_queue(q); | 550 | blk_mq_free_queue(q); |
551 | 551 | ||
552 | kfree(q->flush_rq); | ||
553 | |||
552 | blk_trace_shutdown(q); | 554 | blk_trace_shutdown(q); |
553 | 555 | ||
554 | bdi_destroy(&q->backing_dev_info); | 556 | bdi_destroy(&q->backing_dev_info); |
diff --git a/block/blk-timeout.c b/block/blk-timeout.c index bba81c9348e1..d96f7061c6fd 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c | |||
@@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req) | |||
91 | case BLK_EH_HANDLED: | 91 | case BLK_EH_HANDLED: |
92 | /* Can we use req->errors here? */ | 92 | /* Can we use req->errors here? */ |
93 | if (q->mq_ops) | 93 | if (q->mq_ops) |
94 | blk_mq_complete_request(req, req->errors); | 94 | __blk_mq_complete_request(req); |
95 | else | 95 | else |
96 | __blk_complete_request(req); | 96 | __blk_complete_request(req); |
97 | break; | 97 | break; |
diff --git a/block/blk.h b/block/blk.h index c90e1d8f7a2b..d23b415b8a28 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -113,7 +113,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) | |||
113 | q->flush_queue_delayed = 1; | 113 | q->flush_queue_delayed = 1; |
114 | return NULL; | 114 | return NULL; |
115 | } | 115 | } |
116 | if (unlikely(blk_queue_dying(q)) || | 116 | if (unlikely(blk_queue_bypass(q)) || |
117 | !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) | 117 | !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) |
118 | return NULL; | 118 | return NULL; |
119 | } | 119 | } |