| author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-02-14 13:45:18 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-02-14 13:45:18 -0500 |
| commit | 5e57dc81106b942786f5db8e7ab8788bb9319933 (patch) | |
| tree | 4533e01e745bba3614c77200b3fd96dd7af7e04e | |
| parent | 0d25e3691186f5ae6feb0229717a60a5169dc5b2 (diff) | |
| parent | c8123f8c9cb517403b51aa41c3c46ff5e10b2c17 (diff) | |
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block IO fixes from Jens Axboe:
"Second round of updates and fixes for 3.14-rc2. Most of this stuff
has been queued up for a while. The notable exception is the blk-mq
changes, which are naturally a bit more in flux still.
The pull request contains:
- Two bug fixes for the new immutable vecs, causing crashes with raid
or swap. From Kent.
- Various blk-mq tweaks and fixes from Christoph. A fix for
integrity bios from Nic.
- A few bcache fixes from Kent and Darrick Wong.
- xen-blk{front,back} fixes from David Vrabel, Matt Rushton, Nicolas
Swenson, and Roger Pau Monne.
- Fix for a vec miscount with integrity vectors from Martin.
- Minor annotations or fixes from Masanari Iida and Rashika Kheria.
- Tweak to null_blk to do more normal FIFO processing of requests
from Shlomo Pongratz.
- Elevator switching bypass fix from Tejun.
- Softlockup in blkdev_issue_discard() fix when !CONFIG_PREEMPT from
me"
* 'for-linus' of git://git.kernel.dk/linux-block: (31 commits)
block: add cond_resched() to potentially long running ioctl discard loop
xen-blkback: init persistent_purge_work work_struct
blk-mq: pair blk_mq_start_request / blk_mq_requeue_request
blk-mq: dont assume rq->errors is set when returning an error from ->queue_rq
block: Fix cloning of discard/write same bios
block: Fix type mismatch in ssize_t_blk_mq_tag_sysfs_show
blk-mq: rework flush sequencing logic
null_blk: use blk_complete_request and blk_mq_complete_request
virtio_blk: use blk_mq_complete_request
blk-mq: rework I/O completions
fs: Add prototype declaration to appropriate header file include/linux/bio.h
fs: Mark function as static in fs/bio-integrity.c
block/null_blk: Fix completion processing from LIFO to FIFO
block: Explicitly handle discard/write same segments
block: Fix nr_vecs for inline integrity vectors
blk-mq: Add bio_integrity setup to blk_mq_make_request
blk-mq: initialize sg_reserved_size
blk-mq: handle dma_drain_size
blk-mq: divert __blk_put_request for MQ ops
blk-mq: support at_head inserations for blk_execute_rq
...
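
Of the fixes listed above, the blkdev_issue_discard() softlockup fix is the easiest to see in isolation: the blk-lib.c hunk further down simply adds a cond_resched() call to the bio submission loop. The sketch below only illustrates that pattern; it is not the kernel's actual code, and the submit_discard_bios() helper, its arguments, and the bio array are hypothetical (3.14-era submit_bio(rw, bio) and cond_resched() assumed).

```c
/*
 * Minimal sketch of the pattern used by the blk-lib.c change below: a
 * full-device discard (e.g. from mkfs) can submit a huge number of bios
 * from a single loop, so yielding with cond_resched() between submissions
 * avoids tripping the softlockup watchdog on !CONFIG_PREEMPT kernels.
 * submit_discard_bios() and its bio array are hypothetical helpers.
 */
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/sched.h>

static void submit_discard_bios(struct bio **bios, unsigned int nr_bios)
{
	unsigned int i;

	for (i = 0; i < nr_bios; i++) {
		/* 3.14-era submit_bio(rw, bio) signature */
		submit_bio(REQ_WRITE | REQ_DISCARD, bios[i]);

		/* Allow rescheduling in a potentially very long loop. */
		cond_resched();
	}
}
```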
| -rw-r--r-- | block/blk-core.c | 20 |
| -rw-r--r-- | block/blk-exec.c | 2 |
| -rw-r--r-- | block/blk-flush.c | 101 |
| -rw-r--r-- | block/blk-lib.c | 8 |
| -rw-r--r-- | block/blk-merge.c | 91 |
| -rw-r--r-- | block/blk-mq-tag.c | 2 |
| -rw-r--r-- | block/blk-mq.c | 143 |
| -rw-r--r-- | block/blk-mq.h | 4 |
| -rw-r--r-- | block/blk-sysfs.c | 2 |
| -rw-r--r-- | block/blk-timeout.c | 2 |
| -rw-r--r-- | block/blk.h | 2 |
| -rw-r--r-- | drivers/block/null_blk.c | 97 |
| -rw-r--r-- | drivers/block/virtio_blk.c | 7 |
| -rw-r--r-- | drivers/block/xen-blkback/blkback.c | 66 |
| -rw-r--r-- | drivers/block/xen-blkback/common.h | 5 |
| -rw-r--r-- | drivers/block/xen-blkback/xenbus.c | 14 |
| -rw-r--r-- | drivers/block/xen-blkfront.c | 11 |
| -rw-r--r-- | drivers/md/bcache/bcache.h | 4 |
| -rw-r--r-- | drivers/md/bcache/bset.c | 7 |
| -rw-r--r-- | drivers/md/bcache/btree.c | 4 |
| -rw-r--r-- | drivers/md/bcache/request.c | 6 |
| -rw-r--r-- | drivers/md/bcache/sysfs.c | 2 |
| -rw-r--r-- | fs/bio-integrity.c | 13 |
| -rw-r--r-- | fs/bio.c | 15 |
| -rw-r--r-- | include/linux/bio.h | 12 |
| -rw-r--r-- | include/linux/blk-mq.h | 9 |
| -rw-r--r-- | include/linux/blkdev.h | 11 |
| -rw-r--r-- | include/xen/interface/io/blkif.h | 34 |
| -rw-r--r-- | lib/percpu_ida.c | 7 |
29 files changed, 397 insertions, 304 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index c00e0bdeab4a..853f92749202 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
| 693 | if (!uninit_q) | 693 | if (!uninit_q) |
| 694 | return NULL; | 694 | return NULL; |
| 695 | 695 | ||
| 696 | uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); | ||
| 697 | if (!uninit_q->flush_rq) | ||
| 698 | goto out_cleanup_queue; | ||
| 699 | |||
| 696 | q = blk_init_allocated_queue(uninit_q, rfn, lock); | 700 | q = blk_init_allocated_queue(uninit_q, rfn, lock); |
| 697 | if (!q) | 701 | if (!q) |
| 698 | blk_cleanup_queue(uninit_q); | 702 | goto out_free_flush_rq; |
| 699 | |||
| 700 | return q; | 703 | return q; |
| 704 | |||
| 705 | out_free_flush_rq: | ||
| 706 | kfree(uninit_q->flush_rq); | ||
| 707 | out_cleanup_queue: | ||
| 708 | blk_cleanup_queue(uninit_q); | ||
| 709 | return NULL; | ||
| 701 | } | 710 | } |
| 702 | EXPORT_SYMBOL(blk_init_queue_node); | 711 | EXPORT_SYMBOL(blk_init_queue_node); |
| 703 | 712 | ||
@@ -1127,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
| 1127 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) | 1136 | struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) |
| 1128 | { | 1137 | { |
| 1129 | if (q->mq_ops) | 1138 | if (q->mq_ops) |
| 1130 | return blk_mq_alloc_request(q, rw, gfp_mask, false); | 1139 | return blk_mq_alloc_request(q, rw, gfp_mask); |
| 1131 | else | 1140 | else |
| 1132 | return blk_old_get_request(q, rw, gfp_mask); | 1141 | return blk_old_get_request(q, rw, gfp_mask); |
| 1133 | } | 1142 | } |
@@ -1278,6 +1287,11 @@ void __blk_put_request(struct request_queue *q, struct request *req)
| 1278 | if (unlikely(!q)) | 1287 | if (unlikely(!q)) |
| 1279 | return; | 1288 | return; |
| 1280 | 1289 | ||
| 1290 | if (q->mq_ops) { | ||
| 1291 | blk_mq_free_request(req); | ||
| 1292 | return; | ||
| 1293 | } | ||
| 1294 | |||
| 1281 | blk_pm_put_request(req); | 1295 | blk_pm_put_request(req); |
| 1282 | 1296 | ||
| 1283 | elv_completed_request(q, req); | 1297 | elv_completed_request(q, req); |
diff --git a/block/blk-exec.c b/block/blk-exec.c
index bbfc072a79c2..c68613bb4c79 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -65,7 +65,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
| 65 | * be resued after dying flag is set | 65 | * be resued after dying flag is set |
| 66 | */ | 66 | */ |
| 67 | if (q->mq_ops) { | 67 | if (q->mq_ops) { |
| 68 | blk_mq_insert_request(q, rq, true); | 68 | blk_mq_insert_request(q, rq, at_head, true); |
| 69 | return; | 69 | return; |
| 70 | } | 70 | } |
| 71 | 71 | ||
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 9288aaf35c21..66e2b697f5db 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq)
| 130 | blk_clear_rq_complete(rq); | 130 | blk_clear_rq_complete(rq); |
| 131 | } | 131 | } |
| 132 | 132 | ||
| 133 | static void mq_flush_data_run(struct work_struct *work) | 133 | static void mq_flush_run(struct work_struct *work) |
| 134 | { | 134 | { |
| 135 | struct request *rq; | 135 | struct request *rq; |
| 136 | 136 | ||
| 137 | rq = container_of(work, struct request, mq_flush_data); | 137 | rq = container_of(work, struct request, mq_flush_work); |
| 138 | 138 | ||
| 139 | memset(&rq->csd, 0, sizeof(rq->csd)); | 139 | memset(&rq->csd, 0, sizeof(rq->csd)); |
| 140 | blk_mq_run_request(rq, true, false); | 140 | blk_mq_run_request(rq, true, false); |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | static void blk_mq_flush_data_insert(struct request *rq) | 143 | static bool blk_flush_queue_rq(struct request *rq) |
| 144 | { | 144 | { |
| 145 | INIT_WORK(&rq->mq_flush_data, mq_flush_data_run); | 145 | if (rq->q->mq_ops) { |
| 146 | kblockd_schedule_work(rq->q, &rq->mq_flush_data); | 146 | INIT_WORK(&rq->mq_flush_work, mq_flush_run); |
| 147 | kblockd_schedule_work(rq->q, &rq->mq_flush_work); | ||
| 148 | return false; | ||
| 149 | } else { | ||
| 150 | list_add_tail(&rq->queuelist, &rq->q->queue_head); | ||
| 151 | return true; | ||
| 152 | } | ||
| 147 | } | 153 | } |
| 148 | 154 | ||
| 149 | /** | 155 | /** |
@@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
| 187 | 193 | ||
| 188 | case REQ_FSEQ_DATA: | 194 | case REQ_FSEQ_DATA: |
| 189 | list_move_tail(&rq->flush.list, &q->flush_data_in_flight); | 195 | list_move_tail(&rq->flush.list, &q->flush_data_in_flight); |
| 190 | if (q->mq_ops) | 196 | queued = blk_flush_queue_rq(rq); |
| 191 | blk_mq_flush_data_insert(rq); | ||
| 192 | else { | ||
| 193 | list_add(&rq->queuelist, &q->queue_head); | ||
| 194 | queued = true; | ||
| 195 | } | ||
| 196 | break; | 197 | break; |
| 197 | 198 | ||
| 198 | case REQ_FSEQ_DONE: | 199 | case REQ_FSEQ_DONE: |
@@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
| 216 | } | 217 | } |
| 217 | 218 | ||
| 218 | kicked = blk_kick_flush(q); | 219 | kicked = blk_kick_flush(q); |
| 219 | /* blk_mq_run_flush will run queue */ | ||
| 220 | if (q->mq_ops) | ||
| 221 | return queued; | ||
| 222 | return kicked | queued; | 220 | return kicked | queued; |
| 223 | } | 221 | } |
| 224 | 222 | ||
@@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error)
| 230 | struct request *rq, *n; | 228 | struct request *rq, *n; |
| 231 | unsigned long flags = 0; | 229 | unsigned long flags = 0; |
| 232 | 230 | ||
| 233 | if (q->mq_ops) { | 231 | if (q->mq_ops) |
| 234 | blk_mq_free_request(flush_rq); | ||
| 235 | spin_lock_irqsave(&q->mq_flush_lock, flags); | 232 | spin_lock_irqsave(&q->mq_flush_lock, flags); |
| 236 | } | 233 | |
| 237 | running = &q->flush_queue[q->flush_running_idx]; | 234 | running = &q->flush_queue[q->flush_running_idx]; |
| 238 | BUG_ON(q->flush_pending_idx == q->flush_running_idx); | 235 | BUG_ON(q->flush_pending_idx == q->flush_running_idx); |
| 239 | 236 | ||
@@ -263,49 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error)
| 263 | * kblockd. | 260 | * kblockd. |
| 264 | */ | 261 | */ |
| 265 | if (queued || q->flush_queue_delayed) { | 262 | if (queued || q->flush_queue_delayed) { |
| 266 | if (!q->mq_ops) | 263 | WARN_ON(q->mq_ops); |
| 267 | blk_run_queue_async(q); | 264 | blk_run_queue_async(q); |
| 268 | else | ||
| 269 | /* | ||
| 270 | * This can be optimized to only run queues with requests | ||
| 271 | * queued if necessary. | ||
| 272 | */ | ||
| 273 | blk_mq_run_queues(q, true); | ||
| 274 | } | 265 | } |
| 275 | q->flush_queue_delayed = 0; | 266 | q->flush_queue_delayed = 0; |
| 276 | if (q->mq_ops) | 267 | if (q->mq_ops) |
| 277 | spin_unlock_irqrestore(&q->mq_flush_lock, flags); | 268 | spin_unlock_irqrestore(&q->mq_flush_lock, flags); |
| 278 | } | 269 | } |
| 279 | 270 | ||
| 280 | static void mq_flush_work(struct work_struct *work) | ||
| 281 | { | ||
| 282 | struct request_queue *q; | ||
| 283 | struct request *rq; | ||
| 284 | |||
| 285 | q = container_of(work, struct request_queue, mq_flush_work); | ||
| 286 | |||
| 287 | /* We don't need set REQ_FLUSH_SEQ, it's for consistency */ | ||
| 288 | rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ, | ||
| 289 | __GFP_WAIT|GFP_ATOMIC, true); | ||
| 290 | rq->cmd_type = REQ_TYPE_FS; | ||
| 291 | rq->end_io = flush_end_io; | ||
| 292 | |||
| 293 | blk_mq_run_request(rq, true, false); | ||
| 294 | } | ||
| 295 | |||
| 296 | /* | ||
| 297 | * We can't directly use q->flush_rq, because it doesn't have tag and is not in | ||
| 298 | * hctx->rqs[]. so we must allocate a new request, since we can't sleep here, | ||
| 299 | * so offload the work to workqueue. | ||
| 300 | * | ||
| 301 | * Note: we assume a flush request finished in any hardware queue will flush | ||
| 302 | * the whole disk cache. | ||
| 303 | */ | ||
| 304 | static void mq_run_flush(struct request_queue *q) | ||
| 305 | { | ||
| 306 | kblockd_schedule_work(q, &q->mq_flush_work); | ||
| 307 | } | ||
| 308 | |||
| 309 | /** | 271 | /** |
| 310 | * blk_kick_flush - consider issuing flush request | 272 | * blk_kick_flush - consider issuing flush request |
| 311 | * @q: request_queue being kicked | 273 | * @q: request_queue being kicked |
@@ -340,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q)
| 340 | * different from running_idx, which means flush is in flight. | 302 | * different from running_idx, which means flush is in flight. |
| 341 | */ | 303 | */ |
| 342 | q->flush_pending_idx ^= 1; | 304 | q->flush_pending_idx ^= 1; |
| 305 | |||
| 343 | if (q->mq_ops) { | 306 | if (q->mq_ops) { |
| 344 | mq_run_flush(q); | 307 | struct blk_mq_ctx *ctx = first_rq->mq_ctx; |
| 345 | return true; | 308 | struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu); |
| 309 | |||
| 310 | blk_mq_rq_init(hctx, q->flush_rq); | ||
| 311 | q->flush_rq->mq_ctx = ctx; | ||
| 312 | |||
| 313 | /* | ||
| 314 | * Reuse the tag value from the fist waiting request, | ||
| 315 | * with blk-mq the tag is generated during request | ||
| 316 | * allocation and drivers can rely on it being inside | ||
| 317 | * the range they asked for. | ||
| 318 | */ | ||
| 319 | q->flush_rq->tag = first_rq->tag; | ||
| 320 | } else { | ||
| 321 | blk_rq_init(q, q->flush_rq); | ||
| 346 | } | 322 | } |
| 347 | 323 | ||
| 348 | blk_rq_init(q, &q->flush_rq); | 324 | q->flush_rq->cmd_type = REQ_TYPE_FS; |
| 349 | q->flush_rq.cmd_type = REQ_TYPE_FS; | 325 | q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; |
| 350 | q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; | 326 | q->flush_rq->rq_disk = first_rq->rq_disk; |
| 351 | q->flush_rq.rq_disk = first_rq->rq_disk; | 327 | q->flush_rq->end_io = flush_end_io; |
| 352 | q->flush_rq.end_io = flush_end_io; | ||
| 353 | 328 | ||
| 354 | list_add_tail(&q->flush_rq.queuelist, &q->queue_head); | 329 | return blk_flush_queue_rq(q->flush_rq); |
| 355 | return true; | ||
| 356 | } | 330 | } |
| 357 | 331 | ||
| 358 | static void flush_data_end_io(struct request *rq, int error) | 332 | static void flush_data_end_io(struct request *rq, int error) |
@@ -558,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush);
| 558 | void blk_mq_init_flush(struct request_queue *q) | 532 | void blk_mq_init_flush(struct request_queue *q) |
| 559 | { | 533 | { |
| 560 | spin_lock_init(&q->mq_flush_lock); | 534 | spin_lock_init(&q->mq_flush_lock); |
| 561 | INIT_WORK(&q->mq_flush_work, mq_flush_work); | ||
| 562 | } | 535 | } |
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 2da76c999ef3..97a733cf3d5f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -119,6 +119,14 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
| 119 | 119 | ||
| 120 | atomic_inc(&bb.done); | 120 | atomic_inc(&bb.done); |
| 121 | submit_bio(type, bio); | 121 | submit_bio(type, bio); |
| 122 | |||
| 123 | /* | ||
| 124 | * We can loop for a long time in here, if someone does | ||
| 125 | * full device discards (like mkfs). Be nice and allow | ||
| 126 | * us to schedule out to avoid softlocking if preempt | ||
| 127 | * is disabled. | ||
| 128 | */ | ||
| 129 | cond_resched(); | ||
| 122 | } | 130 | } |
| 123 | blk_finish_plug(&plug); | 131 | blk_finish_plug(&plug); |
| 124 | 132 | ||
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 8f8adaa95466..6c583f9c5b65 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -21,6 +21,16 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
| 21 | if (!bio) | 21 | if (!bio) |
| 22 | return 0; | 22 | return 0; |
| 23 | 23 | ||
| 24 | /* | ||
| 25 | * This should probably be returning 0, but blk_add_request_payload() | ||
| 26 | * (Christoph!!!!) | ||
| 27 | */ | ||
| 28 | if (bio->bi_rw & REQ_DISCARD) | ||
| 29 | return 1; | ||
| 30 | |||
| 31 | if (bio->bi_rw & REQ_WRITE_SAME) | ||
| 32 | return 1; | ||
| 33 | |||
| 24 | fbio = bio; | 34 | fbio = bio; |
| 25 | cluster = blk_queue_cluster(q); | 35 | cluster = blk_queue_cluster(q); |
| 26 | seg_size = 0; | 36 | seg_size = 0; |
@@ -161,30 +171,60 @@ new_segment:
| 161 | *bvprv = *bvec; | 171 | *bvprv = *bvec; |
| 162 | } | 172 | } |
| 163 | 173 | ||
| 164 | /* | 174 | static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, |
| 165 | * map a request to scatterlist, return number of sg entries setup. Caller | 175 | struct scatterlist *sglist, |
| 166 | * must make sure sg can hold rq->nr_phys_segments entries | 176 | struct scatterlist **sg) |
| 167 | */ | ||
| 168 | int blk_rq_map_sg(struct request_queue *q, struct request *rq, | ||
| 169 | struct scatterlist *sglist) | ||
| 170 | { | 177 | { |
| 171 | struct bio_vec bvec, bvprv = { NULL }; | 178 | struct bio_vec bvec, bvprv = { NULL }; |
| 172 | struct req_iterator iter; | 179 | struct bvec_iter iter; |
| 173 | struct scatterlist *sg; | ||
| 174 | int nsegs, cluster; | 180 | int nsegs, cluster; |
| 175 | 181 | ||
| 176 | nsegs = 0; | 182 | nsegs = 0; |
| 177 | cluster = blk_queue_cluster(q); | 183 | cluster = blk_queue_cluster(q); |
| 178 | 184 | ||
| 179 | /* | 185 | if (bio->bi_rw & REQ_DISCARD) { |
| 180 | * for each bio in rq | 186 | /* |
| 181 | */ | 187 | * This is a hack - drivers should be neither modifying the |
| 182 | sg = NULL; | 188 | * biovec, nor relying on bi_vcnt - but because of |
| 183 | rq_for_each_segment(bvec, rq, iter) { | 189 | * blk_add_request_payload(), a discard bio may or may not have |
| 184 | __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, | 190 | * a payload we need to set up here (thank you Christoph) and |
| 185 | &nsegs, &cluster); | 191 | * bi_vcnt is really the only way of telling if we need to. |
| 186 | } /* segments in rq */ | 192 | */ |
| 193 | |||
| 194 | if (bio->bi_vcnt) | ||
| 195 | goto single_segment; | ||
| 196 | |||
| 197 | return 0; | ||
| 198 | } | ||
| 199 | |||
| 200 | if (bio->bi_rw & REQ_WRITE_SAME) { | ||
| 201 | single_segment: | ||
| 202 | *sg = sglist; | ||
| 203 | bvec = bio_iovec(bio); | ||
| 204 | sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); | ||
| 205 | return 1; | ||
| 206 | } | ||
| 207 | |||
| 208 | for_each_bio(bio) | ||
| 209 | bio_for_each_segment(bvec, bio, iter) | ||
| 210 | __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg, | ||
| 211 | &nsegs, &cluster); | ||
| 187 | 212 | ||
| 213 | return nsegs; | ||
| 214 | } | ||
| 215 | |||
| 216 | /* | ||
| 217 | * map a request to scatterlist, return number of sg entries setup. Caller | ||
| 218 | * must make sure sg can hold rq->nr_phys_segments entries | ||
| 219 | */ | ||
| 220 | int blk_rq_map_sg(struct request_queue *q, struct request *rq, | ||
| 221 | struct scatterlist *sglist) | ||
| 222 | { | ||
| 223 | struct scatterlist *sg = NULL; | ||
| 224 | int nsegs = 0; | ||
| 225 | |||
| 226 | if (rq->bio) | ||
| 227 | nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); | ||
| 188 | 228 | ||
| 189 | if (unlikely(rq->cmd_flags & REQ_COPY_USER) && | 229 | if (unlikely(rq->cmd_flags & REQ_COPY_USER) && |
| 190 | (blk_rq_bytes(rq) & q->dma_pad_mask)) { | 230 | (blk_rq_bytes(rq) & q->dma_pad_mask)) { |
@@ -230,20 +270,13 @@ EXPORT_SYMBOL(blk_rq_map_sg);
| 230 | int blk_bio_map_sg(struct request_queue *q, struct bio *bio, | 270 | int blk_bio_map_sg(struct request_queue *q, struct bio *bio, |
| 231 | struct scatterlist *sglist) | 271 | struct scatterlist *sglist) |
| 232 | { | 272 | { |
| 233 | struct bio_vec bvec, bvprv = { NULL }; | 273 | struct scatterlist *sg = NULL; |
| 234 | struct scatterlist *sg; | 274 | int nsegs; |
| 235 | int nsegs, cluster; | 275 | struct bio *next = bio->bi_next; |
| 236 | struct bvec_iter iter; | 276 | bio->bi_next = NULL; |
| 237 | |||
| 238 | nsegs = 0; | ||
| 239 | cluster = blk_queue_cluster(q); | ||
| 240 | |||
| 241 | sg = NULL; | ||
| 242 | bio_for_each_segment(bvec, bio, iter) { | ||
| 243 | __blk_segment_map_sg(q, &bvec, sglist, &bvprv, &sg, | ||
| 244 | &nsegs, &cluster); | ||
| 245 | } /* segments in bio */ | ||
| 246 | 277 | ||
| 278 | nsegs = __blk_bios_map_sg(q, bio, sglist, &sg); | ||
| 279 | bio->bi_next = next; | ||
| 247 | if (sg) | 280 | if (sg) |
| 248 | sg_mark_end(sg); | 281 | sg_mark_end(sg); |
| 249 | 282 | ||
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 5d70edc9855f..83ae96c51a27 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -184,7 +184,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
| 184 | ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) | 184 | ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page) |
| 185 | { | 185 | { |
| 186 | char *orig_page = page; | 186 | char *orig_page = page; |
| 187 | int cpu; | 187 | unsigned int cpu; |
| 188 | 188 | ||
| 189 | if (!tags) | 189 | if (!tags) |
| 190 | return 0; | 190 | return 0; |
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 57039fcd9c93..1fa9dd153fde 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -226,15 +226,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
| 226 | return rq; | 226 | return rq; |
| 227 | } | 227 | } |
| 228 | 228 | ||
| 229 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, | 229 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp) |
| 230 | gfp_t gfp, bool reserved) | ||
| 231 | { | 230 | { |
| 232 | struct request *rq; | 231 | struct request *rq; |
| 233 | 232 | ||
| 234 | if (blk_mq_queue_enter(q)) | 233 | if (blk_mq_queue_enter(q)) |
| 235 | return NULL; | 234 | return NULL; |
| 236 | 235 | ||
| 237 | rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved); | 236 | rq = blk_mq_alloc_request_pinned(q, rw, gfp, false); |
| 238 | if (rq) | 237 | if (rq) |
| 239 | blk_mq_put_ctx(rq->mq_ctx); | 238 | blk_mq_put_ctx(rq->mq_ctx); |
| 240 | return rq; | 239 | return rq; |
@@ -258,7 +257,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
| 258 | /* | 257 | /* |
| 259 | * Re-init and set pdu, if we have it | 258 | * Re-init and set pdu, if we have it |
| 260 | */ | 259 | */ |
| 261 | static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) | 260 | void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) |
| 262 | { | 261 | { |
| 263 | blk_rq_init(hctx->queue, rq); | 262 | blk_rq_init(hctx->queue, rq); |
| 264 | 263 | ||
@@ -305,7 +304,7 @@ static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error)
| 305 | bio_endio(bio, error); | 304 | bio_endio(bio, error); |
| 306 | } | 305 | } |
| 307 | 306 | ||
| 308 | void blk_mq_complete_request(struct request *rq, int error) | 307 | void blk_mq_end_io(struct request *rq, int error) |
| 309 | { | 308 | { |
| 310 | struct bio *bio = rq->bio; | 309 | struct bio *bio = rq->bio; |
| 311 | unsigned int bytes = 0; | 310 | unsigned int bytes = 0; |
@@ -330,48 +329,55 @@ void blk_mq_complete_request(struct request *rq, int error)
| 330 | else | 329 | else |
| 331 | blk_mq_free_request(rq); | 330 | blk_mq_free_request(rq); |
| 332 | } | 331 | } |
| 332 | EXPORT_SYMBOL(blk_mq_end_io); | ||
| 333 | 333 | ||
| 334 | void __blk_mq_end_io(struct request *rq, int error) | 334 | static void __blk_mq_complete_request_remote(void *data) |
| 335 | { | ||
| 336 | if (!blk_mark_rq_complete(rq)) | ||
| 337 | blk_mq_complete_request(rq, error); | ||
| 338 | } | ||
| 339 | |||
| 340 | static void blk_mq_end_io_remote(void *data) | ||
| 341 | { | 335 | { |
| 342 | struct request *rq = data; | 336 | struct request *rq = data; |
| 343 | 337 | ||
| 344 | __blk_mq_end_io(rq, rq->errors); | 338 | rq->q->softirq_done_fn(rq); |
| 345 | } | 339 | } |
| 346 | 340 | ||
| 347 | /* | 341 | void __blk_mq_complete_request(struct request *rq) |
| 348 | * End IO on this request on a multiqueue enabled driver. We'll either do | ||
| 349 | * it directly inline, or punt to a local IPI handler on the matching | ||
| 350 | * remote CPU. | ||
| 351 | */ | ||
| 352 | void blk_mq_end_io(struct request *rq, int error) | ||
| 353 | { | 342 | { |
| 354 | struct blk_mq_ctx *ctx = rq->mq_ctx; | 343 | struct blk_mq_ctx *ctx = rq->mq_ctx; |
| 355 | int cpu; | 344 | int cpu; |
| 356 | 345 | ||
| 357 | if (!ctx->ipi_redirect) | 346 | if (!ctx->ipi_redirect) { |
| 358 | return __blk_mq_end_io(rq, error); | 347 | rq->q->softirq_done_fn(rq); |
| 348 | return; | ||
| 349 | } | ||
| 359 | 350 | ||
| 360 | cpu = get_cpu(); | 351 | cpu = get_cpu(); |
| 361 | if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { | 352 | if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { |
| 362 | rq->errors = error; | 353 | rq->csd.func = __blk_mq_complete_request_remote; |
| 363 | rq->csd.func = blk_mq_end_io_remote; | ||
| 364 | rq->csd.info = rq; | 354 | rq->csd.info = rq; |
| 365 | rq->csd.flags = 0; | 355 | rq->csd.flags = 0; |
| 366 | __smp_call_function_single(ctx->cpu, &rq->csd, 0); | 356 | __smp_call_function_single(ctx->cpu, &rq->csd, 0); |
| 367 | } else { | 357 | } else { |
| 368 | __blk_mq_end_io(rq, error); | 358 | rq->q->softirq_done_fn(rq); |
| 369 | } | 359 | } |
| 370 | put_cpu(); | 360 | put_cpu(); |
| 371 | } | 361 | } |
| 372 | EXPORT_SYMBOL(blk_mq_end_io); | ||
| 373 | 362 | ||
| 374 | static void blk_mq_start_request(struct request *rq) | 363 | /** |
| 364 | * blk_mq_complete_request - end I/O on a request | ||
| 365 | * @rq: the request being processed | ||
| 366 | * | ||
| 367 | * Description: | ||
| 368 | * Ends all I/O on a request. It does not handle partial completions. | ||
| 369 | * The actual completion happens out-of-order, through a IPI handler. | ||
| 370 | **/ | ||
| 371 | void blk_mq_complete_request(struct request *rq) | ||
| 372 | { | ||
| 373 | if (unlikely(blk_should_fake_timeout(rq->q))) | ||
| 374 | return; | ||
| 375 | if (!blk_mark_rq_complete(rq)) | ||
| 376 | __blk_mq_complete_request(rq); | ||
| 377 | } | ||
| 378 | EXPORT_SYMBOL(blk_mq_complete_request); | ||
| 379 | |||
| 380 | static void blk_mq_start_request(struct request *rq, bool last) | ||
| 375 | { | 381 | { |
| 376 | struct request_queue *q = rq->q; | 382 | struct request_queue *q = rq->q; |
| 377 | 383 | ||
@@ -384,6 +390,25 @@ static void blk_mq_start_request(struct request *rq)
| 384 | */ | 390 | */ |
| 385 | rq->deadline = jiffies + q->rq_timeout; | 391 | rq->deadline = jiffies + q->rq_timeout; |
| 386 | set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); | 392 | set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); |
| 393 | |||
| 394 | if (q->dma_drain_size && blk_rq_bytes(rq)) { | ||
| 395 | /* | ||
| 396 | * Make sure space for the drain appears. We know we can do | ||
| 397 | * this because max_hw_segments has been adjusted to be one | ||
| 398 | * fewer than the device can handle. | ||
| 399 | */ | ||
| 400 | rq->nr_phys_segments++; | ||
| 401 | } | ||
| 402 | |||
| 403 | /* | ||
| 404 | * Flag the last request in the series so that drivers know when IO | ||
| 405 | * should be kicked off, if they don't do it on a per-request basis. | ||
| 406 | * | ||
| 407 | * Note: the flag isn't the only condition drivers should do kick off. | ||
| 408 | * If drive is busy, the last request might not have the bit set. | ||
| 409 | */ | ||
| 410 | if (last) | ||
| 411 | rq->cmd_flags |= REQ_END; | ||
| 387 | } | 412 | } |
| 388 | 413 | ||
| 389 | static void blk_mq_requeue_request(struct request *rq) | 414 | static void blk_mq_requeue_request(struct request *rq) |
@@ -392,6 +417,11 @@ static void blk_mq_requeue_request(struct request *rq)
| 392 | 417 | ||
| 393 | trace_block_rq_requeue(q, rq); | 418 | trace_block_rq_requeue(q, rq); |
| 394 | clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); | 419 | clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); |
| 420 | |||
| 421 | rq->cmd_flags &= ~REQ_END; | ||
| 422 | |||
| 423 | if (q->dma_drain_size && blk_rq_bytes(rq)) | ||
| 424 | rq->nr_phys_segments--; | ||
| 395 | } | 425 | } |
| 396 | 426 | ||
| 397 | struct blk_mq_timeout_data { | 427 | struct blk_mq_timeout_data { |
@@ -559,19 +589,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
| 559 | 589 | ||
| 560 | rq = list_first_entry(&rq_list, struct request, queuelist); | 590 | rq = list_first_entry(&rq_list, struct request, queuelist); |
| 561 | list_del_init(&rq->queuelist); | 591 | list_del_init(&rq->queuelist); |
| 562 | blk_mq_start_request(rq); | ||
| 563 | 592 | ||
| 564 | /* | 593 | blk_mq_start_request(rq, list_empty(&rq_list)); |
| 565 | * Last request in the series. Flag it as such, this | ||
| 566 | * enables drivers to know when IO should be kicked off, | ||
| 567 | * if they don't do it on a per-request basis. | ||
| 568 | * | ||
| 569 | * Note: the flag isn't the only condition drivers | ||
| 570 | * should do kick off. If drive is busy, the last | ||
| 571 | * request might not have the bit set. | ||
| 572 | */ | ||
| 573 | if (list_empty(&rq_list)) | ||
| 574 | rq->cmd_flags |= REQ_END; | ||
| 575 | 594 | ||
| 576 | ret = q->mq_ops->queue_rq(hctx, rq); | 595 | ret = q->mq_ops->queue_rq(hctx, rq); |
| 577 | switch (ret) { | 596 | switch (ret) { |
@@ -589,8 +608,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
| 589 | break; | 608 | break; |
| 590 | default: | 609 | default: |
| 591 | pr_err("blk-mq: bad return on queue: %d\n", ret); | 610 | pr_err("blk-mq: bad return on queue: %d\n", ret); |
| 592 | rq->errors = -EIO; | ||
| 593 | case BLK_MQ_RQ_QUEUE_ERROR: | 611 | case BLK_MQ_RQ_QUEUE_ERROR: |
| 612 | rq->errors = -EIO; | ||
| 594 | blk_mq_end_io(rq, rq->errors); | 613 | blk_mq_end_io(rq, rq->errors); |
| 595 | break; | 614 | break; |
| 596 | } | 615 | } |
@@ -693,13 +712,16 @@ static void blk_mq_work_fn(struct work_struct *work)
| 693 | } | 712 | } |
| 694 | 713 | ||
| 695 | static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, | 714 | static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, |
| 696 | struct request *rq) | 715 | struct request *rq, bool at_head) |
| 697 | { | 716 | { |
| 698 | struct blk_mq_ctx *ctx = rq->mq_ctx; | 717 | struct blk_mq_ctx *ctx = rq->mq_ctx; |
| 699 | 718 | ||
| 700 | trace_block_rq_insert(hctx->queue, rq); | 719 | trace_block_rq_insert(hctx->queue, rq); |
| 701 | 720 | ||
| 702 | list_add_tail(&rq->queuelist, &ctx->rq_list); | 721 | if (at_head) |
| 722 | list_add(&rq->queuelist, &ctx->rq_list); | ||
| 723 | else | ||
| 724 | list_add_tail(&rq->queuelist, &ctx->rq_list); | ||
| 703 | blk_mq_hctx_mark_pending(hctx, ctx); | 725 | blk_mq_hctx_mark_pending(hctx, ctx); |
| 704 | 726 | ||
| 705 | /* | 727 | /* |
@@ -709,7 +731,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
| 709 | } | 731 | } |
| 710 | 732 | ||
| 711 | void blk_mq_insert_request(struct request_queue *q, struct request *rq, | 733 | void blk_mq_insert_request(struct request_queue *q, struct request *rq, |
| 712 | bool run_queue) | 734 | bool at_head, bool run_queue) |
| 713 | { | 735 | { |
| 714 | struct blk_mq_hw_ctx *hctx; | 736 | struct blk_mq_hw_ctx *hctx; |
| 715 | struct blk_mq_ctx *ctx, *current_ctx; | 737 | struct blk_mq_ctx *ctx, *current_ctx; |
@@ -728,7 +750,7 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq,
| 728 | rq->mq_ctx = ctx; | 750 | rq->mq_ctx = ctx; |
| 729 | } | 751 | } |
| 730 | spin_lock(&ctx->lock); | 752 | spin_lock(&ctx->lock); |
| 731 | __blk_mq_insert_request(hctx, rq); | 753 | __blk_mq_insert_request(hctx, rq, at_head); |
| 732 | spin_unlock(&ctx->lock); | 754 | spin_unlock(&ctx->lock); |
| 733 | 755 | ||
| 734 | blk_mq_put_ctx(current_ctx); | 756 | blk_mq_put_ctx(current_ctx); |
@@ -760,7 +782,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
| 760 | 782 | ||
| 761 | /* ctx->cpu might be offline */ | 783 | /* ctx->cpu might be offline */ |
| 762 | spin_lock(&ctx->lock); | 784 | spin_lock(&ctx->lock); |
| 763 | __blk_mq_insert_request(hctx, rq); | 785 | __blk_mq_insert_request(hctx, rq, false); |
| 764 | spin_unlock(&ctx->lock); | 786 | spin_unlock(&ctx->lock); |
| 765 | 787 | ||
| 766 | blk_mq_put_ctx(current_ctx); | 788 | blk_mq_put_ctx(current_ctx); |
@@ -798,7 +820,7 @@ static void blk_mq_insert_requests(struct request_queue *q,
| 798 | rq = list_first_entry(list, struct request, queuelist); | 820 | rq = list_first_entry(list, struct request, queuelist); |
| 799 | list_del_init(&rq->queuelist); | 821 | list_del_init(&rq->queuelist); |
| 800 | rq->mq_ctx = ctx; | 822 | rq->mq_ctx = ctx; |
| 801 | __blk_mq_insert_request(hctx, rq); | 823 | __blk_mq_insert_request(hctx, rq, false); |
| 802 | } | 824 | } |
| 803 | spin_unlock(&ctx->lock); | 825 | spin_unlock(&ctx->lock); |
| 804 | 826 | ||
@@ -888,6 +910,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
| 888 | 910 | ||
| 889 | blk_queue_bounce(q, &bio); | 911 | blk_queue_bounce(q, &bio); |
| 890 | 912 | ||
| 913 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) { | ||
| 914 | bio_endio(bio, -EIO); | ||
| 915 | return; | ||
| 916 | } | ||
| 917 | |||
| 891 | if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) | 918 | if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) |
| 892 | return; | 919 | return; |
| 893 | 920 | ||
@@ -950,7 +977,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
| 950 | __blk_mq_free_request(hctx, ctx, rq); | 977 | __blk_mq_free_request(hctx, ctx, rq); |
| 951 | else { | 978 | else { |
| 952 | blk_mq_bio_to_request(rq, bio); | 979 | blk_mq_bio_to_request(rq, bio); |
| 953 | __blk_mq_insert_request(hctx, rq); | 980 | __blk_mq_insert_request(hctx, rq, false); |
| 954 | } | 981 | } |
| 955 | 982 | ||
| 956 | spin_unlock(&ctx->lock); | 983 | spin_unlock(&ctx->lock); |
@@ -1309,15 +1336,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
| 1309 | reg->queue_depth = BLK_MQ_MAX_DEPTH; | 1336 | reg->queue_depth = BLK_MQ_MAX_DEPTH; |
| 1310 | } | 1337 | } |
| 1311 | 1338 | ||
| 1312 | /* | ||
| 1313 | * Set aside a tag for flush requests. It will only be used while | ||
| 1314 | * another flush request is in progress but outside the driver. | ||
| 1315 | * | ||
| 1316 | * TODO: only allocate if flushes are supported | ||
| 1317 | */ | ||
| 1318 | reg->queue_depth++; | ||
| 1319 | reg->reserved_tags++; | ||
| 1320 | |||
| 1321 | if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) | 1339 | if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) |
| 1322 | return ERR_PTR(-EINVAL); | 1340 | return ERR_PTR(-EINVAL); |
| 1323 | 1341 | ||
@@ -1360,17 +1378,27 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
| 1360 | q->mq_ops = reg->ops; | 1378 | q->mq_ops = reg->ops; |
| 1361 | q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; | 1379 | q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; |
| 1362 | 1380 | ||
| 1381 | q->sg_reserved_size = INT_MAX; | ||
| 1382 | |||
| 1363 | blk_queue_make_request(q, blk_mq_make_request); | 1383 | blk_queue_make_request(q, blk_mq_make_request); |
| 1364 | blk_queue_rq_timed_out(q, reg->ops->timeout); | 1384 | blk_queue_rq_timed_out(q, reg->ops->timeout); |
| 1365 | if (reg->timeout) | 1385 | if (reg->timeout) |
| 1366 | blk_queue_rq_timeout(q, reg->timeout); | 1386 | blk_queue_rq_timeout(q, reg->timeout); |
| 1367 | 1387 | ||
| 1388 | if (reg->ops->complete) | ||
| 1389 | blk_queue_softirq_done(q, reg->ops->complete); | ||
| 1390 | |||
| 1368 | blk_mq_init_flush(q); | 1391 | blk_mq_init_flush(q); |
| 1369 | blk_mq_init_cpu_queues(q, reg->nr_hw_queues); | 1392 | blk_mq_init_cpu_queues(q, reg->nr_hw_queues); |
| 1370 | 1393 | ||
| 1371 | if (blk_mq_init_hw_queues(q, reg, driver_data)) | 1394 | q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size, |
| 1395 | cache_line_size()), GFP_KERNEL); | ||
| 1396 | if (!q->flush_rq) | ||
| 1372 | goto err_hw; | 1397 | goto err_hw; |
| 1373 | 1398 | ||
| 1399 | if (blk_mq_init_hw_queues(q, reg, driver_data)) | ||
| 1400 | goto err_flush_rq; | ||
| 1401 | |||
| 1374 | blk_mq_map_swqueue(q); | 1402 | blk_mq_map_swqueue(q); |
| 1375 | 1403 | ||
| 1376 | mutex_lock(&all_q_mutex); | 1404 | mutex_lock(&all_q_mutex); |
@@ -1378,6 +1406,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
| 1378 | mutex_unlock(&all_q_mutex); | 1406 | mutex_unlock(&all_q_mutex); |
| 1379 | 1407 | ||
| 1380 | return q; | 1408 | return q; |
| 1409 | |||
| 1410 | err_flush_rq: | ||
| 1411 | kfree(q->flush_rq); | ||
| 1381 | err_hw: | 1412 | err_hw: |
| 1382 | kfree(q->mq_map); | 1413 | kfree(q->mq_map); |
| 1383 | err_map: | 1414 | err_map: |
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5c3917984b00..ed0035cd458e 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -22,13 +22,13 @@ struct blk_mq_ctx {
| 22 | struct kobject kobj; | 22 | struct kobject kobj; |
| 23 | }; | 23 | }; |
| 24 | 24 | ||
| 25 | void __blk_mq_end_io(struct request *rq, int error); | 25 | void __blk_mq_complete_request(struct request *rq); |
| 26 | void blk_mq_complete_request(struct request *rq, int error); | ||
| 27 | void blk_mq_run_request(struct request *rq, bool run_queue, bool async); | 26 | void blk_mq_run_request(struct request *rq, bool run_queue, bool async); |
| 28 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); | 27 | void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); |
| 29 | void blk_mq_init_flush(struct request_queue *q); | 28 | void blk_mq_init_flush(struct request_queue *q); |
| 30 | void blk_mq_drain_queue(struct request_queue *q); | 29 | void blk_mq_drain_queue(struct request_queue *q); |
| 31 | void blk_mq_free_queue(struct request_queue *q); | 30 | void blk_mq_free_queue(struct request_queue *q); |
| 31 | void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq); | ||
| 32 | 32 | ||
| 33 | /* | 33 | /* |
| 34 | * CPU hotplug helpers | 34 | * CPU hotplug helpers |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 8095c4a21fc0..7500f876dae4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -549,6 +549,8 @@ static void blk_release_queue(struct kobject *kobj)
| 549 | if (q->mq_ops) | 549 | if (q->mq_ops) |
| 550 | blk_mq_free_queue(q); | 550 | blk_mq_free_queue(q); |
| 551 | 551 | ||
| 552 | kfree(q->flush_rq); | ||
| 553 | |||
| 552 | blk_trace_shutdown(q); | 554 | blk_trace_shutdown(q); |
| 553 | 555 | ||
| 554 | bdi_destroy(&q->backing_dev_info); | 556 | bdi_destroy(&q->backing_dev_info); |
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index bba81c9348e1..d96f7061c6fd 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req)
| 91 | case BLK_EH_HANDLED: | 91 | case BLK_EH_HANDLED: |
| 92 | /* Can we use req->errors here? */ | 92 | /* Can we use req->errors here? */ |
| 93 | if (q->mq_ops) | 93 | if (q->mq_ops) |
| 94 | blk_mq_complete_request(req, req->errors); | 94 | __blk_mq_complete_request(req); |
| 95 | else | 95 | else |
| 96 | __blk_complete_request(req); | 96 | __blk_complete_request(req); |
| 97 | break; | 97 | break; |
diff --git a/block/blk.h b/block/blk.h
index c90e1d8f7a2b..d23b415b8a28 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -113,7 +113,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
| 113 | q->flush_queue_delayed = 1; | 113 | q->flush_queue_delayed = 1; |
| 114 | return NULL; | 114 | return NULL; |
| 115 | } | 115 | } |
| 116 | if (unlikely(blk_queue_dying(q)) || | 116 | if (unlikely(blk_queue_bypass(q)) || |
| 117 | !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) | 117 | !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) |
| 118 | return NULL; | 118 | return NULL; |
| 119 | } | 119 | } |
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 3107282a9741..091b9ea14feb 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -60,7 +60,9 @@ enum {
| 60 | NULL_IRQ_NONE = 0, | 60 | NULL_IRQ_NONE = 0, |
| 61 | NULL_IRQ_SOFTIRQ = 1, | 61 | NULL_IRQ_SOFTIRQ = 1, |
| 62 | NULL_IRQ_TIMER = 2, | 62 | NULL_IRQ_TIMER = 2, |
| 63 | }; | ||
| 63 | 64 | ||
| 65 | enum { | ||
| 64 | NULL_Q_BIO = 0, | 66 | NULL_Q_BIO = 0, |
| 65 | NULL_Q_RQ = 1, | 67 | NULL_Q_RQ = 1, |
| 66 | NULL_Q_MQ = 2, | 68 | NULL_Q_MQ = 2, |
@@ -172,18 +174,20 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
| 172 | 174 | ||
| 173 | static void end_cmd(struct nullb_cmd *cmd) | 175 | static void end_cmd(struct nullb_cmd *cmd) |
| 174 | { | 176 | { |
| 175 | if (cmd->rq) { | 177 | switch (queue_mode) { |
| 176 | if (queue_mode == NULL_Q_MQ) | 178 | case NULL_Q_MQ: |
| 177 | blk_mq_end_io(cmd->rq, 0); | 179 | blk_mq_end_io(cmd->rq, 0); |
| 178 | else { | 180 | return; |
| 179 | INIT_LIST_HEAD(&cmd->rq->queuelist); | 181 | case NULL_Q_RQ: |
| 180 | blk_end_request_all(cmd->rq, 0); | 182 | INIT_LIST_HEAD(&cmd->rq->queuelist); |
| 181 | } | 183 | blk_end_request_all(cmd->rq, 0); |
| 182 | } else if (cmd->bio) | 184 | break; |
| 185 | case NULL_Q_BIO: | ||
| 183 | bio_endio(cmd->bio, 0); | 186 | bio_endio(cmd->bio, 0); |
| 187 | break; | ||
| 188 | } | ||
| 184 | 189 | ||
| 185 | if (queue_mode != NULL_Q_MQ) | 190 | free_cmd(cmd); |
| 186 | free_cmd(cmd); | ||
| 187 | } | 191 | } |
| 188 | 192 | ||
| 189 | static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) | 193 | static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) |
@@ -195,6 +199,7 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
| 195 | cq = &per_cpu(completion_queues, smp_processor_id()); | 199 | cq = &per_cpu(completion_queues, smp_processor_id()); |
| 196 | 200 | ||
| 197 | while ((entry = llist_del_all(&cq->list)) != NULL) { | 201 | while ((entry = llist_del_all(&cq->list)) != NULL) { |
| 202 | entry = llist_reverse_order(entry); | ||
| 198 | do { | 203 | do { |
| 199 | cmd = container_of(entry, struct nullb_cmd, ll_list); | 204 | cmd = container_of(entry, struct nullb_cmd, ll_list); |
| 200 | end_cmd(cmd); | 205 | end_cmd(cmd); |
@@ -221,61 +226,31 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
| 221 | 226 | ||
| 222 | static void null_softirq_done_fn(struct request *rq) | 227 | static void null_softirq_done_fn(struct request *rq) |
| 223 | { | 228 | { |
| 224 | blk_end_request_all(rq, 0); | 229 | end_cmd(rq->special); |
| 225 | } | ||
| 226 | |||
| 227 | #ifdef CONFIG_SMP | ||
| 228 | |||
| 229 | static void null_ipi_cmd_end_io(void *data) | ||
| 230 | { | ||
| 231 | struct completion_queue *cq; | ||
| 232 | struct llist_node *entry, *next; | ||
| 233 | struct nullb_cmd *cmd; | ||
| 234 | |||
| 235 | cq = &per_cpu(completion_queues, smp_processor_id()); | ||
| 236 | |||
| 237 | entry = llist_del_all(&cq->list); | ||
| 238 | |||
| 239 | while (entry) { | ||
| 240 | next = entry->next; | ||
| 241 | cmd = llist_entry(entry, struct nullb_cmd, ll_list); | ||
| 242 | end_cmd(cmd); | ||
| 243 | entry = next; | ||
| 244 | } | ||
| 245 | } | ||
| 246 | |||
| 247 | static void null_cmd_end_ipi(struct nullb_cmd *cmd) | ||
| 248 | { | ||
| 249 | struct call_single_data *data = &cmd->csd; | ||
| 250 | int cpu = get_cpu(); | ||
| 251 | struct completion_queue *cq = &per_cpu(completion_queues, cpu); | ||
| 252 | |||
| 253 | cmd->ll_list.next = NULL; | ||
| 254 | |||
| 255 | if (llist_add(&cmd->ll_list, &cq->list)) { | ||
| 256 | data->func = null_ipi_cmd_end_io; | ||
| 257 | data->flags = 0; | ||
| 258 | __smp_call_function_single(cpu, data, 0); | ||
| 259 | } | ||
| 260 | |||
| 261 | put_cpu(); | ||
| 262 | } | 230 | } |
| 263 | 231 | ||
| 264 | #endif /* CONFIG_SMP */ | ||
| 265 | |||
| 266 | static inline void null_handle_cmd(struct nullb_cmd *cmd) | 232 | static inline void null_handle_cmd(struct nullb_cmd *cmd) |
| 267 | { | 233 | { |
| 268 | /* Complete IO by inline, softirq or timer */ | 234 | /* Complete IO by inline, softirq or timer */ |
| 269 | switch (irqmode) { | 235 | switch (irqmode) { |
| 270 | case NULL_IRQ_NONE: | ||
| 271 | end_cmd(cmd); | ||
| 272 | break; | ||
| 273 | case NULL_IRQ_SOFTIRQ: | 236 | case NULL_IRQ_SOFTIRQ: |
| 274 | #ifdef CONFIG_SMP | 237 | switch (queue_mode) { |
| 275 | null_cmd_end_ipi(cmd); | 238 | case NULL_Q_MQ: |
| 276 | #else | 239 | blk_mq_complete_request(cmd->rq); |
| 240 | break; | ||
| 241 | case NULL_Q_RQ: | ||
| 242 | blk_complete_request(cmd->rq); | ||
| 243 | break; | ||
| 244 | case NULL_Q_BIO: | ||
| 245 | /* | ||
| 246 | * XXX: no proper submitting cpu information available. | ||
| 247 | */ | ||
| 248 | end_cmd(cmd); | ||
| 249 | break; | ||
| 250 | } | ||
| 251 | break; | ||
| 252 | case NULL_IRQ_NONE: | ||
| 277 | end_cmd(cmd); | 253 | end_cmd(cmd); |
| 278 | #endif | ||
| 279 | break; | 254 | break; |
| 280 | case NULL_IRQ_TIMER: | 255 | case NULL_IRQ_TIMER: |
| 281 | null_cmd_end_timer(cmd); | 256 | null_cmd_end_timer(cmd); |
@@ -411,6 +386,7 @@ static struct blk_mq_ops null_mq_ops = {
| 411 | .queue_rq = null_queue_rq, | 386 | .queue_rq = null_queue_rq, |
| 412 | .map_queue = blk_mq_map_queue, | 387 | .map_queue = blk_mq_map_queue, |
| 413 | .init_hctx = null_init_hctx, | 388 | .init_hctx = null_init_hctx, |
| 389 | .complete = null_softirq_done_fn, | ||
| 414 | }; | 390 | }; |
| 415 | 391 | ||
| 416 | static struct blk_mq_reg null_mq_reg = { | 392 | static struct blk_mq_reg null_mq_reg = { |
@@ -609,13 +585,6 @@ static int __init null_init(void)
| 609 | { | 585 | { |
| 610 | unsigned int i; | 586 | unsigned int i; |
| 611 | 587 | ||
| 612 | #if !defined(CONFIG_SMP) | ||
| 613 | if (irqmode == NULL_IRQ_SOFTIRQ) { | ||
| 614 | pr_warn("null_blk: softirq completions not available.\n"); | ||
| 615 | pr_warn("null_blk: using direct completions.\n"); | ||
| 616 | irqmode = NULL_IRQ_NONE; | ||
| 617 | } | ||
| 618 | #endif | ||
| 619 | if (bs > PAGE_SIZE) { | 588 | if (bs > PAGE_SIZE) { |
| 620 | pr_warn("null_blk: invalid block size\n"); | 589 | pr_warn("null_blk: invalid block size\n"); |
| 621 | pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); | 590 | pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE); |
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6a680d4de7f1..b1cb3f4c4db4 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -110,9 +110,9 @@ static int __virtblk_add_req(struct virtqueue *vq,
| 110 | return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); | 110 | return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC); |
| 111 | } | 111 | } |
| 112 | 112 | ||
| 113 | static inline void virtblk_request_done(struct virtblk_req *vbr) | 113 | static inline void virtblk_request_done(struct request *req) |
| 114 | { | 114 | { |
| 115 | struct request *req = vbr->req; | 115 | struct virtblk_req *vbr = req->special; |
| 116 | int error = virtblk_result(vbr); | 116 | int error = virtblk_result(vbr); |
| 117 | 117 | ||
| 118 | if (req->cmd_type == REQ_TYPE_BLOCK_PC) { | 118 | if (req->cmd_type == REQ_TYPE_BLOCK_PC) { |
@@ -138,7 +138,7 @@ static void virtblk_done(struct virtqueue *vq)
| 138 | do { | 138 | do { |
| 139 | virtqueue_disable_cb(vq); | 139 | virtqueue_disable_cb(vq); |
| 140 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { | 140 | while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) { |
| 141 | virtblk_request_done(vbr); | 141 | blk_mq_complete_request(vbr->req); |
| 142 | req_done = true; | 142 | req_done = true; |
| 143 | } | 143 | } |
| 144 | if (unlikely(virtqueue_is_broken(vq))) | 144 | if (unlikely(virtqueue_is_broken(vq))) |
@@ -479,6 +479,7 @@ static struct blk_mq_ops virtio_mq_ops = {
| 479 | .map_queue = blk_mq_map_queue, | 479 | .map_queue = blk_mq_map_queue, |
| 480 | .alloc_hctx = blk_mq_alloc_single_hw_queue, | 480 | .alloc_hctx = blk_mq_alloc_single_hw_queue, |
| 481 | .free_hctx = blk_mq_free_single_hw_queue, | 481 | .free_hctx = blk_mq_free_single_hw_queue, |
| 482 | .complete = virtblk_request_done, | ||
| 482 | }; | 483 | }; |
| 483 | 484 | ||
| 484 | static struct blk_mq_reg virtio_mq_reg = { | 485 | static struct blk_mq_reg virtio_mq_reg = { |
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 4b97b86da926..64c60edcdfbc 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -299,7 +299,7 @@ static void free_persistent_gnts(struct xen_blkif *blkif, struct rb_root *root,
| 299 | BUG_ON(num != 0); | 299 | BUG_ON(num != 0); |
| 300 | } | 300 | } |
| 301 | 301 | ||
| 302 | static void unmap_purged_grants(struct work_struct *work) | 302 | void xen_blkbk_unmap_purged_grants(struct work_struct *work) |
| 303 | { | 303 | { |
| 304 | struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 304 | struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
| 305 | struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | 305 | struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
@@ -375,7 +375,7 @@ static void purge_persistent_gnt(struct xen_blkif *blkif)
| 375 | 375 | ||
| 376 | pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean); | 376 | pr_debug(DRV_PFX "Going to purge %u persistent grants\n", num_clean); |
| 377 | 377 | ||
| 378 | INIT_LIST_HEAD(&blkif->persistent_purge_list); | 378 | BUG_ON(!list_empty(&blkif->persistent_purge_list)); |
| 379 | root = &blkif->persistent_gnts; | 379 | root = &blkif->persistent_gnts; |
| 380 | purge_list: | 380 | purge_list: |
| 381 | foreach_grant_safe(persistent_gnt, n, root, node) { | 381 | foreach_grant_safe(persistent_gnt, n, root, node) { |
@@ -420,7 +420,6 @@ finished:
| 420 | blkif->vbd.overflow_max_grants = 0; | 420 | blkif->vbd.overflow_max_grants = 0; |
| 421 | 421 | ||
| 422 | /* We can defer this work */ | 422 | /* We can defer this work */ |
| 423 | INIT_WORK(&blkif->persistent_purge_work, unmap_purged_grants); | ||
| 424 | schedule_work(&blkif->persistent_purge_work); | 423 | schedule_work(&blkif->persistent_purge_work); |
| 425 | pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total); | 424 | pr_debug(DRV_PFX "Purged %u/%u\n", (total - num_clean), total); |
| 426 | return; | 425 | return; |
@@ -625,9 +624,23 @@ purge_gnt_list:
| 625 | print_stats(blkif); | 624 | print_stats(blkif); |
| 626 | } | 625 | } |
| 627 | 626 | ||
| 628 | /* Since we are shutting down remove all pages from the buffer */ | 627 | /* Drain pending purge work */ |
| 629 | shrink_free_pagepool(blkif, 0 /* All */); | 628 | flush_work(&blkif->persistent_purge_work); |
| 630 | 629 | ||
| 630 | if (log_stats) | ||
| 631 | print_stats(blkif); | ||
| 632 | |||
| 633 | blkif->xenblkd = NULL; | ||
| 634 | xen_blkif_put(blkif); | ||
| 635 | |||
| 636 | return 0; | ||
| 637 | } | ||
| 638 | |||
| 639 | /* | ||
| 640 | * Remove persistent grants and empty the pool of free pages | ||
| 641 | */ | ||
| 642 | void xen_blkbk_free_caches(struct xen_blkif *blkif) | ||
| 643 | { | ||
| 631 | /* Free all persistent grant pages */ | 644 | /* Free all persistent grant pages */ |
| 632 | if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) | 645 | if (!RB_EMPTY_ROOT(&blkif->persistent_gnts)) |
| 633 | free_persistent_gnts(blkif, &blkif->persistent_gnts, | 646 | free_persistent_gnts(blkif, &blkif->persistent_gnts, |
| @@ -636,13 +649,8 @@ purge_gnt_list: | |||
| 636 | BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); | 649 | BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); |
| 637 | blkif->persistent_gnt_c = 0; | 650 | blkif->persistent_gnt_c = 0; |
| 638 | 651 | ||
| 639 | if (log_stats) | 652 | /* Since we are shutting down remove all pages from the buffer */ |
| 640 | print_stats(blkif); | 653 | shrink_free_pagepool(blkif, 0 /* All */); |
| 641 | |||
| 642 | blkif->xenblkd = NULL; | ||
| 643 | xen_blkif_put(blkif); | ||
| 644 | |||
| 645 | return 0; | ||
| 646 | } | 654 | } |
| 647 | 655 | ||
| 648 | /* | 656 | /* |
@@ -838,7 +846,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req,
| 838 | struct grant_page **pages = pending_req->indirect_pages; | 846 | struct grant_page **pages = pending_req->indirect_pages; |
| 839 | struct xen_blkif *blkif = pending_req->blkif; | 847 | struct xen_blkif *blkif = pending_req->blkif; |
| 840 | int indirect_grefs, rc, n, nseg, i; | 848 | int indirect_grefs, rc, n, nseg, i; |
| 841 | struct blkif_request_segment_aligned *segments = NULL; | 849 | struct blkif_request_segment *segments = NULL; |
| 842 | 850 | ||
| 843 | nseg = pending_req->nr_pages; | 851 | nseg = pending_req->nr_pages; |
| 844 | indirect_grefs = INDIRECT_PAGES(nseg); | 852 | indirect_grefs = INDIRECT_PAGES(nseg); |
@@ -934,9 +942,7 @@ static void xen_blk_drain_io(struct xen_blkif *blkif)
| 934 | { | 942 | { |
| 935 | atomic_set(&blkif->drain, 1); | 943 | atomic_set(&blkif->drain, 1); |
| 936 | do { | 944 | do { |
| 937 | /* The initial value is one, and one refcnt taken at the | 945 | if (atomic_read(&blkif->inflight) == 0) |
| 938 | * start of the xen_blkif_schedule thread. */ | ||
| 939 | if (atomic_read(&blkif->refcnt) <= 2) | ||
| 940 | break; | 946 | break; |
| 941 | wait_for_completion_interruptible_timeout( | 947 | wait_for_completion_interruptible_timeout( |
| 942 | &blkif->drain_complete, HZ); | 948 | &blkif->drain_complete, HZ); |
@@ -976,17 +982,30 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
| 976 | * the proper response on the ring. | 982 | * the proper response on the ring. |
| 977 | */ | 983 | */ |
| 978 | if (atomic_dec_and_test(&pending_req->pendcnt)) { | 984 | if (atomic_dec_and_test(&pending_req->pendcnt)) { |
| 979 | xen_blkbk_unmap(pending_req->blkif, | 985 | struct xen_blkif *blkif = pending_req->blkif; |
| 986 | |||
| 987 | xen_blkbk_unmap(blkif, | ||
| 980 | pending_req->segments, | 988 | pending_req->segments, |
| 981 | pending_req->nr_pages); | 989 | pending_req->nr_pages); |
| 982 | make_response(pending_req->blkif, pending_req->id, | 990 | make_response(blkif, pending_req->id, |
| 983 | pending_req->operation, pending_req->status); | 991 | pending_req->operation, pending_req->status); |
| 984 | xen_blkif_put(pending_req->blkif); | 992 | free_req(blkif, pending_req); |
| 985 | if (atomic_read(&pending_req->blkif->refcnt) <= 2) { | 993 | /* |
| 986 | if (atomic_read(&pending_req->blkif->drain)) | 994 | * Make sure the request is freed before releasing blkif, |
| 987 | complete(&pending_req->blkif->drain_complete); | 995 | * or there could be a race between free_req and the |
| 996 | * cleanup done in xen_blkif_free during shutdown. | ||
| 997 | * | ||
| 998 | * NB: The fact that we might try to wake up pending_free_wq | ||
| 999 | * before drain_complete (in case there's a drain going on) | ||
| 1000 | * it's not a problem with our current implementation | ||
| 1001 | * because we can assure there's no thread waiting on | ||
| 1002 | * pending_free_wq if there's a drain going on, but it has | ||
| 1003 | * to be taken into account if the current model is changed. | ||
| 1004 | */ | ||
| 1005 | if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) { | ||
| 1006 | complete(&blkif->drain_complete); | ||
| 988 | } | 1007 | } |
| 989 | free_req(pending_req->blkif, pending_req); | 1008 | xen_blkif_put(blkif); |
| 990 | } | 1009 | } |
| 991 | } | 1010 | } |
| 992 | 1011 | ||
@@ -1240,6 +1259,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
| 1240 | * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. | 1259 | * below (in "!bio") if we are handling a BLKIF_OP_DISCARD. |
| 1241 | */ | 1260 | */ |
| 1242 | xen_blkif_get(blkif); | 1261 | xen_blkif_get(blkif); |
| 1262 | atomic_inc(&blkif->inflight); | ||
| 1243 | 1263 | ||
| 1244 | for (i = 0; i < nseg; i++) { | 1264 | for (i = 0; i < nseg; i++) { |
| 1245 | while ((bio == NULL) || | 1265 | while ((bio == NULL) || |
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 8d8807563d99..be052773ad03 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -57,7 +57,7 @@
| 57 | #define MAX_INDIRECT_SEGMENTS 256 | 57 | #define MAX_INDIRECT_SEGMENTS 256 |
| 58 | 58 | ||
| 59 | #define SEGS_PER_INDIRECT_FRAME \ | 59 | #define SEGS_PER_INDIRECT_FRAME \ |
| 60 | (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) | 60 | (PAGE_SIZE/sizeof(struct blkif_request_segment)) |
| 61 | #define MAX_INDIRECT_PAGES \ | 61 | #define MAX_INDIRECT_PAGES \ |
| 62 | ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) | 62 | ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) |
| 63 | #define INDIRECT_PAGES(_segs) \ | 63 | #define INDIRECT_PAGES(_segs) \ |
| @@ -278,6 +278,7 @@ struct xen_blkif { | |||
| 278 | /* for barrier (drain) requests */ | 278 | /* for barrier (drain) requests */ |
| 279 | struct completion drain_complete; | 279 | struct completion drain_complete; |
| 280 | atomic_t drain; | 280 | atomic_t drain; |
| 281 | atomic_t inflight; | ||
| 281 | /* One thread per one blkif. */ | 282 | /* One thread per one blkif. */ |
| 282 | struct task_struct *xenblkd; | 283 | struct task_struct *xenblkd; |
| 283 | unsigned int waiting_reqs; | 284 | unsigned int waiting_reqs; |
| @@ -376,6 +377,7 @@ int xen_blkif_xenbus_init(void); | |||
| 376 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id); | 377 | irqreturn_t xen_blkif_be_int(int irq, void *dev_id); |
| 377 | int xen_blkif_schedule(void *arg); | 378 | int xen_blkif_schedule(void *arg); |
| 378 | int xen_blkif_purge_persistent(void *arg); | 379 | int xen_blkif_purge_persistent(void *arg); |
| 380 | void xen_blkbk_free_caches(struct xen_blkif *blkif); | ||
| 379 | 381 | ||
| 380 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, | 382 | int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, |
| 381 | struct backend_info *be, int state); | 383 | struct backend_info *be, int state); |
| @@ -383,6 +385,7 @@ int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt, | |||
| 383 | int xen_blkbk_barrier(struct xenbus_transaction xbt, | 385 | int xen_blkbk_barrier(struct xenbus_transaction xbt, |
| 384 | struct backend_info *be, int state); | 386 | struct backend_info *be, int state); |
| 385 | struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); | 387 | struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be); |
| 388 | void xen_blkbk_unmap_purged_grants(struct work_struct *work); | ||
| 386 | 389 | ||
| 387 | static inline void blkif_get_x86_32_req(struct blkif_request *dst, | 390 | static inline void blkif_get_x86_32_req(struct blkif_request *dst, |
| 388 | struct blkif_x86_32_request *src) | 391 | struct blkif_x86_32_request *src) |
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c index c2014a0aa206..9a547e6b6ebf 100644 --- a/drivers/block/xen-blkback/xenbus.c +++ b/drivers/block/xen-blkback/xenbus.c | |||
| @@ -125,8 +125,11 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) | |||
| 125 | blkif->persistent_gnts.rb_node = NULL; | 125 | blkif->persistent_gnts.rb_node = NULL; |
| 126 | spin_lock_init(&blkif->free_pages_lock); | 126 | spin_lock_init(&blkif->free_pages_lock); |
| 127 | INIT_LIST_HEAD(&blkif->free_pages); | 127 | INIT_LIST_HEAD(&blkif->free_pages); |
| 128 | INIT_LIST_HEAD(&blkif->persistent_purge_list); | ||
| 128 | blkif->free_pages_num = 0; | 129 | blkif->free_pages_num = 0; |
| 129 | atomic_set(&blkif->persistent_gnt_in_use, 0); | 130 | atomic_set(&blkif->persistent_gnt_in_use, 0); |
| 131 | atomic_set(&blkif->inflight, 0); | ||
| 132 | INIT_WORK(&blkif->persistent_purge_work, xen_blkbk_unmap_purged_grants); | ||
| 130 | 133 | ||
| 131 | INIT_LIST_HEAD(&blkif->pending_free); | 134 | INIT_LIST_HEAD(&blkif->pending_free); |
| 132 | 135 | ||
| @@ -259,6 +262,17 @@ static void xen_blkif_free(struct xen_blkif *blkif) | |||
| 259 | if (!atomic_dec_and_test(&blkif->refcnt)) | 262 | if (!atomic_dec_and_test(&blkif->refcnt)) |
| 260 | BUG(); | 263 | BUG(); |
| 261 | 264 | ||
| 265 | /* Remove all persistent grants and the cache of ballooned pages. */ | ||
| 266 | xen_blkbk_free_caches(blkif); | ||
| 267 | |||
| 268 | /* Make sure everything is drained before shutting down */ | ||
| 269 | BUG_ON(blkif->persistent_gnt_c != 0); | ||
| 270 | BUG_ON(atomic_read(&blkif->persistent_gnt_in_use) != 0); | ||
| 271 | BUG_ON(blkif->free_pages_num != 0); | ||
| 272 | BUG_ON(!list_empty(&blkif->persistent_purge_list)); | ||
| 273 | BUG_ON(!list_empty(&blkif->free_pages)); | ||
| 274 | BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts)); | ||
| 275 | |||
| 262 | /* Check that there is no request in use */ | 276 | /* Check that there is no request in use */ |
| 263 | list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { | 277 | list_for_each_entry_safe(req, n, &blkif->pending_free, free_list) { |
| 264 | list_del(&req->free_list); | 278 | list_del(&req->free_list); |
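Initialising persistent_purge_list, the inflight counter and persistent_purge_work at allocation time is what lets xen_blkif_free() assert, with the new BUG_ON()s, that everything has drained before the structure is released. A short sketch of that alloc/teardown discipline, with illustrative names (my_dev, my_purge_fn):

#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct my_dev {
        atomic_t inflight;
        struct list_head purge_list;
        struct work_struct purge_work;
};

static void my_purge_fn(struct work_struct *work)
{
        /* deferred cleanup work goes here */
}

static struct my_dev *my_dev_alloc(void)
{
        struct my_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);

        if (!dev)
                return NULL;
        /* Initialise everything up front so teardown may assume it is valid. */
        atomic_set(&dev->inflight, 0);
        INIT_LIST_HEAD(&dev->purge_list);
        INIT_WORK(&dev->purge_work, my_purge_fn);
        return dev;
}

static void my_dev_free(struct my_dev *dev)
{
        /* All activity must have drained before the structure goes away. */
        BUG_ON(atomic_read(&dev->inflight) != 0);
        BUG_ON(!list_empty(&dev->purge_list));
        kfree(dev);
}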
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 8dcfb54f1603..efe1b4761735 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c | |||
| @@ -162,7 +162,7 @@ static DEFINE_SPINLOCK(minor_lock); | |||
| 162 | #define DEV_NAME "xvd" /* name in /dev */ | 162 | #define DEV_NAME "xvd" /* name in /dev */ |
| 163 | 163 | ||
| 164 | #define SEGS_PER_INDIRECT_FRAME \ | 164 | #define SEGS_PER_INDIRECT_FRAME \ |
| 165 | (PAGE_SIZE/sizeof(struct blkif_request_segment_aligned)) | 165 | (PAGE_SIZE/sizeof(struct blkif_request_segment)) |
| 166 | #define INDIRECT_GREFS(_segs) \ | 166 | #define INDIRECT_GREFS(_segs) \ |
| 167 | ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) | 167 | ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME) |
| 168 | 168 | ||
| @@ -393,7 +393,7 @@ static int blkif_queue_request(struct request *req) | |||
| 393 | unsigned long id; | 393 | unsigned long id; |
| 394 | unsigned int fsect, lsect; | 394 | unsigned int fsect, lsect; |
| 395 | int i, ref, n; | 395 | int i, ref, n; |
| 396 | struct blkif_request_segment_aligned *segments = NULL; | 396 | struct blkif_request_segment *segments = NULL; |
| 397 | 397 | ||
| 398 | /* | 398 | /* |
| 399 | * Used to store if we are able to queue the request by just using | 399 | * Used to store if we are able to queue the request by just using |
| @@ -550,7 +550,7 @@ static int blkif_queue_request(struct request *req) | |||
| 550 | } else { | 550 | } else { |
| 551 | n = i % SEGS_PER_INDIRECT_FRAME; | 551 | n = i % SEGS_PER_INDIRECT_FRAME; |
| 552 | segments[n] = | 552 | segments[n] = |
| 553 | (struct blkif_request_segment_aligned) { | 553 | (struct blkif_request_segment) { |
| 554 | .gref = ref, | 554 | .gref = ref, |
| 555 | .first_sect = fsect, | 555 | .first_sect = fsect, |
| 556 | .last_sect = lsect }; | 556 | .last_sect = lsect }; |
| @@ -1904,13 +1904,16 @@ static void blkback_changed(struct xenbus_device *dev, | |||
| 1904 | case XenbusStateReconfiguring: | 1904 | case XenbusStateReconfiguring: |
| 1905 | case XenbusStateReconfigured: | 1905 | case XenbusStateReconfigured: |
| 1906 | case XenbusStateUnknown: | 1906 | case XenbusStateUnknown: |
| 1907 | case XenbusStateClosed: | ||
| 1908 | break; | 1907 | break; |
| 1909 | 1908 | ||
| 1910 | case XenbusStateConnected: | 1909 | case XenbusStateConnected: |
| 1911 | blkfront_connect(info); | 1910 | blkfront_connect(info); |
| 1912 | break; | 1911 | break; |
| 1913 | 1912 | ||
| 1913 | case XenbusStateClosed: | ||
| 1914 | if (dev->state == XenbusStateClosed) | ||
| 1915 | break; | ||
| 1916 | /* Missed the backend's Closing state -- fallthrough */ | ||
| 1914 | case XenbusStateClosing: | 1917 | case XenbusStateClosing: |
| 1915 | blkfront_closing(info); | 1918 | blkfront_closing(info); |
| 1916 | break; | 1919 | break; |
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 0c707e4f4eaf..a4c7306ff43d 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h | |||
| @@ -210,7 +210,9 @@ BITMASK(GC_MARK, struct bucket, gc_mark, 0, 2); | |||
| 210 | #define GC_MARK_RECLAIMABLE 0 | 210 | #define GC_MARK_RECLAIMABLE 0 |
| 211 | #define GC_MARK_DIRTY 1 | 211 | #define GC_MARK_DIRTY 1 |
| 212 | #define GC_MARK_METADATA 2 | 212 | #define GC_MARK_METADATA 2 |
| 213 | BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, 13); | 213 | #define GC_SECTORS_USED_SIZE 13 |
| 214 | #define MAX_GC_SECTORS_USED (~(~0ULL << GC_SECTORS_USED_SIZE)) | ||
| 215 | BITMASK(GC_SECTORS_USED, struct bucket, gc_mark, 2, GC_SECTORS_USED_SIZE); | ||
| 214 | BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1); | 216 | BITMASK(GC_MOVE, struct bucket, gc_mark, 15, 1); |
| 215 | 217 | ||
| 216 | #include "journal.h" | 218 | #include "journal.h" |
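Deriving MAX_GC_SECTORS_USED from GC_SECTORS_USED_SIZE keeps the clamp in btree.c in sync with the 13-bit field; the old literal (1 << 14) - 1 was larger than the field can hold. The arithmetic, as a small stand-alone check (plain C, values printed for illustration):

#include <stdio.h>

#define FIELD_SIZE 13
#define FIELD_MAX  (~(~0ULL << FIELD_SIZE))     /* max value a 13-bit field holds */

int main(void)
{
        unsigned long long used = 9000;         /* would overflow the field */

        printf("13-bit max : %llu\n", (unsigned long long)FIELD_MAX);   /* 8191 */
        printf("old clamp  : %d\n", (1 << 14) - 1);                     /* 16383, too large */
        printf("clamped    : %llu\n", used > FIELD_MAX ? FIELD_MAX : used);
        return 0;
}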
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 4f6b5940e609..3f74b4b0747b 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c | |||
| @@ -23,7 +23,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned set) | |||
| 23 | for (k = i->start; k < bset_bkey_last(i); k = next) { | 23 | for (k = i->start; k < bset_bkey_last(i); k = next) { |
| 24 | next = bkey_next(k); | 24 | next = bkey_next(k); |
| 25 | 25 | ||
| 26 | printk(KERN_ERR "block %u key %zi/%u: ", set, | 26 | printk(KERN_ERR "block %u key %li/%u: ", set, |
| 27 | (uint64_t *) k - i->d, i->keys); | 27 | (uint64_t *) k - i->d, i->keys); |
| 28 | 28 | ||
| 29 | if (b->ops->key_dump) | 29 | if (b->ops->key_dump) |
| @@ -1185,9 +1185,12 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, | |||
| 1185 | struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, | 1185 | struct bset *out = (void *) __get_free_pages(__GFP_NOWARN|GFP_NOIO, |
| 1186 | order); | 1186 | order); |
| 1187 | if (!out) { | 1187 | if (!out) { |
| 1188 | struct page *outp; | ||
| 1189 | |||
| 1188 | BUG_ON(order > state->page_order); | 1190 | BUG_ON(order > state->page_order); |
| 1189 | 1191 | ||
| 1190 | out = page_address(mempool_alloc(state->pool, GFP_NOIO)); | 1192 | outp = mempool_alloc(state->pool, GFP_NOIO); |
| 1193 | out = page_address(outp); | ||
| 1191 | used_mempool = true; | 1194 | used_mempool = true; |
| 1192 | order = state->page_order; | 1195 | order = state->page_order; |
| 1193 | } | 1196 | } |
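The fix keeps the struct page * returned by mempool_alloc() in its own variable instead of feeding the call straight into page_address(). A hedged sketch of the underlying fallback pattern; the pool is assumed to have been created elsewhere with mempool_create_page_pool(), and the function name is illustrative:

#include <linux/gfp.h>
#include <linux/mempool.h>
#include <linux/mm.h>
#include <linux/types.h>

/* Allocate an order-'order' sort buffer, falling back to a page mempool
 * when the opportunistic allocation fails under memory pressure. */
static void *alloc_sort_buffer(mempool_t *pool, int order, bool *used_mempool)
{
        struct page *p;
        void *buf = (void *)__get_free_pages(__GFP_NOWARN | GFP_NOIO, order);

        if (buf) {
                *used_mempool = false;
                return buf;
        }

        /* The pool guarantees forward progress; GFP_NOIO waits rather than fails. */
        p = mempool_alloc(pool, GFP_NOIO);
        *used_mempool = true;
        return page_address(p);
}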
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 98cc0a810a36..5f9c2a665ca5 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c | |||
| @@ -1167,7 +1167,7 @@ uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k) | |||
| 1167 | /* guard against overflow */ | 1167 | /* guard against overflow */ |
| 1168 | SET_GC_SECTORS_USED(g, min_t(unsigned, | 1168 | SET_GC_SECTORS_USED(g, min_t(unsigned, |
| 1169 | GC_SECTORS_USED(g) + KEY_SIZE(k), | 1169 | GC_SECTORS_USED(g) + KEY_SIZE(k), |
| 1170 | (1 << 14) - 1)); | 1170 | MAX_GC_SECTORS_USED)); |
| 1171 | 1171 | ||
| 1172 | BUG_ON(!GC_SECTORS_USED(g)); | 1172 | BUG_ON(!GC_SECTORS_USED(g)); |
| 1173 | } | 1173 | } |
| @@ -1805,7 +1805,7 @@ static bool btree_insert_key(struct btree *b, struct bkey *k, | |||
| 1805 | 1805 | ||
| 1806 | static size_t insert_u64s_remaining(struct btree *b) | 1806 | static size_t insert_u64s_remaining(struct btree *b) |
| 1807 | { | 1807 | { |
| 1808 | ssize_t ret = bch_btree_keys_u64s_remaining(&b->keys); | 1808 | long ret = bch_btree_keys_u64s_remaining(&b->keys); |
| 1809 | 1809 | ||
| 1810 | /* | 1810 | /* |
| 1811 | * Might land in the middle of an existing extent and have to split it | 1811 | * Might land in the middle of an existing extent and have to split it |
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 72cd213f213f..5d5d031cf381 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c | |||
| @@ -353,14 +353,14 @@ static void bch_data_insert_start(struct closure *cl) | |||
| 353 | struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); | 353 | struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); |
| 354 | struct bio *bio = op->bio, *n; | 354 | struct bio *bio = op->bio, *n; |
| 355 | 355 | ||
| 356 | if (op->bypass) | ||
| 357 | return bch_data_invalidate(cl); | ||
| 358 | |||
| 359 | if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { | 356 | if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { |
| 360 | set_gc_sectors(op->c); | 357 | set_gc_sectors(op->c); |
| 361 | wake_up_gc(op->c); | 358 | wake_up_gc(op->c); |
| 362 | } | 359 | } |
| 363 | 360 | ||
| 361 | if (op->bypass) | ||
| 362 | return bch_data_invalidate(cl); | ||
| 363 | |||
| 364 | /* | 364 | /* |
| 365 | * Journal writes are marked REQ_FLUSH; if the original write was a | 365 | * Journal writes are marked REQ_FLUSH; if the original write was a |
| 366 | * flush, it'll wait on the journal write. | 366 | * flush, it'll wait on the journal write. |
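Moving the bypass check below the sectors_to_gc accounting means writes that bypass the cache still consume garbage-collection budget and can wake the GC thread. The general shape (do the global accounting before any early-return path), sketched with illustrative types and helpers (my_cache, my_op, wake_gc, invalidate):

#include <linux/atomic.h>
#include <linux/types.h>

struct my_cache { atomic_t sectors_to_gc; };
struct my_op { int sectors; bool bypass; };

static void wake_gc(struct my_cache *c)
{
        /* kick the garbage-collection thread */
}

static void invalidate(struct my_cache *c, struct my_op *op)
{
        /* drop any cached data covered by this op */
}

static void data_insert_start(struct my_cache *c, struct my_op *op)
{
        /* Account first: bypassed writes still count toward the GC trigger. */
        if (atomic_sub_return(op->sectors, &c->sectors_to_gc) < 0)
                wake_gc(c);

        if (op->bypass) {
                invalidate(c, op);      /* early return, but already accounted */
                return;
        }

        /* ... normal insert path ... */
}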
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index c6ab69333a6d..d8458d477a12 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c | |||
| @@ -416,7 +416,7 @@ static int btree_bset_stats(struct btree_op *b_op, struct btree *b) | |||
| 416 | return MAP_CONTINUE; | 416 | return MAP_CONTINUE; |
| 417 | } | 417 | } |
| 418 | 418 | ||
| 419 | int bch_bset_print_stats(struct cache_set *c, char *buf) | 419 | static int bch_bset_print_stats(struct cache_set *c, char *buf) |
| 420 | { | 420 | { |
| 421 | struct bset_stats_op op; | 421 | struct bset_stats_op op; |
| 422 | int ret; | 422 | int ret; |
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 0bad24ddc2e7..0129b78a6908 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
| @@ -114,6 +114,14 @@ void bio_integrity_free(struct bio *bio) | |||
| 114 | } | 114 | } |
| 115 | EXPORT_SYMBOL(bio_integrity_free); | 115 | EXPORT_SYMBOL(bio_integrity_free); |
| 116 | 116 | ||
| 117 | static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) | ||
| 118 | { | ||
| 119 | if (bip->bip_slab == BIO_POOL_NONE) | ||
| 120 | return BIP_INLINE_VECS; | ||
| 121 | |||
| 122 | return bvec_nr_vecs(bip->bip_slab); | ||
| 123 | } | ||
| 124 | |||
| 117 | /** | 125 | /** |
| 118 | * bio_integrity_add_page - Attach integrity metadata | 126 | * bio_integrity_add_page - Attach integrity metadata |
| 119 | * @bio: bio to update | 127 | * @bio: bio to update |
| @@ -129,7 +137,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, | |||
| 129 | struct bio_integrity_payload *bip = bio->bi_integrity; | 137 | struct bio_integrity_payload *bip = bio->bi_integrity; |
| 130 | struct bio_vec *iv; | 138 | struct bio_vec *iv; |
| 131 | 139 | ||
| 132 | if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) { | 140 | if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { |
| 133 | printk(KERN_ERR "%s: bip_vec full\n", __func__); | 141 | printk(KERN_ERR "%s: bip_vec full\n", __func__); |
| 134 | return 0; | 142 | return 0; |
| 135 | } | 143 | } |
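bip_integrity_vecs() makes the capacity check match the allocation: payloads whose vectors live inline in the bio (bip_slab == BIO_POOL_NONE) only have BIP_INLINE_VECS slots, not a slab's worth. A hedged usage sketch of where that check bites when attaching integrity metadata; attach_integrity and the single-vec sizing are assumptions, not driver code:

#include <linux/bio.h>
#include <linux/errno.h>
#include <linux/gfp.h>

static int attach_integrity(struct bio *bio, struct page *page, unsigned int len)
{
        struct bio_integrity_payload *bip;

        bip = bio_integrity_alloc(bio, GFP_NOIO, 1);    /* room for one vec */
        if (!bip)
                return -ENOMEM;

        /* Returns 0 when the payload's vec array is already full. */
        if (!bio_integrity_add_page(bio, page, len, 0))
                return -EIO;

        return 0;
}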
| @@ -226,7 +234,8 @@ unsigned int bio_integrity_tag_size(struct bio *bio) | |||
| 226 | } | 234 | } |
| 227 | EXPORT_SYMBOL(bio_integrity_tag_size); | 235 | EXPORT_SYMBOL(bio_integrity_tag_size); |
| 228 | 236 | ||
| 229 | int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set) | 237 | static int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, |
| 238 | int set) | ||
| 230 | { | 239 | { |
| 231 | struct bio_integrity_payload *bip = bio->bi_integrity; | 240 | struct bio_integrity_payload *bip = bio->bi_integrity; |
| 232 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | 241 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); |
diff --git a/fs/bio.c b/fs/bio.c --- a/fs/bio.c +++ b/fs/bio.c | |||
| @@ -611,7 +611,6 @@ EXPORT_SYMBOL(bio_clone_fast); | |||
| 611 | struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, | 611 | struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, |
| 612 | struct bio_set *bs) | 612 | struct bio_set *bs) |
| 613 | { | 613 | { |
| 614 | unsigned nr_iovecs = 0; | ||
| 615 | struct bvec_iter iter; | 614 | struct bvec_iter iter; |
| 616 | struct bio_vec bv; | 615 | struct bio_vec bv; |
| 617 | struct bio *bio; | 616 | struct bio *bio; |
| @@ -638,10 +637,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, | |||
| 638 | * __bio_clone_fast() anyways. | 637 | * __bio_clone_fast() anyways. |
| 639 | */ | 638 | */ |
| 640 | 639 | ||
| 641 | bio_for_each_segment(bv, bio_src, iter) | 640 | bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); |
| 642 | nr_iovecs++; | ||
| 643 | |||
| 644 | bio = bio_alloc_bioset(gfp_mask, nr_iovecs, bs); | ||
| 645 | if (!bio) | 641 | if (!bio) |
| 646 | return NULL; | 642 | return NULL; |
| 647 | 643 | ||
| @@ -650,9 +646,18 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, | |||
| 650 | bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; | 646 | bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; |
| 651 | bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; | 647 | bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; |
| 652 | 648 | ||
| 649 | if (bio->bi_rw & REQ_DISCARD) | ||
| 650 | goto integrity_clone; | ||
| 651 | |||
| 652 | if (bio->bi_rw & REQ_WRITE_SAME) { | ||
| 653 | bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; | ||
| 654 | goto integrity_clone; | ||
| 655 | } | ||
| 656 | |||
| 653 | bio_for_each_segment(bv, bio_src, iter) | 657 | bio_for_each_segment(bv, bio_src, iter) |
| 654 | bio->bi_io_vec[bio->bi_vcnt++] = bv; | 658 | bio->bi_io_vec[bio->bi_vcnt++] = bv; |
| 655 | 659 | ||
| 660 | integrity_clone: | ||
| 656 | if (bio_integrity(bio_src)) { | 661 | if (bio_integrity(bio_src)) { |
| 657 | int ret; | 662 | int ret; |
| 658 | 663 | ||
diff --git a/include/linux/bio.h b/include/linux/bio.h index 70654521dab6..5a4d39b4686b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
| @@ -250,6 +250,17 @@ static inline unsigned bio_segments(struct bio *bio) | |||
| 250 | struct bio_vec bv; | 250 | struct bio_vec bv; |
| 251 | struct bvec_iter iter; | 251 | struct bvec_iter iter; |
| 252 | 252 | ||
| 253 | /* | ||
| 254 | * We special case discard/write same, because they interpret bi_size | ||
| 255 | * differently: | ||
| 256 | */ | ||
| 257 | |||
| 258 | if (bio->bi_rw & REQ_DISCARD) | ||
| 259 | return 1; | ||
| 260 | |||
| 261 | if (bio->bi_rw & REQ_WRITE_SAME) | ||
| 262 | return 1; | ||
| 263 | |||
| 253 | bio_for_each_segment(bv, bio, iter) | 264 | bio_for_each_segment(bv, bio, iter) |
| 254 | segs++; | 265 | segs++; |
| 255 | 266 | ||
| @@ -332,6 +343,7 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); | |||
| 332 | extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); | 343 | extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); |
| 333 | 344 | ||
| 334 | extern struct bio_set *fs_bio_set; | 345 | extern struct bio_set *fs_bio_set; |
| 346 | unsigned int bio_integrity_tag_size(struct bio *bio); | ||
| 335 | 347 | ||
| 336 | static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) | 348 | static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) |
| 337 | { | 349 | { |
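bio_segments() now reports 1 for discard and write-same bios because bi_size does not describe data pages for them; walking the bvec list would under-size clones, which is exactly what the bio_clone_bioset() fix above relies on. A small sketch of sizing a clone from it; clone_for_stacking is an illustrative name, not an existing helper:

#include <linux/bio.h>
#include <linux/gfp.h>

static struct bio *clone_for_stacking(struct bio *src, gfp_t gfp, struct bio_set *bs)
{
        /* For REQ_DISCARD / REQ_WRITE_SAME this allocates one vec, however
         * large bi_size is. */
        struct bio *clone = bio_alloc_bioset(gfp, bio_segments(src), bs);

        if (!clone)
                return NULL;

        clone->bi_bdev = src->bi_bdev;
        clone->bi_rw = src->bi_rw;
        clone->bi_iter.bi_sector = src->bi_iter.bi_sector;
        clone->bi_iter.bi_size = src->bi_iter.bi_size;
        /* copy or special-case the vecs as bio_clone_bioset() does above */
        return clone;
}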
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 161b23105b1e..18ba8a627f46 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h | |||
| @@ -83,6 +83,8 @@ struct blk_mq_ops { | |||
| 83 | */ | 83 | */ |
| 84 | rq_timed_out_fn *timeout; | 84 | rq_timed_out_fn *timeout; |
| 85 | 85 | ||
| 86 | softirq_done_fn *complete; | ||
| 87 | |||
| 86 | /* | 88 | /* |
| 87 | * Override for hctx allocations (should probably go) | 89 | * Override for hctx allocations (should probably go) |
| 88 | */ | 90 | */ |
| @@ -119,11 +121,12 @@ void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struc | |||
| 119 | 121 | ||
| 120 | void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); | 122 | void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); |
| 121 | 123 | ||
| 122 | void blk_mq_insert_request(struct request_queue *, struct request *, bool); | 124 | void blk_mq_insert_request(struct request_queue *, struct request *, |
| 125 | bool, bool); | ||
| 123 | void blk_mq_run_queues(struct request_queue *q, bool async); | 126 | void blk_mq_run_queues(struct request_queue *q, bool async); |
| 124 | void blk_mq_free_request(struct request *rq); | 127 | void blk_mq_free_request(struct request *rq); |
| 125 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *); | 128 | bool blk_mq_can_queue(struct blk_mq_hw_ctx *); |
| 126 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); | 129 | struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp); |
| 127 | struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); | 130 | struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); |
| 128 | struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); | 131 | struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); |
| 129 | 132 | ||
| @@ -133,6 +136,8 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); | |||
| 133 | 136 | ||
| 134 | void blk_mq_end_io(struct request *rq, int error); | 137 | void blk_mq_end_io(struct request *rq, int error); |
| 135 | 138 | ||
| 139 | void blk_mq_complete_request(struct request *rq); | ||
| 140 | |||
| 136 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); | 141 | void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); |
| 137 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); | 142 | void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); |
| 138 | void blk_mq_stop_hw_queues(struct request_queue *q); | 143 | void blk_mq_stop_hw_queues(struct request_queue *q); |
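With the reworked completions a driver finishes a request in two stages: the interrupt (or polling) path calls blk_mq_complete_request(), and the expensive work runs later in the ops->complete softirq callback, which ends the request with blk_mq_end_io(). A hedged driver-side sketch; my_softirq_done, my_irq_complete and my_mq_ops are placeholder names and the remaining ops are elided:

#include <linux/blk-mq.h>
#include <linux/blkdev.h>
#include <linux/errno.h>

/* Stage 2: runs from the block softirq; may do unmap/free work. */
static void my_softirq_done(struct request *rq)
{
        /* ... unmap DMA, release per-request resources ... */
        blk_mq_end_io(rq, rq->errors ? -EIO : 0);
}

/* Stage 1: called from the device interrupt handler. */
static void my_irq_complete(struct request *rq, int error)
{
        rq->errors = error;
        blk_mq_complete_request(rq);    /* defers to ->complete on the right CPU */
}

static struct blk_mq_ops my_mq_ops = {
        /* .queue_rq, .map_queue, .alloc_hctx, ... elided */
        .complete       = my_softirq_done,
};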
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8678c4322b44..4afa4f8f6090 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
| @@ -98,7 +98,7 @@ struct request { | |||
| 98 | struct list_head queuelist; | 98 | struct list_head queuelist; |
| 99 | union { | 99 | union { |
| 100 | struct call_single_data csd; | 100 | struct call_single_data csd; |
| 101 | struct work_struct mq_flush_data; | 101 | struct work_struct mq_flush_work; |
| 102 | }; | 102 | }; |
| 103 | 103 | ||
| 104 | struct request_queue *q; | 104 | struct request_queue *q; |
| @@ -448,13 +448,8 @@ struct request_queue { | |||
| 448 | unsigned long flush_pending_since; | 448 | unsigned long flush_pending_since; |
| 449 | struct list_head flush_queue[2]; | 449 | struct list_head flush_queue[2]; |
| 450 | struct list_head flush_data_in_flight; | 450 | struct list_head flush_data_in_flight; |
| 451 | union { | 451 | struct request *flush_rq; |
| 452 | struct request flush_rq; | 452 | spinlock_t mq_flush_lock; |
| 453 | struct { | ||
| 454 | spinlock_t mq_flush_lock; | ||
| 455 | struct work_struct mq_flush_work; | ||
| 456 | }; | ||
| 457 | }; | ||
| 458 | 453 | ||
| 459 | struct mutex sysfs_lock; | 454 | struct mutex sysfs_lock; |
| 460 | 455 | ||
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h index ae665ac59c36..32ec05a6572f 100644 --- a/include/xen/interface/io/blkif.h +++ b/include/xen/interface/io/blkif.h | |||
| @@ -113,13 +113,13 @@ typedef uint64_t blkif_sector_t; | |||
| 113 | * it's less than the number provided by the backend. The indirect_grefs field | 113 | * it's less than the number provided by the backend. The indirect_grefs field |
| 114 | * in blkif_request_indirect should be filled by the frontend with the | 114 | * in blkif_request_indirect should be filled by the frontend with the |
| 115 | * grant references of the pages that are holding the indirect segments. | 115 | * grant references of the pages that are holding the indirect segments. |
| 116 | * This pages are filled with an array of blkif_request_segment_aligned | 116 | * These pages are filled with an array of blkif_request_segment that hold the |
| 117 | * that hold the information about the segments. The number of indirect | 117 | * information about the segments. The number of indirect pages to use is |
| 118 | * pages to use is determined by the maximum number of segments | 118 | * determined by the number of segments an indirect request contains. Every |
| 119 | * a indirect request contains. Every indirect page can contain a maximum | 119 | * indirect page can contain a maximum of |
| 120 | * of 512 segments (PAGE_SIZE/sizeof(blkif_request_segment_aligned)), | 120 | * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to |
| 121 | * so to calculate the number of indirect pages to use we have to do | 121 | * calculate the number of indirect pages to use we have to do |
| 122 | * ceil(indirect_segments/512). | 122 | * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))). |
| 123 | * | 123 | * |
| 124 | * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* | 124 | * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not* |
| 125 | * create the "feature-max-indirect-segments" node! | 125 | * create the "feature-max-indirect-segments" node! |
| @@ -135,13 +135,12 @@ typedef uint64_t blkif_sector_t; | |||
| 135 | 135 | ||
| 136 | #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 | 136 | #define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8 |
| 137 | 137 | ||
| 138 | struct blkif_request_segment_aligned { | 138 | struct blkif_request_segment { |
| 139 | grant_ref_t gref; /* reference to I/O buffer frame */ | 139 | grant_ref_t gref; /* reference to I/O buffer frame */ |
| 140 | /* @first_sect: first sector in frame to transfer (inclusive). */ | 140 | /* @first_sect: first sector in frame to transfer (inclusive). */ |
| 141 | /* @last_sect: last sector in frame to transfer (inclusive). */ | 141 | /* @last_sect: last sector in frame to transfer (inclusive). */ |
| 142 | uint8_t first_sect, last_sect; | 142 | uint8_t first_sect, last_sect; |
| 143 | uint16_t _pad; /* padding to make it 8 bytes, so it's cache-aligned */ | 143 | }; |
| 144 | } __attribute__((__packed__)); | ||
| 145 | 144 | ||
| 146 | struct blkif_request_rw { | 145 | struct blkif_request_rw { |
| 147 | uint8_t nr_segments; /* number of segments */ | 146 | uint8_t nr_segments; /* number of segments */ |
| @@ -151,12 +150,7 @@ struct blkif_request_rw { | |||
| 151 | #endif | 150 | #endif |
| 152 | uint64_t id; /* private guest value, echoed in resp */ | 151 | uint64_t id; /* private guest value, echoed in resp */ |
| 153 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ | 152 | blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ |
| 154 | struct blkif_request_segment { | 153 | struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; |
| 155 | grant_ref_t gref; /* reference to I/O buffer frame */ | ||
| 156 | /* @first_sect: first sector in frame to transfer (inclusive). */ | ||
| 157 | /* @last_sect: last sector in frame to transfer (inclusive). */ | ||
| 158 | uint8_t first_sect, last_sect; | ||
| 159 | } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; | ||
| 160 | } __attribute__((__packed__)); | 154 | } __attribute__((__packed__)); |
| 161 | 155 | ||
| 162 | struct blkif_request_discard { | 156 | struct blkif_request_discard { |
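With the unified struct blkif_request_segment (a 32-bit grant reference plus two sector bytes, padded to 8 bytes by normal alignment on a typical build) a 4 KiB page still holds 512 entries, so the number of indirect pages is ceil(segments / 512). A quick stand-alone check of that arithmetic; the local struct seg only mirrors the layout for illustration:

#include <stdio.h>
#include <stdint.h>

struct seg { uint32_t gref; uint8_t first_sect, last_sect; };   /* 8 bytes with padding */

#define MY_PAGE_SIZE 4096u

int main(void)
{
        unsigned int segs_per_frame = MY_PAGE_SIZE / sizeof(struct seg); /* 512 on typical ABIs */
        unsigned int indirect_segments = 256;   /* MAX_INDIRECT_SEGMENTS in the backend */
        unsigned int pages = (indirect_segments + segs_per_frame - 1) / segs_per_frame;

        printf("segments per indirect page: %u\n", segs_per_frame);
        printf("indirect pages for %u segments: %u\n", indirect_segments, pages);  /* 1 */
        return 0;
}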
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 7be235f1a70b..93d145e5539c 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c | |||
| @@ -54,9 +54,7 @@ static inline void move_tags(unsigned *dst, unsigned *dst_nr, | |||
| 54 | /* | 54 | /* |
| 55 | * Try to steal tags from a remote cpu's percpu freelist. | 55 | * Try to steal tags from a remote cpu's percpu freelist. |
| 56 | * | 56 | * |
| 57 | * We first check how many percpu freelists have tags - we don't steal tags | 57 | * We first check how many percpu freelists have tags |
| 58 | * unless enough percpu freelists have tags on them that it's possible more than | ||
| 59 | * half the total tags could be stuck on remote percpu freelists. | ||
| 60 | * | 58 | * |
| 61 | * Then we iterate through the cpus until we find some tags - we don't attempt | 59 | * Then we iterate through the cpus until we find some tags - we don't attempt |
| 62 | * to find the "best" cpu to steal from, to keep cacheline bouncing to a | 60 | * to find the "best" cpu to steal from, to keep cacheline bouncing to a |
| @@ -69,8 +67,7 @@ static inline void steal_tags(struct percpu_ida *pool, | |||
| 69 | struct percpu_ida_cpu *remote; | 67 | struct percpu_ida_cpu *remote; |
| 70 | 68 | ||
| 71 | for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); | 69 | for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); |
| 72 | cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2; | 70 | cpus_have_tags; cpus_have_tags--) { |
| 73 | cpus_have_tags--) { | ||
| 74 | cpu = cpumask_next(cpu, &pool->cpus_have_tags); | 71 | cpu = cpumask_next(cpu, &pool->cpus_have_tags); |
| 75 | 72 | ||
| 76 | if (cpu >= nr_cpu_ids) { | 73 | if (cpu >= nr_cpu_ids) { |
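Dropping the "more than half the tags could be remote" heuristic means steal_tags() now visits remote freelists whenever any CPU is flagged in cpus_have_tags, so tags cannot strand on idle CPUs of a small pool. The iteration pattern, sketched as a plain cpumask walk; my_pool and try_steal_from() are illustrative stand-ins for the percpu_ida internals:

#include <linux/cpumask.h>
#include <linux/types.h>

struct my_pool {
        struct cpumask cpus_have_tags;  /* CPUs believed to hold free tags */
        /* per-cpu freelists elided */
};

static bool try_steal_from(struct my_pool *pool, int cpu)
{
        /* move tags off the remote cpu's freelist; true if any were moved */
        return false;
}

static void steal_from_remote_cpus(struct my_pool *pool, int start_cpu)
{
        unsigned int remaining = cpumask_weight(&pool->cpus_have_tags);
        int cpu = start_cpu;

        for (; remaining; remaining--) {
                cpu = cpumask_next(cpu, &pool->cpus_have_tags);
                if (cpu >= nr_cpu_ids)
                        cpu = cpumask_first(&pool->cpus_have_tags);
                if (cpu >= nr_cpu_ids)
                        break;          /* the mask emptied under us */
                if (try_steal_from(pool, cpu))
                        break;
        }
}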
