author		Jiri Kosina <jkosina@suse.cz>	2014-02-20 08:54:28 -0500
committer	Jiri Kosina <jkosina@suse.cz>	2014-02-20 08:54:28 -0500
commit		d4263348f796f29546f90802177865dd4379dd0a (patch)
tree		adcbdaebae584eee2f32fab95e826e8e49eef385 /block
parent		be873ac782f5ff5ee6675f83929f4fe6737eead2 (diff)
parent		6d0abeca3242a88cab8232e4acd7e2bf088f3bc2 (diff)
Merge branch 'master' into for-next
Diffstat (limited to 'block')
-rw-r--r--	block/blk-core.c	81
-rw-r--r--	block/blk-exec.c	6
-rw-r--r--	block/blk-flush.c	103
-rw-r--r--	block/blk-integrity.c	40
-rw-r--r--	block/blk-lib.c	20
-rw-r--r--	block/blk-map.c	6
-rw-r--r--	block/blk-merge.c	145
-rw-r--r--	block/blk-mq-cpu.c	37
-rw-r--r--	block/blk-mq-sysfs.c	13
-rw-r--r--	block/blk-mq-tag.c	8
-rw-r--r--	block/blk-mq.c	258
-rw-r--r--	block/blk-mq.h	7
-rw-r--r--	block/blk-settings.c	4
-rw-r--r--	block/blk-sysfs.c	3
-rw-r--r--	block/blk-throttle.c	49
-rw-r--r--	block/blk-timeout.c	2
-rw-r--r--	block/blk.h	2
-rw-r--r--	block/cfq-iosched.c	131
-rw-r--r--	block/cmdline-parser.c	18
-rw-r--r--	block/elevator.c	2
-rw-r--r--	block/scsi_ioctl.c	6
21 files changed, 484 insertions, 457 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index cd0158163fe0..4db2b32b70e0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -38,6 +38,7 @@
 
 #include "blk.h"
 #include "blk-cgroup.h"
+#include "blk-mq.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
@@ -130,7 +131,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
 	bio_advance(bio, nbytes);
 
 	/* don't actually finish bio if it's part of flush sequence */
-	if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+	if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
 		bio_endio(bio, error);
 }
 
@@ -245,7 +246,16 @@ EXPORT_SYMBOL(blk_stop_queue);
 void blk_sync_queue(struct request_queue *q)
 {
 	del_timer_sync(&q->timeout);
-	cancel_delayed_work_sync(&q->delay_work);
+
+	if (q->mq_ops) {
+		struct blk_mq_hw_ctx *hctx;
+		int i;
+
+		queue_for_each_hw_ctx(q, hctx, i)
+			cancel_delayed_work_sync(&hctx->delayed_work);
+	} else {
+		cancel_delayed_work_sync(&q->delay_work);
+	}
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
@@ -497,8 +507,13 @@ void blk_cleanup_queue(struct request_queue *q)
 	 * Drain all requests queued before DYING marking. Set DEAD flag to
 	 * prevent that q->request_fn() gets invoked after draining finished.
 	 */
-	spin_lock_irq(lock);
-	__blk_drain_queue(q, true);
+	if (q->mq_ops) {
+		blk_mq_drain_queue(q);
+		spin_lock_irq(lock);
+	} else {
+		spin_lock_irq(lock);
+		__blk_drain_queue(q, true);
+	}
 	queue_flag_set(QUEUE_FLAG_DEAD, q);
 	spin_unlock_irq(lock);
 
@@ -678,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
 	if (!uninit_q)
 		return NULL;
 
+	uninit_q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
+	if (!uninit_q->flush_rq)
+		goto out_cleanup_queue;
+
 	q = blk_init_allocated_queue(uninit_q, rfn, lock);
 	if (!q)
-		blk_cleanup_queue(uninit_q);
-
+		goto out_free_flush_rq;
 	return q;
+
+out_free_flush_rq:
+	kfree(uninit_q->flush_rq);
+out_cleanup_queue:
+	blk_cleanup_queue(uninit_q);
+	return NULL;
 }
 EXPORT_SYMBOL(blk_init_queue_node);
 
@@ -1112,7 +1136,7 @@ static struct request *blk_old_get_request(struct request_queue *q, int rw,
 struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask)
 {
 	if (q->mq_ops)
-		return blk_mq_alloc_request(q, rw, gfp_mask, false);
+		return blk_mq_alloc_request(q, rw, gfp_mask);
 	else
 		return blk_old_get_request(q, rw, gfp_mask);
 }
@@ -1263,6 +1287,11 @@ void __blk_put_request(struct request_queue *q, struct request *req)
 	if (unlikely(!q))
 		return;
 
+	if (q->mq_ops) {
+		blk_mq_free_request(req);
+		return;
+	}
+
 	blk_pm_put_request(req);
 
 	elv_completed_request(q, req);
@@ -1326,7 +1355,7 @@ void blk_add_request_payload(struct request *rq, struct page *page,
 	bio->bi_io_vec->bv_offset = 0;
 	bio->bi_io_vec->bv_len = len;
 
-	bio->bi_size = len;
+	bio->bi_iter.bi_size = len;
 	bio->bi_vcnt = 1;
 	bio->bi_phys_segments = 1;
 
@@ -1351,7 +1380,7 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 
 	req->biotail->bi_next = bio;
 	req->biotail = bio;
-	req->__data_len += bio->bi_size;
+	req->__data_len += bio->bi_iter.bi_size;
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	blk_account_io_start(req, false);
@@ -1380,8 +1409,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req,
 	 * not touch req->buffer either...
 	 */
 	req->buffer = bio_data(bio);
-	req->__sector = bio->bi_sector;
-	req->__data_len += bio->bi_size;
+	req->__sector = bio->bi_iter.bi_sector;
+	req->__data_len += bio->bi_iter.bi_size;
 	req->ioprio = ioprio_best(req->ioprio, bio_prio(bio));
 
 	blk_account_io_start(req, false);
@@ -1459,7 +1488,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 		req->cmd_flags |= REQ_FAILFAST_MASK;
 
 	req->errors = 0;
-	req->__sector = bio->bi_sector;
+	req->__sector = bio->bi_iter.bi_sector;
 	req->ioprio = bio_prio(bio);
 	blk_rq_bio_prep(req->q, req, bio);
 }
@@ -1583,12 +1612,12 @@ static inline void blk_partition_remap(struct bio *bio)
 	if (bio_sectors(bio) && bdev != bdev->bd_contains) {
 		struct hd_struct *p = bdev->bd_part;
 
-		bio->bi_sector += p->start_sect;
+		bio->bi_iter.bi_sector += p->start_sect;
 		bio->bi_bdev = bdev->bd_contains;
 
 		trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio,
 				      bdev->bd_dev,
-				      bio->bi_sector - p->start_sect);
+				      bio->bi_iter.bi_sector - p->start_sect);
 	}
 }
 
@@ -1654,7 +1683,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
 	/* Test device or partition size, when known. */
 	maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9;
 	if (maxsector) {
-		sector_t sector = bio->bi_sector;
+		sector_t sector = bio->bi_iter.bi_sector;
 
 		if (maxsector < nr_sectors || maxsector - nr_sectors < sector) {
 			/*
@@ -1690,7 +1719,7 @@ generic_make_request_checks(struct bio *bio)
 			"generic_make_request: Trying to access "
 			"nonexistent block-device %s (%Lu)\n",
 			bdevname(bio->bi_bdev, b),
-			(long long) bio->bi_sector);
+			(long long) bio->bi_iter.bi_sector);
 		goto end_io;
 	}
 
@@ -1704,9 +1733,9 @@ generic_make_request_checks(struct bio *bio)
 	}
 
 	part = bio->bi_bdev->bd_part;
-	if (should_fail_request(part, bio->bi_size) ||
+	if (should_fail_request(part, bio->bi_iter.bi_size) ||
 	    should_fail_request(&part_to_disk(part)->part0,
-				bio->bi_size))
+				bio->bi_iter.bi_size))
 		goto end_io;
 
 	/*
@@ -1865,7 +1894,7 @@ void submit_bio(int rw, struct bio *bio)
 		if (rw & WRITE) {
 			count_vm_events(PGPGOUT, count);
 		} else {
-			task_io_account_read(bio->bi_size);
+			task_io_account_read(bio->bi_iter.bi_size);
 			count_vm_events(PGPGIN, count);
 		}
 
@@ -1874,7 +1903,7 @@ void submit_bio(int rw, struct bio *bio)
 			printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n",
 				current->comm, task_pid_nr(current),
 				(rw & WRITE) ? "WRITE" : "READ",
-				(unsigned long long)bio->bi_sector,
+				(unsigned long long)bio->bi_iter.bi_sector,
 				bdevname(bio->bi_bdev, b),
 				count);
 		}
@@ -2007,7 +2036,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq)
 	for (bio = rq->bio; bio; bio = bio->bi_next) {
 		if ((bio->bi_rw & ff) != ff)
 			break;
-		bytes += bio->bi_size;
+		bytes += bio->bi_iter.bi_size;
 	}
 
 	/* this could lead to infinite loop */
@@ -2378,9 +2407,9 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 	total_bytes = 0;
 	while (req->bio) {
 		struct bio *bio = req->bio;
-		unsigned bio_bytes = min(bio->bi_size, nr_bytes);
+		unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
 
-		if (bio_bytes == bio->bi_size)
+		if (bio_bytes == bio->bi_iter.bi_size)
 			req->bio = bio->bi_next;
 
 		req_bio_endio(req, bio, bio_bytes, error);
@@ -2728,7 +2757,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
 		rq->buffer = bio_data(bio);
 	}
-	rq->__data_len = bio->bi_size;
+	rq->__data_len = bio->bi_iter.bi_size;
 	rq->bio = rq->biotail = bio;
 
 	if (bio->bi_bdev)
@@ -2746,10 +2775,10 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 void rq_flush_dcache_pages(struct request *rq)
 {
 	struct req_iterator iter;
-	struct bio_vec *bvec;
+	struct bio_vec bvec;
 
 	rq_for_each_segment(bvec, rq, iter)
-		flush_dcache_page(bvec->bv_page);
+		flush_dcache_page(bvec.bv_page);
 }
 EXPORT_SYMBOL_GPL(rq_flush_dcache_pages);
 #endif
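
Much of the blk-core.c churn above is the immutable-biovec conversion: bi_sector and bi_size move from struct bio into the embedded bvec_iter at bio->bi_iter. A minimal sketch of a call site written against the new layout (illustrative only, not part of this patch):

	#include <linux/bio.h>

	/* Hypothetical helper: the position/size fields now live in bio->bi_iter. */
	static sector_t example_bio_end_sector(struct bio *bio)
	{
		return bio->bi_iter.bi_sector + (bio->bi_iter.bi_size >> 9);
	}
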
diff --git a/block/blk-exec.c b/block/blk-exec.c
index c3edf9dff566..c68613bb4c79 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -60,8 +60,12 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	rq->rq_disk = bd_disk;
 	rq->end_io = done;
 
+	/*
+	 * don't check dying flag for MQ because the request won't
+	 * be resued after dying flag is set
+	 */
 	if (q->mq_ops) {
-		blk_mq_insert_request(q, rq, true);
+		blk_mq_insert_request(q, rq, at_head, true);
 		return;
 	}
 
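
blk_mq_insert_request() grows an at_head argument in this series, and blk_execute_rq_nowait() now forwards its caller's at_head flag instead of always queueing at the tail. A hedged sketch of what that allows, using only the signature visible in the hunk above:

	/*
	 * Illustrative only: a request that should not wait behind already
	 * queued I/O can ask to be inserted at the head of the sw queue.
	 */
	if (q->mq_ops) {
		blk_mq_insert_request(q, rq, true /* at_head */, true /* run_queue */);
		return;
	}
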
diff --git a/block/blk-flush.c b/block/blk-flush.c
index fb6f3c0ffa49..66e2b697f5db 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -130,20 +130,26 @@ static void blk_flush_restore_request(struct request *rq)
 	blk_clear_rq_complete(rq);
 }
 
-static void mq_flush_data_run(struct work_struct *work)
+static void mq_flush_run(struct work_struct *work)
 {
 	struct request *rq;
 
-	rq = container_of(work, struct request, mq_flush_data);
+	rq = container_of(work, struct request, mq_flush_work);
 
 	memset(&rq->csd, 0, sizeof(rq->csd));
 	blk_mq_run_request(rq, true, false);
 }
 
-static void blk_mq_flush_data_insert(struct request *rq)
+static bool blk_flush_queue_rq(struct request *rq)
 {
-	INIT_WORK(&rq->mq_flush_data, mq_flush_data_run);
-	kblockd_schedule_work(rq->q, &rq->mq_flush_data);
+	if (rq->q->mq_ops) {
+		INIT_WORK(&rq->mq_flush_work, mq_flush_run);
+		kblockd_schedule_work(rq->q, &rq->mq_flush_work);
+		return false;
+	} else {
+		list_add_tail(&rq->queuelist, &rq->q->queue_head);
+		return true;
+	}
 }
 
 /**
@@ -187,12 +193,7 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 
 	case REQ_FSEQ_DATA:
 		list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
-		if (q->mq_ops)
-			blk_mq_flush_data_insert(rq);
-		else {
-			list_add(&rq->queuelist, &q->queue_head);
-			queued = true;
-		}
+		queued = blk_flush_queue_rq(rq);
 		break;
 
 	case REQ_FSEQ_DONE:
@@ -216,9 +217,6 @@ static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
 	}
 
 	kicked = blk_kick_flush(q);
-	/* blk_mq_run_flush will run queue */
-	if (q->mq_ops)
-		return queued;
 	return kicked | queued;
 }
 
@@ -230,10 +228,9 @@ static void flush_end_io(struct request *flush_rq, int error)
 	struct request *rq, *n;
 	unsigned long flags = 0;
 
-	if (q->mq_ops) {
-		blk_mq_free_request(flush_rq);
+	if (q->mq_ops)
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
-	}
+
 	running = &q->flush_queue[q->flush_running_idx];
 	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
 
@@ -263,49 +260,14 @@ static void flush_end_io(struct request *flush_rq, int error)
 	 * kblockd.
 	 */
 	if (queued || q->flush_queue_delayed) {
-		if (!q->mq_ops)
-			blk_run_queue_async(q);
-		else
-			/*
-			 * This can be optimized to only run queues with requests
-			 * queued if necessary.
-			 */
-			blk_mq_run_queues(q, true);
+		WARN_ON(q->mq_ops);
+		blk_run_queue_async(q);
 	}
 	q->flush_queue_delayed = 0;
 	if (q->mq_ops)
 		spin_unlock_irqrestore(&q->mq_flush_lock, flags);
 }
 
-static void mq_flush_work(struct work_struct *work)
-{
-	struct request_queue *q;
-	struct request *rq;
-
-	q = container_of(work, struct request_queue, mq_flush_work);
-
-	/* We don't need set REQ_FLUSH_SEQ, it's for consistency */
-	rq = blk_mq_alloc_request(q, WRITE_FLUSH|REQ_FLUSH_SEQ,
-		__GFP_WAIT|GFP_ATOMIC, true);
-	rq->cmd_type = REQ_TYPE_FS;
-	rq->end_io = flush_end_io;
-
-	blk_mq_run_request(rq, true, false);
-}
-
-/*
- * We can't directly use q->flush_rq, because it doesn't have tag and is not in
- * hctx->rqs[]. so we must allocate a new request, since we can't sleep here,
- * so offload the work to workqueue.
- *
- * Note: we assume a flush request finished in any hardware queue will flush
- * the whole disk cache.
- */
-static void mq_run_flush(struct request_queue *q)
-{
-	kblockd_schedule_work(q, &q->mq_flush_work);
-}
-
 /**
  * blk_kick_flush - consider issuing flush request
  * @q: request_queue being kicked
@@ -340,19 +302,31 @@ static bool blk_kick_flush(struct request_queue *q)
 	 * different from running_idx, which means flush is in flight.
 	 */
 	q->flush_pending_idx ^= 1;
+
 	if (q->mq_ops) {
-		mq_run_flush(q);
-		return true;
+		struct blk_mq_ctx *ctx = first_rq->mq_ctx;
+		struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q, ctx->cpu);
+
+		blk_mq_rq_init(hctx, q->flush_rq);
+		q->flush_rq->mq_ctx = ctx;
+
+		/*
+		 * Reuse the tag value from the fist waiting request,
+		 * with blk-mq the tag is generated during request
+		 * allocation and drivers can rely on it being inside
+		 * the range they asked for.
+		 */
+		q->flush_rq->tag = first_rq->tag;
+	} else {
+		blk_rq_init(q, q->flush_rq);
 	}
 
-	blk_rq_init(q, &q->flush_rq);
-	q->flush_rq.cmd_type = REQ_TYPE_FS;
-	q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
-	q->flush_rq.rq_disk = first_rq->rq_disk;
-	q->flush_rq.end_io = flush_end_io;
+	q->flush_rq->cmd_type = REQ_TYPE_FS;
+	q->flush_rq->cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+	q->flush_rq->rq_disk = first_rq->rq_disk;
+	q->flush_rq->end_io = flush_end_io;
 
-	list_add_tail(&q->flush_rq.queuelist, &q->queue_head);
-	return true;
+	return blk_flush_queue_rq(q->flush_rq);
 }
 
 static void flush_data_end_io(struct request *rq, int error)
@@ -548,7 +522,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
 	 * copied from blk_rq_pos(rq).
 	 */
 	if (error_sector)
-		*error_sector = bio->bi_sector;
+		*error_sector = bio->bi_iter.bi_sector;
 
 	bio_put(bio);
 	return ret;
@@ -558,5 +532,4 @@ EXPORT_SYMBOL(blkdev_issue_flush);
 void blk_mq_init_flush(struct request_queue *q)
 {
 	spin_lock_init(&q->mq_flush_lock);
-	INIT_WORK(&q->mq_flush_work, mq_flush_work);
 }
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 03cf7179e8ef..7fbab84399e6 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -43,30 +43,32 @@ static const char *bi_unsupported_name = "unsupported";
  */
 int blk_rq_count_integrity_sg(struct request_queue *q, struct bio *bio)
 {
-	struct bio_vec *iv, *ivprv = NULL;
+	struct bio_vec iv, ivprv = { NULL };
 	unsigned int segments = 0;
 	unsigned int seg_size = 0;
-	unsigned int i = 0;
+	struct bvec_iter iter;
+	int prev = 0;
 
-	bio_for_each_integrity_vec(iv, bio, i) {
+	bio_for_each_integrity_vec(iv, bio, iter) {
 
-		if (ivprv) {
-			if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+		if (prev) {
+			if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
 				goto new_segment;
 
-			if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv))
+			if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
 				goto new_segment;
 
-			if (seg_size + iv->bv_len > queue_max_segment_size(q))
+			if (seg_size + iv.bv_len > queue_max_segment_size(q))
 				goto new_segment;
 
-			seg_size += iv->bv_len;
+			seg_size += iv.bv_len;
 		} else {
 new_segment:
 			segments++;
-			seg_size = iv->bv_len;
+			seg_size = iv.bv_len;
 		}
 
+		prev = 1;
 		ivprv = iv;
 	}
 
@@ -87,24 +89,25 @@ EXPORT_SYMBOL(blk_rq_count_integrity_sg);
 int blk_rq_map_integrity_sg(struct request_queue *q, struct bio *bio,
 			    struct scatterlist *sglist)
 {
-	struct bio_vec *iv, *ivprv = NULL;
+	struct bio_vec iv, ivprv = { NULL };
 	struct scatterlist *sg = NULL;
 	unsigned int segments = 0;
-	unsigned int i = 0;
+	struct bvec_iter iter;
+	int prev = 0;
 
-	bio_for_each_integrity_vec(iv, bio, i) {
+	bio_for_each_integrity_vec(iv, bio, iter) {
 
-		if (ivprv) {
-			if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
+		if (prev) {
+			if (!BIOVEC_PHYS_MERGEABLE(&ivprv, &iv))
 				goto new_segment;
 
-			if (!BIOVEC_SEG_BOUNDARY(q, ivprv, iv))
+			if (!BIOVEC_SEG_BOUNDARY(q, &ivprv, &iv))
 				goto new_segment;
 
-			if (sg->length + iv->bv_len > queue_max_segment_size(q))
+			if (sg->length + iv.bv_len > queue_max_segment_size(q))
 				goto new_segment;
 
-			sg->length += iv->bv_len;
+			sg->length += iv.bv_len;
 		} else {
 new_segment:
 			if (!sg)
@@ -114,10 +117,11 @@ new_segment:
 				sg = sg_next(sg);
 			}
 
-			sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset);
+			sg_set_page(sg, iv.bv_page, iv.bv_len, iv.bv_offset);
 			segments++;
 		}
 
+		prev = 1;
 		ivprv = iv;
 	}
 
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 9b5b561cb928..97a733cf3d5f 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -108,17 +108,25 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 			req_sects = end_sect - sector;
 		}
 
-		bio->bi_sector = sector;
+		bio->bi_iter.bi_sector = sector;
 		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_bdev = bdev;
 		bio->bi_private = &bb;
 
-		bio->bi_size = req_sects << 9;
+		bio->bi_iter.bi_size = req_sects << 9;
 		nr_sects -= req_sects;
 		sector = end_sect;
 
 		atomic_inc(&bb.done);
 		submit_bio(type, bio);
+
+		/*
+		 * We can loop for a long time in here, if someone does
+		 * full device discards (like mkfs). Be nice and allow
+		 * us to schedule out to avoid softlocking if preempt
+		 * is disabled.
+		 */
+		cond_resched();
 	}
 	blk_finish_plug(&plug);
 
@@ -174,7 +182,7 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 			break;
 		}
 
-		bio->bi_sector = sector;
+		bio->bi_iter.bi_sector = sector;
 		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_bdev = bdev;
 		bio->bi_private = &bb;
@@ -184,11 +192,11 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
 		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
 
 		if (nr_sects > max_write_same_sectors) {
-			bio->bi_size = max_write_same_sectors << 9;
+			bio->bi_iter.bi_size = max_write_same_sectors << 9;
 			nr_sects -= max_write_same_sectors;
 			sector += max_write_same_sectors;
 		} else {
-			bio->bi_size = nr_sects << 9;
+			bio->bi_iter.bi_size = nr_sects << 9;
 			nr_sects = 0;
 		}
 
@@ -240,7 +248,7 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 			break;
 		}
 
-		bio->bi_sector = sector;
+		bio->bi_iter.bi_sector = sector;
 		bio->bi_bdev = bdev;
 		bio->bi_end_io = bio_batch_end_io;
 		bio->bi_private = &bb;
diff --git a/block/blk-map.c b/block/blk-map.c
index 62382ad5b010..cca6356d216d 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -20,7 +20,7 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq,
 		rq->biotail->bi_next = bio;
 		rq->biotail = bio;
 
-		rq->__data_len += bio->bi_size;
+		rq->__data_len += bio->bi_iter.bi_size;
 	}
 	return 0;
 }
@@ -76,7 +76,7 @@ static int __blk_rq_map_user(struct request_queue *q, struct request *rq,
 
 	ret = blk_rq_append_bio(q, rq, bio);
 	if (!ret)
-		return bio->bi_size;
+		return bio->bi_iter.bi_size;
 
 	/* if it was boucned we must call the end io function */
 	bio_endio(bio, 0);
@@ -220,7 +220,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
-	if (bio->bi_size != len) {
+	if (bio->bi_iter.bi_size != len) {
 		/*
 		 * Grab an extra reference to this bio, as bio_unmap_user()
 		 * expects to be able to drop it twice as it happens on the
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 1ffc58977835..6c583f9c5b65 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -12,38 +12,47 @@
 static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 					     struct bio *bio)
 {
-	struct bio_vec *bv, *bvprv = NULL;
-	int cluster, i, high, highprv = 1;
+	struct bio_vec bv, bvprv = { NULL };
+	int cluster, high, highprv = 1;
 	unsigned int seg_size, nr_phys_segs;
 	struct bio *fbio, *bbio;
+	struct bvec_iter iter;
 
 	if (!bio)
 		return 0;
 
+	/*
+	 * This should probably be returning 0, but blk_add_request_payload()
+	 * (Christoph!!!!)
+	 */
+	if (bio->bi_rw & REQ_DISCARD)
+		return 1;
+
+	if (bio->bi_rw & REQ_WRITE_SAME)
+		return 1;
+
 	fbio = bio;
 	cluster = blk_queue_cluster(q);
 	seg_size = 0;
 	nr_phys_segs = 0;
 	for_each_bio(bio) {
-		bio_for_each_segment(bv, bio, i) {
+		bio_for_each_segment(bv, bio, iter) {
 			/*
 			 * the trick here is making sure that a high page is
 			 * never considered part of another segment, since that
 			 * might change with the bounce page.
 			 */
-			high = page_to_pfn(bv->bv_page) > queue_bounce_pfn(q);
-			if (high || highprv)
-				goto new_segment;
-			if (cluster) {
-				if (seg_size + bv->bv_len
+			high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q);
+			if (!high && !highprv && cluster) {
+				if (seg_size + bv.bv_len
 				    > queue_max_segment_size(q))
 					goto new_segment;
-				if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
+				if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
 					goto new_segment;
-				if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
+				if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
 					goto new_segment;
 
-				seg_size += bv->bv_len;
+				seg_size += bv.bv_len;
 				bvprv = bv;
 				continue;
 			}
@@ -54,7 +63,7 @@ new_segment:
 
 			nr_phys_segs++;
 			bvprv = bv;
-			seg_size = bv->bv_len;
+			seg_size = bv.bv_len;
 			highprv = high;
 		}
 		bbio = bio;
@@ -87,6 +96,9 @@ EXPORT_SYMBOL(blk_recount_segments);
 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 				   struct bio *nxt)
 {
+	struct bio_vec end_bv = { NULL }, nxt_bv;
+	struct bvec_iter iter;
+
 	if (!blk_queue_cluster(q))
 		return 0;
 
@@ -97,34 +109,40 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
 	if (!bio_has_data(bio))
 		return 1;
 
-	if (!BIOVEC_PHYS_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)))
+	bio_for_each_segment(end_bv, bio, iter)
+		if (end_bv.bv_len == iter.bi_size)
+			break;
+
+	nxt_bv = bio_iovec(nxt);
+
+	if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
 		return 0;
 
 	/*
 	 * bio and nxt are contiguous in memory; check if the queue allows
 	 * these two to be merged into one
 	 */
-	if (BIO_SEG_BOUNDARY(q, bio, nxt))
+	if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
 		return 1;
 
 	return 0;
 }
 
-static void
+static inline void
 __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
-		     struct scatterlist *sglist, struct bio_vec **bvprv,
+		     struct scatterlist *sglist, struct bio_vec *bvprv,
 		     struct scatterlist **sg, int *nsegs, int *cluster)
 {
 
 	int nbytes = bvec->bv_len;
 
-	if (*bvprv && *cluster) {
+	if (*sg && *cluster) {
 		if ((*sg)->length + nbytes > queue_max_segment_size(q))
 			goto new_segment;
 
-		if (!BIOVEC_PHYS_MERGEABLE(*bvprv, bvec))
+		if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
 			goto new_segment;
-		if (!BIOVEC_SEG_BOUNDARY(q, *bvprv, bvec))
+		if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
 			goto new_segment;
 
 		(*sg)->length += nbytes;
@@ -150,7 +168,49 @@ new_segment:
 		sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
 		(*nsegs)++;
 	}
-	*bvprv = bvec;
+	*bvprv = *bvec;
+}
+
+static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
+			     struct scatterlist *sglist,
+			     struct scatterlist **sg)
+{
+	struct bio_vec bvec, bvprv = { NULL };
+	struct bvec_iter iter;
+	int nsegs, cluster;
+
+	nsegs = 0;
+	cluster = blk_queue_cluster(q);
+
+	if (bio->bi_rw & REQ_DISCARD) {
+		/*
+		 * This is a hack - drivers should be neither modifying the
+		 * biovec, nor relying on bi_vcnt - but because of
+		 * blk_add_request_payload(), a discard bio may or may not have
+		 * a payload we need to set up here (thank you Christoph) and
+		 * bi_vcnt is really the only way of telling if we need to.
+		 */
+
+		if (bio->bi_vcnt)
+			goto single_segment;
+
+		return 0;
+	}
+
+	if (bio->bi_rw & REQ_WRITE_SAME) {
+single_segment:
+		*sg = sglist;
+		bvec = bio_iovec(bio);
+		sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
+		return 1;
+	}
+
+	for_each_bio(bio)
+		bio_for_each_segment(bvec, bio, iter)
+			__blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
+					     &nsegs, &cluster);
+
+	return nsegs;
 }
 
 /*
@@ -160,24 +220,11 @@ new_segment:
 int blk_rq_map_sg(struct request_queue *q, struct request *rq,
 		  struct scatterlist *sglist)
 {
-	struct bio_vec *bvec, *bvprv;
-	struct req_iterator iter;
-	struct scatterlist *sg;
-	int nsegs, cluster;
-
-	nsegs = 0;
-	cluster = blk_queue_cluster(q);
-
-	/*
-	 * for each bio in rq
-	 */
-	bvprv = NULL;
-	sg = NULL;
-	rq_for_each_segment(bvec, rq, iter) {
-		__blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg,
-				     &nsegs, &cluster);
-	} /* segments in rq */
+	struct scatterlist *sg = NULL;
+	int nsegs = 0;
 
+	if (rq->bio)
+		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);
 
 	if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
 	    (blk_rq_bytes(rq) & q->dma_pad_mask)) {
@@ -223,21 +270,13 @@ EXPORT_SYMBOL(blk_rq_map_sg);
 int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
 		   struct scatterlist *sglist)
 {
-	struct bio_vec *bvec, *bvprv;
-	struct scatterlist *sg;
-	int nsegs, cluster;
-	unsigned long i;
-
-	nsegs = 0;
-	cluster = blk_queue_cluster(q);
-
-	bvprv = NULL;
-	sg = NULL;
-	bio_for_each_segment(bvec, bio, i) {
-		__blk_segment_map_sg(q, bvec, sglist, &bvprv, &sg,
-				     &nsegs, &cluster);
-	} /* segments in bio */
+	struct scatterlist *sg = NULL;
+	int nsegs;
+	struct bio *next = bio->bi_next;
+	bio->bi_next = NULL;
 
+	nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
+	bio->bi_next = next;
 	if (sg)
 		sg_mark_end(sg);
 
@@ -543,9 +582,9 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 
 int blk_try_merge(struct request *rq, struct bio *bio)
 {
-	if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_sector)
+	if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
 		return ELEVATOR_BACK_MERGE;
-	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_sector)
+	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
 		return ELEVATOR_FRONT_MERGE;
 	return ELEVATOR_NO_MERGE;
 }
diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c
index 0045ace9bdf0..3146befb56aa 100644
--- a/block/blk-mq-cpu.c
+++ b/block/blk-mq-cpu.c
@@ -28,36 +28,6 @@ static int blk_mq_main_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static void blk_mq_cpu_notify(void *data, unsigned long action,
-			      unsigned int cpu)
-{
-	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-		/*
-		 * If the CPU goes away, ensure that we run any pending
-		 * completions.
-		 */
-		struct llist_node *node;
-		struct request *rq;
-
-		local_irq_disable();
-
-		node = llist_del_all(&per_cpu(ipi_lists, cpu));
-		while (node) {
-			struct llist_node *next = node->next;
-
-			rq = llist_entry(node, struct request, ll_list);
-			__blk_mq_end_io(rq, rq->errors);
-			node = next;
-		}
-
-		local_irq_enable();
-	}
-}
-
-static struct notifier_block __cpuinitdata blk_mq_main_cpu_notifier = {
-	.notifier_call = blk_mq_main_cpu_notify,
-};
-
 void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
 {
 	BUG_ON(!notifier->notify);
@@ -82,12 +52,7 @@ void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
 	notifier->data = data;
 }
 
-static struct blk_mq_cpu_notifier __cpuinitdata cpu_notifier = {
-	.notify = blk_mq_cpu_notify,
-};
-
 void __init blk_mq_cpu_init(void)
 {
-	register_hotcpu_notifier(&blk_mq_main_cpu_notifier);
-	blk_mq_register_cpu_notifier(&cpu_notifier);
+	hotcpu_notifier(blk_mq_main_cpu_notify, 0);
 }
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index ba6cf8e9aa0a..b91ce75bd35d 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -335,9 +335,22 @@ static struct kobj_type blk_mq_hw_ktype = {
 void blk_mq_unregister_disk(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
+	struct blk_mq_hw_ctx *hctx;
+	struct blk_mq_ctx *ctx;
+	int i, j;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		hctx_for_each_ctx(hctx, ctx, j) {
+			kobject_del(&ctx->kobj);
+			kobject_put(&ctx->kobj);
+		}
+		kobject_del(&hctx->kobj);
+		kobject_put(&hctx->kobj);
+	}
 
 	kobject_uevent(&q->mq_kobj, KOBJ_REMOVE);
 	kobject_del(&q->mq_kobj);
+	kobject_put(&q->mq_kobj);
 
 	kobject_put(&disk_to_dev(disk)->kobj);
 }
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index d64a02fb1f73..83ae96c51a27 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -36,7 +36,8 @@ static unsigned int __blk_mq_get_tag(struct blk_mq_tags *tags, gfp_t gfp)
 {
 	int tag;
 
-	tag = percpu_ida_alloc(&tags->free_tags, gfp);
+	tag = percpu_ida_alloc(&tags->free_tags, (gfp & __GFP_WAIT) ?
+			       TASK_UNINTERRUPTIBLE : TASK_RUNNING);
 	if (tag < 0)
 		return BLK_MQ_TAG_FAIL;
 	return tag + tags->nr_reserved_tags;
@@ -52,7 +53,8 @@ static unsigned int __blk_mq_get_reserved_tag(struct blk_mq_tags *tags,
 		return BLK_MQ_TAG_FAIL;
 	}
 
-	tag = percpu_ida_alloc(&tags->reserved_tags, gfp);
+	tag = percpu_ida_alloc(&tags->reserved_tags, (gfp & __GFP_WAIT) ?
+			       TASK_UNINTERRUPTIBLE : TASK_RUNNING);
 	if (tag < 0)
 		return BLK_MQ_TAG_FAIL;
 	return tag;
@@ -182,7 +184,7 @@ void blk_mq_free_tags(struct blk_mq_tags *tags)
 ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
 {
 	char *orig_page = page;
-	int cpu;
+	unsigned int cpu;
 
 	if (!tags)
 		return 0;
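
The tag hunks above track a percpu_ida_alloc() interface change: it now takes a task state rather than a gfp mask, so the blk-mq tag code maps __GFP_WAIT onto TASK_UNINTERRUPTIBLE (sleep until a tag frees up) and everything else onto TASK_RUNNING (return immediately, yielding BLK_MQ_TAG_FAIL on failure). A small sketch of that mapping, mirroring the pattern in both hunks (illustrative helper, not in the patch):

	/* Translate an allocation gfp mask into the task state percpu_ida_alloc() expects. */
	static inline int example_tag_alloc_state(gfp_t gfp)
	{
		return (gfp & __GFP_WAIT) ? TASK_UNINTERRUPTIBLE : TASK_RUNNING;
	}
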
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c79126e11030..1fa9dd153fde 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -27,8 +27,6 @@ static LIST_HEAD(all_q_list);
27 27
28static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx); 28static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx);
29 29
30DEFINE_PER_CPU(struct llist_head, ipi_lists);
31
32static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, 30static struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q,
33 unsigned int cpu) 31 unsigned int cpu)
34{ 32{
@@ -106,10 +104,13 @@ static int blk_mq_queue_enter(struct request_queue *q)
106 104
107 spin_lock_irq(q->queue_lock); 105 spin_lock_irq(q->queue_lock);
108 ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, 106 ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
109 !blk_queue_bypass(q), *q->queue_lock); 107 !blk_queue_bypass(q) || blk_queue_dying(q),
108 *q->queue_lock);
110 /* inc usage with lock hold to avoid freeze_queue runs here */ 109 /* inc usage with lock hold to avoid freeze_queue runs here */
111 if (!ret) 110 if (!ret && !blk_queue_dying(q))
112 __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); 111 __percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
112 else if (blk_queue_dying(q))
113 ret = -ENODEV;
113 spin_unlock_irq(q->queue_lock); 114 spin_unlock_irq(q->queue_lock);
114 115
115 return ret; 116 return ret;
@@ -120,6 +121,22 @@ static void blk_mq_queue_exit(struct request_queue *q)
120 __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); 121 __percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
121} 122}
122 123
124static void __blk_mq_drain_queue(struct request_queue *q)
125{
126 while (true) {
127 s64 count;
128
129 spin_lock_irq(q->queue_lock);
130 count = percpu_counter_sum(&q->mq_usage_counter);
131 spin_unlock_irq(q->queue_lock);
132
133 if (count == 0)
134 break;
135 blk_mq_run_queues(q, false);
136 msleep(10);
137 }
138}
139
123/* 140/*
124 * Guarantee no request is in use, so we can change any data structure of 141 * Guarantee no request is in use, so we can change any data structure of
125 * the queue afterward. 142 * the queue afterward.
@@ -133,21 +150,13 @@ static void blk_mq_freeze_queue(struct request_queue *q)
133 queue_flag_set(QUEUE_FLAG_BYPASS, q); 150 queue_flag_set(QUEUE_FLAG_BYPASS, q);
134 spin_unlock_irq(q->queue_lock); 151 spin_unlock_irq(q->queue_lock);
135 152
136 if (!drain) 153 if (drain)
137 return; 154 __blk_mq_drain_queue(q);
138 155}
139 while (true) {
140 s64 count;
141
142 spin_lock_irq(q->queue_lock);
143 count = percpu_counter_sum(&q->mq_usage_counter);
144 spin_unlock_irq(q->queue_lock);
145 156
146 if (count == 0) 157void blk_mq_drain_queue(struct request_queue *q)
147 break; 158{
148 blk_mq_run_queues(q, false); 159 __blk_mq_drain_queue(q);
149 msleep(10);
150 }
151} 160}
152 161
153static void blk_mq_unfreeze_queue(struct request_queue *q) 162static void blk_mq_unfreeze_queue(struct request_queue *q)
@@ -179,6 +188,8 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
179 188
180 rq->mq_ctx = ctx; 189 rq->mq_ctx = ctx;
181 rq->cmd_flags = rw_flags; 190 rq->cmd_flags = rw_flags;
191 rq->start_time = jiffies;
192 set_start_time_ns(rq);
182 ctx->rq_dispatched[rw_is_sync(rw_flags)]++; 193 ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
183} 194}
184 195
@@ -215,15 +226,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
215 return rq; 226 return rq;
216} 227}
217 228
218struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 229struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
219 gfp_t gfp, bool reserved)
220{ 230{
221 struct request *rq; 231 struct request *rq;
222 232
223 if (blk_mq_queue_enter(q)) 233 if (blk_mq_queue_enter(q))
224 return NULL; 234 return NULL;
225 235
226 rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved); 236 rq = blk_mq_alloc_request_pinned(q, rw, gfp, false);
227 if (rq) 237 if (rq)
228 blk_mq_put_ctx(rq->mq_ctx); 238 blk_mq_put_ctx(rq->mq_ctx);
229 return rq; 239 return rq;
@@ -247,7 +257,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
247/* 257/*
248 * Re-init and set pdu, if we have it 258 * Re-init and set pdu, if we have it
249 */ 259 */
250static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq) 260void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq)
251{ 261{
252 blk_rq_init(hctx->queue, rq); 262 blk_rq_init(hctx->queue, rq);
253 263
@@ -294,7 +304,7 @@ static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error)
294 bio_endio(bio, error); 304 bio_endio(bio, error);
295} 305}
296 306
297void blk_mq_complete_request(struct request *rq, int error) 307void blk_mq_end_io(struct request *rq, int error)
298{ 308{
299 struct bio *bio = rq->bio; 309 struct bio *bio = rq->bio;
300 unsigned int bytes = 0; 310 unsigned int bytes = 0;
@@ -305,7 +315,7 @@ void blk_mq_complete_request(struct request *rq, int error)
305 struct bio *next = bio->bi_next; 315 struct bio *next = bio->bi_next;
306 316
307 bio->bi_next = NULL; 317 bio->bi_next = NULL;
308 bytes += bio->bi_size; 318 bytes += bio->bi_iter.bi_size;
309 blk_mq_bio_endio(rq, bio, error); 319 blk_mq_bio_endio(rq, bio, error);
310 bio = next; 320 bio = next;
311 } 321 }
@@ -319,87 +329,55 @@ void blk_mq_complete_request(struct request *rq, int error)
319 else 329 else
320 blk_mq_free_request(rq); 330 blk_mq_free_request(rq);
321} 331}
332EXPORT_SYMBOL(blk_mq_end_io);
322 333
323void __blk_mq_end_io(struct request *rq, int error) 334static void __blk_mq_complete_request_remote(void *data)
324{
325 if (!blk_mark_rq_complete(rq))
326 blk_mq_complete_request(rq, error);
327}
328
329#if defined(CONFIG_SMP)
330
331/*
332 * Called with interrupts disabled.
333 */
334static void ipi_end_io(void *data)
335{
336 struct llist_head *list = &per_cpu(ipi_lists, smp_processor_id());
337 struct llist_node *entry, *next;
338 struct request *rq;
339
340 entry = llist_del_all(list);
341
342 while (entry) {
343 next = entry->next;
344 rq = llist_entry(entry, struct request, ll_list);
345 __blk_mq_end_io(rq, rq->errors);
346 entry = next;
347 }
348}
349
350static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
351 struct request *rq, const int error)
352{ 335{
353 struct call_single_data *data = &rq->csd; 336 struct request *rq = data;
354
355 rq->errors = error;
356 rq->ll_list.next = NULL;
357
358 /*
359 * If the list is non-empty, an existing IPI must already
360 * be "in flight". If that is the case, we need not schedule
361 * a new one.
362 */
363 if (llist_add(&rq->ll_list, &per_cpu(ipi_lists, ctx->cpu))) {
364 data->func = ipi_end_io;
365 data->flags = 0;
366 __smp_call_function_single(ctx->cpu, data, 0);
367 }
368 337
369 return true; 338 rq->q->softirq_done_fn(rq);
370}
371#else /* CONFIG_SMP */
372static int ipi_remote_cpu(struct blk_mq_ctx *ctx, const int cpu,
373 struct request *rq, const int error)
374{
375 return false;
376} 339}
377#endif
378 340
379/* 341void __blk_mq_complete_request(struct request *rq)
380 * End IO on this request on a multiqueue enabled driver. We'll either do
381 * it directly inline, or punt to a local IPI handler on the matching
382 * remote CPU.
383 */
384void blk_mq_end_io(struct request *rq, int error)
385{ 342{
386 struct blk_mq_ctx *ctx = rq->mq_ctx; 343 struct blk_mq_ctx *ctx = rq->mq_ctx;
387 int cpu; 344 int cpu;
388 345
389 if (!ctx->ipi_redirect) 346 if (!ctx->ipi_redirect) {
390 return __blk_mq_end_io(rq, error); 347 rq->q->softirq_done_fn(rq);
348 return;
349 }
391 350
392 cpu = get_cpu(); 351 cpu = get_cpu();
393 352 if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
394 if (cpu == ctx->cpu || !cpu_online(ctx->cpu) || 353 rq->csd.func = __blk_mq_complete_request_remote;
395 !ipi_remote_cpu(ctx, cpu, rq, error)) 354 rq->csd.info = rq;
396 __blk_mq_end_io(rq, error); 355 rq->csd.flags = 0;
397 356 __smp_call_function_single(ctx->cpu, &rq->csd, 0);
357 } else {
358 rq->q->softirq_done_fn(rq);
359 }
398 put_cpu(); 360 put_cpu();
399} 361}
400EXPORT_SYMBOL(blk_mq_end_io);
401 362
402static void blk_mq_start_request(struct request *rq) 363/**
364 * blk_mq_complete_request - end I/O on a request
365 * @rq: the request being processed
366 *
367 * Description:
368 * Ends all I/O on a request. It does not handle partial completions.
369 * The actual completion happens out-of-order, through a IPI handler.
370 **/
371void blk_mq_complete_request(struct request *rq)
372{
373 if (unlikely(blk_should_fake_timeout(rq->q)))
374 return;
375 if (!blk_mark_rq_complete(rq))
376 __blk_mq_complete_request(rq);
377}
378EXPORT_SYMBOL(blk_mq_complete_request);
379
380static void blk_mq_start_request(struct request *rq, bool last)
403{ 381{
404 struct request_queue *q = rq->q; 382 struct request_queue *q = rq->q;
405 383
@@ -412,6 +390,25 @@ static void blk_mq_start_request(struct request *rq)
412 */ 390 */
413 rq->deadline = jiffies + q->rq_timeout; 391 rq->deadline = jiffies + q->rq_timeout;
414 set_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 392 set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
393
394 if (q->dma_drain_size && blk_rq_bytes(rq)) {
395 /*
396 * Make sure space for the drain appears. We know we can do
397 * this because max_hw_segments has been adjusted to be one
398 * fewer than the device can handle.
399 */
400 rq->nr_phys_segments++;
401 }
402
403 /*
404 * Flag the last request in the series so that drivers know when IO
405 * should be kicked off, if they don't do it on a per-request basis.
406 *
407 * Note: the flag isn't the only condition drivers should do kick off.
408 * If drive is busy, the last request might not have the bit set.
409 */
410 if (last)
411 rq->cmd_flags |= REQ_END;
415} 412}
416 413
417static void blk_mq_requeue_request(struct request *rq) 414static void blk_mq_requeue_request(struct request *rq)
@@ -420,6 +417,11 @@ static void blk_mq_requeue_request(struct request *rq)
420 417
421 trace_block_rq_requeue(q, rq); 418 trace_block_rq_requeue(q, rq);
422 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); 419 clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
420
421 rq->cmd_flags &= ~REQ_END;
422
423 if (q->dma_drain_size && blk_rq_bytes(rq))
424 rq->nr_phys_segments--;
423} 425}
424 426
425struct blk_mq_timeout_data { 427struct blk_mq_timeout_data {
@@ -587,19 +589,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
587 589
588 rq = list_first_entry(&rq_list, struct request, queuelist); 590 rq = list_first_entry(&rq_list, struct request, queuelist);
589 list_del_init(&rq->queuelist); 591 list_del_init(&rq->queuelist);
590 blk_mq_start_request(rq);
591 592
592 /* 593 blk_mq_start_request(rq, list_empty(&rq_list));
593 * Last request in the series. Flag it as such, this
594 * enables drivers to know when IO should be kicked off,
595 * if they don't do it on a per-request basis.
596 *
597 * Note: the flag isn't the only condition drivers
598 * should do kick off. If drive is busy, the last
599 * request might not have the bit set.
600 */
601 if (list_empty(&rq_list))
602 rq->cmd_flags |= REQ_END;
603 594
604 ret = q->mq_ops->queue_rq(hctx, rq); 595 ret = q->mq_ops->queue_rq(hctx, rq);
605 switch (ret) { 596 switch (ret) {
@@ -617,8 +608,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
617 break; 608 break;
618 default: 609 default:
619 pr_err("blk-mq: bad return on queue: %d\n", ret); 610 pr_err("blk-mq: bad return on queue: %d\n", ret);
620 rq->errors = -EIO;
621 case BLK_MQ_RQ_QUEUE_ERROR: 611 case BLK_MQ_RQ_QUEUE_ERROR:
612 rq->errors = -EIO;
622 blk_mq_end_io(rq, rq->errors); 613 blk_mq_end_io(rq, rq->errors);
623 break; 614 break;
624 } 615 }
@@ -721,13 +712,16 @@ static void blk_mq_work_fn(struct work_struct *work)
721} 712}
722 713
723static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, 714static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
724 struct request *rq) 715 struct request *rq, bool at_head)
725{ 716{
726 struct blk_mq_ctx *ctx = rq->mq_ctx; 717 struct blk_mq_ctx *ctx = rq->mq_ctx;
727 718
728 trace_block_rq_insert(hctx->queue, rq); 719 trace_block_rq_insert(hctx->queue, rq);
729 720
730 list_add_tail(&rq->queuelist, &ctx->rq_list); 721 if (at_head)
722 list_add(&rq->queuelist, &ctx->rq_list);
723 else
724 list_add_tail(&rq->queuelist, &ctx->rq_list);
731 blk_mq_hctx_mark_pending(hctx, ctx); 725 blk_mq_hctx_mark_pending(hctx, ctx);
732 726
733 /* 727 /*
@@ -737,7 +731,7 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
737} 731}
738 732
739void blk_mq_insert_request(struct request_queue *q, struct request *rq, 733void blk_mq_insert_request(struct request_queue *q, struct request *rq,
740 bool run_queue) 734 bool at_head, bool run_queue)
741{ 735{
742 struct blk_mq_hw_ctx *hctx; 736 struct blk_mq_hw_ctx *hctx;
743 struct blk_mq_ctx *ctx, *current_ctx; 737 struct blk_mq_ctx *ctx, *current_ctx;
@@ -756,7 +750,7 @@ void blk_mq_insert_request(struct request_queue *q, struct request *rq,
756 rq->mq_ctx = ctx; 750 rq->mq_ctx = ctx;
757 } 751 }
758 spin_lock(&ctx->lock); 752 spin_lock(&ctx->lock);
759 __blk_mq_insert_request(hctx, rq); 753 __blk_mq_insert_request(hctx, rq, at_head);
760 spin_unlock(&ctx->lock); 754 spin_unlock(&ctx->lock);
761 755
762 blk_mq_put_ctx(current_ctx); 756 blk_mq_put_ctx(current_ctx);
@@ -788,7 +782,7 @@ void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
788 782
789 /* ctx->cpu might be offline */ 783 /* ctx->cpu might be offline */
790 spin_lock(&ctx->lock); 784 spin_lock(&ctx->lock);
791 __blk_mq_insert_request(hctx, rq); 785 __blk_mq_insert_request(hctx, rq, false);
792 spin_unlock(&ctx->lock); 786 spin_unlock(&ctx->lock);
793 787
794 blk_mq_put_ctx(current_ctx); 788 blk_mq_put_ctx(current_ctx);
@@ -826,7 +820,7 @@ static void blk_mq_insert_requests(struct request_queue *q,
826 rq = list_first_entry(list, struct request, queuelist); 820 rq = list_first_entry(list, struct request, queuelist);
827 list_del_init(&rq->queuelist); 821 list_del_init(&rq->queuelist);
828 rq->mq_ctx = ctx; 822 rq->mq_ctx = ctx;
829 __blk_mq_insert_request(hctx, rq); 823 __blk_mq_insert_request(hctx, rq, false);
830 } 824 }
831 spin_unlock(&ctx->lock); 825 spin_unlock(&ctx->lock);
832 826
@@ -916,6 +910,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
916 910
917 blk_queue_bounce(q, &bio); 911 blk_queue_bounce(q, &bio);
918 912
913 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
914 bio_endio(bio, -EIO);
915 return;
916 }
917
919 if (use_plug && blk_attempt_plug_merge(q, bio, &request_count)) 918 if (use_plug && blk_attempt_plug_merge(q, bio, &request_count))
920 return; 919 return;
921 920
@@ -978,7 +977,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
978 __blk_mq_free_request(hctx, ctx, rq); 977 __blk_mq_free_request(hctx, ctx, rq);
979 else { 978 else {
980 blk_mq_bio_to_request(rq, bio); 979 blk_mq_bio_to_request(rq, bio);
981 __blk_mq_insert_request(hctx, rq); 980 __blk_mq_insert_request(hctx, rq, false);
982 } 981 }
983 982
984 spin_unlock(&ctx->lock); 983 spin_unlock(&ctx->lock);
@@ -1091,8 +1090,8 @@ static void blk_mq_free_rq_map(struct blk_mq_hw_ctx *hctx)
1091 struct page *page; 1090 struct page *page;
1092 1091
1093 while (!list_empty(&hctx->page_list)) { 1092 while (!list_empty(&hctx->page_list)) {
1094 page = list_first_entry(&hctx->page_list, struct page, list); 1093 page = list_first_entry(&hctx->page_list, struct page, lru);
1095 list_del_init(&page->list); 1094 list_del_init(&page->lru);
1096 __free_pages(page, page->private); 1095 __free_pages(page, page->private);
1097 } 1096 }
1098 1097
@@ -1156,7 +1155,7 @@ static int blk_mq_init_rq_map(struct blk_mq_hw_ctx *hctx,
1156 break; 1155 break;
1157 1156
1158 page->private = this_order; 1157 page->private = this_order;
1159 list_add_tail(&page->list, &hctx->page_list); 1158 list_add_tail(&page->lru, &hctx->page_list);
1160 1159
1161 p = page_address(page); 1160 p = page_address(page);
1162 entries_per_page = order_to_size(this_order) / rq_size; 1161 entries_per_page = order_to_size(this_order) / rq_size;
@@ -1337,15 +1336,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
1337 reg->queue_depth = BLK_MQ_MAX_DEPTH; 1336 reg->queue_depth = BLK_MQ_MAX_DEPTH;
1338 } 1337 }
1339 1338
1340 /*
1341 * Set aside a tag for flush requests. It will only be used while
1342 * another flush request is in progress but outside the driver.
1343 *
1344 * TODO: only allocate if flushes are supported
1345 */
1346 reg->queue_depth++;
1347 reg->reserved_tags++;
1348
1349 if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN)) 1339 if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
1350 return ERR_PTR(-EINVAL); 1340 return ERR_PTR(-EINVAL);
1351 1341
@@ -1388,17 +1378,27 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
1388 q->mq_ops = reg->ops; 1378 q->mq_ops = reg->ops;
1389 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; 1379 q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
1390 1380
1381 q->sg_reserved_size = INT_MAX;
1382
1391 blk_queue_make_request(q, blk_mq_make_request); 1383 blk_queue_make_request(q, blk_mq_make_request);
1392 blk_queue_rq_timed_out(q, reg->ops->timeout); 1384 blk_queue_rq_timed_out(q, reg->ops->timeout);
1393 if (reg->timeout) 1385 if (reg->timeout)
1394 blk_queue_rq_timeout(q, reg->timeout); 1386 blk_queue_rq_timeout(q, reg->timeout);
1395 1387
1388 if (reg->ops->complete)
1389 blk_queue_softirq_done(q, reg->ops->complete);
1390
1396 blk_mq_init_flush(q); 1391 blk_mq_init_flush(q);
1397 blk_mq_init_cpu_queues(q, reg->nr_hw_queues); 1392 blk_mq_init_cpu_queues(q, reg->nr_hw_queues);
1398 1393
1399 if (blk_mq_init_hw_queues(q, reg, driver_data)) 1394 q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size,
1395 cache_line_size()), GFP_KERNEL);
1396 if (!q->flush_rq)
1400 goto err_hw; 1397 goto err_hw;
1401 1398
1399 if (blk_mq_init_hw_queues(q, reg, driver_data))
1400 goto err_flush_rq;
1401
1402 blk_mq_map_swqueue(q); 1402 blk_mq_map_swqueue(q);
1403 1403
1404 mutex_lock(&all_q_mutex); 1404 mutex_lock(&all_q_mutex);
@@ -1406,6 +1406,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
1406 mutex_unlock(&all_q_mutex); 1406 mutex_unlock(&all_q_mutex);
1407 1407
1408 return q; 1408 return q;
1409
1410err_flush_rq:
1411 kfree(q->flush_rq);
1409err_hw: 1412err_hw:
1410 kfree(q->mq_map); 1413 kfree(q->mq_map);
1411err_map: 1414err_map:
@@ -1429,7 +1432,6 @@ void blk_mq_free_queue(struct request_queue *q)
1429 int i; 1432 int i;
1430 1433
1431 queue_for_each_hw_ctx(q, hctx, i) { 1434 queue_for_each_hw_ctx(q, hctx, i) {
1432 cancel_delayed_work_sync(&hctx->delayed_work);
1433 kfree(hctx->ctx_map); 1435 kfree(hctx->ctx_map);
1434 kfree(hctx->ctxs); 1436 kfree(hctx->ctxs);
1435 blk_mq_free_rq_map(hctx); 1437 blk_mq_free_rq_map(hctx);
@@ -1451,7 +1453,6 @@ void blk_mq_free_queue(struct request_queue *q)
1451 list_del_init(&q->all_q_node); 1453 list_del_init(&q->all_q_node);
1452 mutex_unlock(&all_q_mutex); 1454 mutex_unlock(&all_q_mutex);
1453} 1455}
1454EXPORT_SYMBOL(blk_mq_free_queue);
1455 1456
1456/* Basically redo blk_mq_init_queue with queue frozen */ 1457/* Basically redo blk_mq_init_queue with queue frozen */
1457static void blk_mq_queue_reinit(struct request_queue *q) 1458static void blk_mq_queue_reinit(struct request_queue *q)
@@ -1495,11 +1496,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
1495 1496
1496static int __init blk_mq_init(void) 1497static int __init blk_mq_init(void)
1497{ 1498{
1498 unsigned int i;
1499
1500 for_each_possible_cpu(i)
1501 init_llist_head(&per_cpu(ipi_lists, i));
1502
1503 blk_mq_cpu_init(); 1499 blk_mq_cpu_init();
1504 1500
1505 /* Must be called after percpu_counter_hotcpu_callback() */ 1501 /* Must be called after percpu_counter_hotcpu_callback() */
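
The blk-mq hunks above move the per-hctx page bookkeeping off the removed page->list member and onto the generic page->lru list head, with the allocation order parked in page->private for the later __free_pages() call. A minimal sketch of that pattern with hypothetical names (page_pool, pool_grow, pool_drain), assuming only the stock page allocator and list helpers:

#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/mm_types.h>

static LIST_HEAD(page_pool);    /* hypothetical pool, in the role of hctx->page_list */

static struct page *pool_grow(unsigned int order)
{
        struct page *page = alloc_pages(GFP_KERNEL, order);

        if (!page)
                return NULL;
        page->private = order;                  /* remember the order for freeing */
        list_add_tail(&page->lru, &page_pool);  /* thread through page->lru */
        return page;
}

static void pool_drain(void)
{
        struct page *page;

        while (!list_empty(&page_pool)) {
                page = list_first_entry(&page_pool, struct page, lru);
                list_del_init(&page->lru);
                __free_pages(page, page->private);
        }
}
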
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 52bf1f96a2c2..ed0035cd458e 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -22,11 +22,13 @@ struct blk_mq_ctx {
22 struct kobject kobj; 22 struct kobject kobj;
23}; 23};
24 24
25void __blk_mq_end_io(struct request *rq, int error); 25void __blk_mq_complete_request(struct request *rq);
26void blk_mq_complete_request(struct request *rq, int error);
27void blk_mq_run_request(struct request *rq, bool run_queue, bool async); 26void blk_mq_run_request(struct request *rq, bool run_queue, bool async);
28void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); 27void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
29void blk_mq_init_flush(struct request_queue *q); 28void blk_mq_init_flush(struct request_queue *q);
29void blk_mq_drain_queue(struct request_queue *q);
30void blk_mq_free_queue(struct request_queue *q);
31void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq);
30 32
31/* 33/*
32 * CPU hotplug helpers 34 * CPU hotplug helpers
@@ -38,7 +40,6 @@ void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
38void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier); 40void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
39void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier); 41void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
40void blk_mq_cpu_init(void); 42void blk_mq_cpu_init(void);
41DECLARE_PER_CPU(struct llist_head, ipi_lists);
42 43
43/* 44/*
44 * CPU -> queue mappings 45 * CPU -> queue mappings
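
The header also loses the DECLARE_PER_CPU(struct llist_head, ipi_lists) declaration, apparently in favour of the shared softirq completion path hooked up via blk_queue_softirq_done() in blk_mq_init_queue() above. For reference, a hedged sketch of the generic per-CPU llist mechanism that was being open-coded (demo_lists, demo_item and the helpers are hypothetical names, not blk-mq symbols):

#include <linux/cpumask.h>
#include <linux/llist.h>
#include <linux/percpu.h>

struct demo_item {
        struct llist_node llnode;
        int payload;
};

static DEFINE_PER_CPU(struct llist_head, demo_lists);

static void demo_lists_init(void)
{
        int cpu;

        for_each_possible_cpu(cpu)
                init_llist_head(&per_cpu(demo_lists, cpu));
}

static void demo_push(int cpu, struct demo_item *item)
{
        /* lock-free push onto the target CPU's list */
        llist_add(&item->llnode, &per_cpu(demo_lists, cpu));
}

static struct llist_node *demo_pop_all(int cpu)
{
        /* atomically detach the whole list; the caller walks it */
        return llist_del_all(&per_cpu(demo_lists, cpu));
}
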
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 05e826793e4e..5d21239bc859 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -592,6 +592,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
592 ret = -1; 592 ret = -1;
593 } 593 }
594 594
595 t->raid_partial_stripes_expensive =
596 max(t->raid_partial_stripes_expensive,
597 b->raid_partial_stripes_expensive);
598
595 /* Find lowest common alignment_offset */ 599 /* Find lowest common alignment_offset */
596 t->alignment_offset = lcm(t->alignment_offset, alignment) 600 t->alignment_offset = lcm(t->alignment_offset, alignment)
597 & (max(t->physical_block_size, t->io_min) - 1); 601 & (max(t->physical_block_size, t->io_min) - 1);
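
The blk-settings.c hunk makes blk_stack_limits() carry the raid_partial_stripes_expensive hint up the stack with max(), like the other boolean limits. A sketch of the usual stacking-driver flow that this feeds into, under the assumption of a simplified MD/DM-like caller (demo_stack_members is hypothetical, and the member start offset is taken as 0 for brevity):

#include <linux/blkdev.h>

static int demo_stack_members(struct request_queue *top_q,
                              struct block_device **members, int nr)
{
        struct queue_limits lim;
        int i, ret = 0;

        blk_set_stacking_limits(&lim);

        for (i = 0; i < nr; i++) {
                struct request_queue *bq = bdev_get_queue(members[i]);

                /* folds in raid_partial_stripes_expensive via max(), per the hunk */
                if (blk_stack_limits(&lim, &bq->limits, 0) < 0)
                        ret = -1;       /* misaligned member; keep stacking anyway */
        }

        top_q->limits = lim;
        return ret;
}
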
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 97779522472f..7500f876dae4 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -11,6 +11,7 @@
11 11
12#include "blk.h" 12#include "blk.h"
13#include "blk-cgroup.h" 13#include "blk-cgroup.h"
14#include "blk-mq.h"
14 15
15struct queue_sysfs_entry { 16struct queue_sysfs_entry {
16 struct attribute attr; 17 struct attribute attr;
@@ -548,6 +549,8 @@ static void blk_release_queue(struct kobject *kobj)
548 if (q->mq_ops) 549 if (q->mq_ops)
549 blk_mq_free_queue(q); 550 blk_mq_free_queue(q);
550 551
552 kfree(q->flush_rq);
553
551 blk_trace_shutdown(q); 554 blk_trace_shutdown(q);
552 555
553 bdi_destroy(&q->backing_dev_info); 556 bdi_destroy(&q->backing_dev_info);
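
The blk-sysfs.c change is the release side of the flush_rq allocation introduced in blk_mq_init_queue() above: the preallocated flush request is sized for the driver's per-command payload, rounded to a cache line, and freed unconditionally on queue release because kfree(NULL) is a no-op for queues that never allocated one. A minimal sketch of that pairing with shortened, hypothetical names:

#include <linux/cache.h>
#include <linux/kernel.h>
#include <linux/slab.h>

static void *demo_alloc_flush_rq(size_t base, size_t cmd_size)
{
        /* same sizing as the blk_mq_init_queue() hunk */
        return kzalloc(round_up(base + cmd_size, cache_line_size()),
                       GFP_KERNEL);
}

static void demo_release(void *flush_rq)
{
        kfree(flush_rq);        /* safe even when no flush_rq was ever allocated */
}
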
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 06534049afba..1474c3ab7e72 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -877,14 +877,14 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
877 do_div(tmp, HZ); 877 do_div(tmp, HZ);
878 bytes_allowed = tmp; 878 bytes_allowed = tmp;
879 879
880 if (tg->bytes_disp[rw] + bio->bi_size <= bytes_allowed) { 880 if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) {
881 if (wait) 881 if (wait)
882 *wait = 0; 882 *wait = 0;
883 return 1; 883 return 1;
884 } 884 }
885 885
886 /* Calc approx time to dispatch */ 886 /* Calc approx time to dispatch */
887 extra_bytes = tg->bytes_disp[rw] + bio->bi_size - bytes_allowed; 887 extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed;
888 jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]); 888 jiffy_wait = div64_u64(extra_bytes * HZ, tg->bps[rw]);
889 889
890 if (!jiffy_wait) 890 if (!jiffy_wait)
@@ -987,7 +987,7 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
987 bool rw = bio_data_dir(bio); 987 bool rw = bio_data_dir(bio);
988 988
989 /* Charge the bio to the group */ 989 /* Charge the bio to the group */
990 tg->bytes_disp[rw] += bio->bi_size; 990 tg->bytes_disp[rw] += bio->bi_iter.bi_size;
991 tg->io_disp[rw]++; 991 tg->io_disp[rw]++;
992 992
993 /* 993 /*
@@ -1003,8 +1003,8 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
1003 */ 1003 */
1004 if (!(bio->bi_rw & REQ_THROTTLED)) { 1004 if (!(bio->bi_rw & REQ_THROTTLED)) {
1005 bio->bi_rw |= REQ_THROTTLED; 1005 bio->bi_rw |= REQ_THROTTLED;
1006 throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, 1006 throtl_update_dispatch_stats(tg_to_blkg(tg),
1007 bio->bi_rw); 1007 bio->bi_iter.bi_size, bio->bi_rw);
1008 } 1008 }
1009} 1009}
1010 1010
@@ -1303,13 +1303,10 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf,
1303 return __blkg_prfill_rwstat(sf, pd, &rwstat); 1303 return __blkg_prfill_rwstat(sf, pd, &rwstat);
1304} 1304}
1305 1305
1306static int tg_print_cpu_rwstat(struct cgroup_subsys_state *css, 1306static int tg_print_cpu_rwstat(struct seq_file *sf, void *v)
1307 struct cftype *cft, struct seq_file *sf)
1308{ 1307{
1309 struct blkcg *blkcg = css_to_blkcg(css); 1308 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_cpu_rwstat,
1310 1309 &blkcg_policy_throtl, seq_cft(sf)->private, true);
1311 blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl,
1312 cft->private, true);
1313 return 0; 1310 return 0;
1314} 1311}
1315 1312
@@ -1335,19 +1332,17 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd,
1335 return __blkg_prfill_u64(sf, pd, v); 1332 return __blkg_prfill_u64(sf, pd, v);
1336} 1333}
1337 1334
1338static int tg_print_conf_u64(struct cgroup_subsys_state *css, 1335static int tg_print_conf_u64(struct seq_file *sf, void *v)
1339 struct cftype *cft, struct seq_file *sf)
1340{ 1336{
1341 blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_u64, 1337 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_u64,
1342 &blkcg_policy_throtl, cft->private, false); 1338 &blkcg_policy_throtl, seq_cft(sf)->private, false);
1343 return 0; 1339 return 0;
1344} 1340}
1345 1341
1346static int tg_print_conf_uint(struct cgroup_subsys_state *css, 1342static int tg_print_conf_uint(struct seq_file *sf, void *v)
1347 struct cftype *cft, struct seq_file *sf)
1348{ 1343{
1349 blkcg_print_blkgs(sf, css_to_blkcg(css), tg_prfill_conf_uint, 1344 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), tg_prfill_conf_uint,
1350 &blkcg_policy_throtl, cft->private, false); 1345 &blkcg_policy_throtl, seq_cft(sf)->private, false);
1351 return 0; 1346 return 0;
1352} 1347}
1353 1348
@@ -1428,40 +1423,40 @@ static struct cftype throtl_files[] = {
1428 { 1423 {
1429 .name = "throttle.read_bps_device", 1424 .name = "throttle.read_bps_device",
1430 .private = offsetof(struct throtl_grp, bps[READ]), 1425 .private = offsetof(struct throtl_grp, bps[READ]),
1431 .read_seq_string = tg_print_conf_u64, 1426 .seq_show = tg_print_conf_u64,
1432 .write_string = tg_set_conf_u64, 1427 .write_string = tg_set_conf_u64,
1433 .max_write_len = 256, 1428 .max_write_len = 256,
1434 }, 1429 },
1435 { 1430 {
1436 .name = "throttle.write_bps_device", 1431 .name = "throttle.write_bps_device",
1437 .private = offsetof(struct throtl_grp, bps[WRITE]), 1432 .private = offsetof(struct throtl_grp, bps[WRITE]),
1438 .read_seq_string = tg_print_conf_u64, 1433 .seq_show = tg_print_conf_u64,
1439 .write_string = tg_set_conf_u64, 1434 .write_string = tg_set_conf_u64,
1440 .max_write_len = 256, 1435 .max_write_len = 256,
1441 }, 1436 },
1442 { 1437 {
1443 .name = "throttle.read_iops_device", 1438 .name = "throttle.read_iops_device",
1444 .private = offsetof(struct throtl_grp, iops[READ]), 1439 .private = offsetof(struct throtl_grp, iops[READ]),
1445 .read_seq_string = tg_print_conf_uint, 1440 .seq_show = tg_print_conf_uint,
1446 .write_string = tg_set_conf_uint, 1441 .write_string = tg_set_conf_uint,
1447 .max_write_len = 256, 1442 .max_write_len = 256,
1448 }, 1443 },
1449 { 1444 {
1450 .name = "throttle.write_iops_device", 1445 .name = "throttle.write_iops_device",
1451 .private = offsetof(struct throtl_grp, iops[WRITE]), 1446 .private = offsetof(struct throtl_grp, iops[WRITE]),
1452 .read_seq_string = tg_print_conf_uint, 1447 .seq_show = tg_print_conf_uint,
1453 .write_string = tg_set_conf_uint, 1448 .write_string = tg_set_conf_uint,
1454 .max_write_len = 256, 1449 .max_write_len = 256,
1455 }, 1450 },
1456 { 1451 {
1457 .name = "throttle.io_service_bytes", 1452 .name = "throttle.io_service_bytes",
1458 .private = offsetof(struct tg_stats_cpu, service_bytes), 1453 .private = offsetof(struct tg_stats_cpu, service_bytes),
1459 .read_seq_string = tg_print_cpu_rwstat, 1454 .seq_show = tg_print_cpu_rwstat,
1460 }, 1455 },
1461 { 1456 {
1462 .name = "throttle.io_serviced", 1457 .name = "throttle.io_serviced",
1463 .private = offsetof(struct tg_stats_cpu, serviced), 1458 .private = offsetof(struct tg_stats_cpu, serviced),
1464 .read_seq_string = tg_print_cpu_rwstat, 1459 .seq_show = tg_print_cpu_rwstat,
1465 }, 1460 },
1466 { } /* terminate */ 1461 { } /* terminate */
1467}; 1462};
@@ -1508,7 +1503,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
1508 if (tg) { 1503 if (tg) {
1509 if (!tg->has_rules[rw]) { 1504 if (!tg->has_rules[rw]) {
1510 throtl_update_dispatch_stats(tg_to_blkg(tg), 1505 throtl_update_dispatch_stats(tg_to_blkg(tg),
1511 bio->bi_size, bio->bi_rw); 1506 bio->bi_iter.bi_size, bio->bi_rw);
1512 goto out_unlock_rcu; 1507 goto out_unlock_rcu;
1513 } 1508 }
1514 } 1509 }
@@ -1564,7 +1559,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
1564 /* out-of-limit, queue to @tg */ 1559 /* out-of-limit, queue to @tg */
1565 throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d", 1560 throtl_log(sq, "[%c] bio. bdisp=%llu sz=%u bps=%llu iodisp=%u iops=%u queued=%d/%d",
1566 rw == READ ? 'R' : 'W', 1561 rw == READ ? 'R' : 'W',
1567 tg->bytes_disp[rw], bio->bi_size, tg->bps[rw], 1562 tg->bytes_disp[rw], bio->bi_iter.bi_size, tg->bps[rw],
1568 tg->io_disp[rw], tg->iops[rw], 1563 tg->io_disp[rw], tg->iops[rw],
1569 sq->nr_queued[READ], sq->nr_queued[WRITE]); 1564 sq->nr_queued[READ], sq->nr_queued[WRITE]);
1570 1565
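
The blk-throttle.c byte accounting now reads the remaining bio size from bio->bi_iter.bi_size instead of the removed bio->bi_size. A minimal sketch of that accounting shape with hypothetical names (demo_budget is not a real throttle field):

#include <linux/bio.h>

static bool demo_bio_within_budget(struct bio *bio, u64 dispatched,
                                   u64 demo_budget)
{
        /* bi_iter.bi_size is the not-yet-completed byte count of the bio */
        return dispatched + bio->bi_iter.bi_size <= demo_budget;
}

static void demo_charge_bio(struct bio *bio, u64 *dispatched)
{
        *dispatched += bio->bi_iter.bi_size;
}
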
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index bba81c9348e1..d96f7061c6fd 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req)
91 case BLK_EH_HANDLED: 91 case BLK_EH_HANDLED:
92 /* Can we use req->errors here? */ 92 /* Can we use req->errors here? */
93 if (q->mq_ops) 93 if (q->mq_ops)
94 blk_mq_complete_request(req, req->errors); 94 __blk_mq_complete_request(req);
95 else 95 else
96 __blk_complete_request(req); 96 __blk_complete_request(req);
97 break; 97 break;
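
blk_rq_timed_out() now completes an mq request through __blk_mq_complete_request(), with the error taken from req->errors rather than passed in. A hedged sketch of the driver side that this path serves, i.e. a hypothetical rq_timed_out_fn of the kind wired up through blk_queue_rq_timed_out() in blk_mq_init_queue() above:

#include <linux/blkdev.h>
#include <linux/errno.h>

static enum blk_eh_timer_return demo_timeout(struct request *req)
{
        req->errors = -ETIMEDOUT;       /* recorded outcome, picked up at completion */
        return BLK_EH_HANDLED;          /* lets the core take the branch shown above */
}
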
diff --git a/block/blk.h b/block/blk.h
index c90e1d8f7a2b..d23b415b8a28 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -113,7 +113,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
113 q->flush_queue_delayed = 1; 113 q->flush_queue_delayed = 1;
114 return NULL; 114 return NULL;
115 } 115 }
116 if (unlikely(blk_queue_dying(q)) || 116 if (unlikely(blk_queue_bypass(q)) ||
117 !q->elevator->type->ops.elevator_dispatch_fn(q, 0)) 117 !q->elevator->type->ops.elevator_dispatch_fn(q, 0))
118 return NULL; 118 return NULL;
119 } 119 }
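
The blk.h hunk swaps which lifecycle flag gates elevator dispatch in __elv_next_request(): the check is now blk_queue_bypass() rather than blk_queue_dying(). Both predicates are plain tests on q->queue_flags; a hypothetical caller mirroring the new check:

#include <linux/blkdev.h>

static bool demo_can_dispatch(struct request_queue *q)
{
        /* dispatch is suppressed while the queue is in bypass mode */
        return !blk_queue_bypass(q);
}
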
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 4d5cec1ad80d..744833b630c6 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1632,11 +1632,11 @@ static u64 cfqg_prfill_weight_device(struct seq_file *sf,
1632 return __blkg_prfill_u64(sf, pd, cfqg->dev_weight); 1632 return __blkg_prfill_u64(sf, pd, cfqg->dev_weight);
1633} 1633}
1634 1634
1635static int cfqg_print_weight_device(struct cgroup_subsys_state *css, 1635static int cfqg_print_weight_device(struct seq_file *sf, void *v)
1636 struct cftype *cft, struct seq_file *sf)
1637{ 1636{
1638 blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_weight_device, 1637 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1639 &blkcg_policy_cfq, 0, false); 1638 cfqg_prfill_weight_device, &blkcg_policy_cfq,
1639 0, false);
1640 return 0; 1640 return 0;
1641} 1641}
1642 1642
@@ -1650,26 +1650,23 @@ static u64 cfqg_prfill_leaf_weight_device(struct seq_file *sf,
1650 return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight); 1650 return __blkg_prfill_u64(sf, pd, cfqg->dev_leaf_weight);
1651} 1651}
1652 1652
1653static int cfqg_print_leaf_weight_device(struct cgroup_subsys_state *css, 1653static int cfqg_print_leaf_weight_device(struct seq_file *sf, void *v)
1654 struct cftype *cft,
1655 struct seq_file *sf)
1656{ 1654{
1657 blkcg_print_blkgs(sf, css_to_blkcg(css), cfqg_prfill_leaf_weight_device, 1655 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1658 &blkcg_policy_cfq, 0, false); 1656 cfqg_prfill_leaf_weight_device, &blkcg_policy_cfq,
1657 0, false);
1659 return 0; 1658 return 0;
1660} 1659}
1661 1660
1662static int cfq_print_weight(struct cgroup_subsys_state *css, struct cftype *cft, 1661static int cfq_print_weight(struct seq_file *sf, void *v)
1663 struct seq_file *sf)
1664{ 1662{
1665 seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_weight); 1663 seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_weight);
1666 return 0; 1664 return 0;
1667} 1665}
1668 1666
1669static int cfq_print_leaf_weight(struct cgroup_subsys_state *css, 1667static int cfq_print_leaf_weight(struct seq_file *sf, void *v)
1670 struct cftype *cft, struct seq_file *sf)
1671{ 1668{
1672 seq_printf(sf, "%u\n", css_to_blkcg(css)->cfq_leaf_weight); 1669 seq_printf(sf, "%u\n", css_to_blkcg(seq_css(sf))->cfq_leaf_weight);
1673 return 0; 1670 return 0;
1674} 1671}
1675 1672
@@ -1762,23 +1759,17 @@ static int cfq_set_leaf_weight(struct cgroup_subsys_state *css,
1762 return __cfq_set_weight(css, cft, val, true); 1759 return __cfq_set_weight(css, cft, val, true);
1763} 1760}
1764 1761
1765static int cfqg_print_stat(struct cgroup_subsys_state *css, struct cftype *cft, 1762static int cfqg_print_stat(struct seq_file *sf, void *v)
1766 struct seq_file *sf)
1767{ 1763{
1768 struct blkcg *blkcg = css_to_blkcg(css); 1764 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_stat,
1769 1765 &blkcg_policy_cfq, seq_cft(sf)->private, false);
1770 blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &blkcg_policy_cfq,
1771 cft->private, false);
1772 return 0; 1766 return 0;
1773} 1767}
1774 1768
1775static int cfqg_print_rwstat(struct cgroup_subsys_state *css, 1769static int cfqg_print_rwstat(struct seq_file *sf, void *v)
1776 struct cftype *cft, struct seq_file *sf)
1777{ 1770{
1778 struct blkcg *blkcg = css_to_blkcg(css); 1771 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)), blkg_prfill_rwstat,
1779 1772 &blkcg_policy_cfq, seq_cft(sf)->private, true);
1780 blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat, &blkcg_policy_cfq,
1781 cft->private, true);
1782 return 0; 1773 return 0;
1783} 1774}
1784 1775
@@ -1798,23 +1789,19 @@ static u64 cfqg_prfill_rwstat_recursive(struct seq_file *sf,
1798 return __blkg_prfill_rwstat(sf, pd, &sum); 1789 return __blkg_prfill_rwstat(sf, pd, &sum);
1799} 1790}
1800 1791
1801static int cfqg_print_stat_recursive(struct cgroup_subsys_state *css, 1792static int cfqg_print_stat_recursive(struct seq_file *sf, void *v)
1802 struct cftype *cft, struct seq_file *sf)
1803{ 1793{
1804 struct blkcg *blkcg = css_to_blkcg(css); 1794 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1805 1795 cfqg_prfill_stat_recursive, &blkcg_policy_cfq,
1806 blkcg_print_blkgs(sf, blkcg, cfqg_prfill_stat_recursive, 1796 seq_cft(sf)->private, false);
1807 &blkcg_policy_cfq, cft->private, false);
1808 return 0; 1797 return 0;
1809} 1798}
1810 1799
1811static int cfqg_print_rwstat_recursive(struct cgroup_subsys_state *css, 1800static int cfqg_print_rwstat_recursive(struct seq_file *sf, void *v)
1812 struct cftype *cft, struct seq_file *sf)
1813{ 1801{
1814 struct blkcg *blkcg = css_to_blkcg(css); 1802 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1815 1803 cfqg_prfill_rwstat_recursive, &blkcg_policy_cfq,
1816 blkcg_print_blkgs(sf, blkcg, cfqg_prfill_rwstat_recursive, 1804 seq_cft(sf)->private, true);
1817 &blkcg_policy_cfq, cft->private, true);
1818 return 0; 1805 return 0;
1819} 1806}
1820 1807
@@ -1835,13 +1822,11 @@ static u64 cfqg_prfill_avg_queue_size(struct seq_file *sf,
1835} 1822}
1836 1823
1837/* print avg_queue_size */ 1824/* print avg_queue_size */
1838static int cfqg_print_avg_queue_size(struct cgroup_subsys_state *css, 1825static int cfqg_print_avg_queue_size(struct seq_file *sf, void *v)
1839 struct cftype *cft, struct seq_file *sf)
1840{ 1826{
1841 struct blkcg *blkcg = css_to_blkcg(css); 1827 blkcg_print_blkgs(sf, css_to_blkcg(seq_css(sf)),
1842 1828 cfqg_prfill_avg_queue_size, &blkcg_policy_cfq,
1843 blkcg_print_blkgs(sf, blkcg, cfqg_prfill_avg_queue_size, 1829 0, false);
1844 &blkcg_policy_cfq, 0, false);
1845 return 0; 1830 return 0;
1846} 1831}
1847#endif /* CONFIG_DEBUG_BLK_CGROUP */ 1832#endif /* CONFIG_DEBUG_BLK_CGROUP */
@@ -1851,14 +1836,14 @@ static struct cftype cfq_blkcg_files[] = {
1851 { 1836 {
1852 .name = "weight_device", 1837 .name = "weight_device",
1853 .flags = CFTYPE_ONLY_ON_ROOT, 1838 .flags = CFTYPE_ONLY_ON_ROOT,
1854 .read_seq_string = cfqg_print_leaf_weight_device, 1839 .seq_show = cfqg_print_leaf_weight_device,
1855 .write_string = cfqg_set_leaf_weight_device, 1840 .write_string = cfqg_set_leaf_weight_device,
1856 .max_write_len = 256, 1841 .max_write_len = 256,
1857 }, 1842 },
1858 { 1843 {
1859 .name = "weight", 1844 .name = "weight",
1860 .flags = CFTYPE_ONLY_ON_ROOT, 1845 .flags = CFTYPE_ONLY_ON_ROOT,
1861 .read_seq_string = cfq_print_leaf_weight, 1846 .seq_show = cfq_print_leaf_weight,
1862 .write_u64 = cfq_set_leaf_weight, 1847 .write_u64 = cfq_set_leaf_weight,
1863 }, 1848 },
1864 1849
@@ -1866,26 +1851,26 @@ static struct cftype cfq_blkcg_files[] = {
1866 { 1851 {
1867 .name = "weight_device", 1852 .name = "weight_device",
1868 .flags = CFTYPE_NOT_ON_ROOT, 1853 .flags = CFTYPE_NOT_ON_ROOT,
1869 .read_seq_string = cfqg_print_weight_device, 1854 .seq_show = cfqg_print_weight_device,
1870 .write_string = cfqg_set_weight_device, 1855 .write_string = cfqg_set_weight_device,
1871 .max_write_len = 256, 1856 .max_write_len = 256,
1872 }, 1857 },
1873 { 1858 {
1874 .name = "weight", 1859 .name = "weight",
1875 .flags = CFTYPE_NOT_ON_ROOT, 1860 .flags = CFTYPE_NOT_ON_ROOT,
1876 .read_seq_string = cfq_print_weight, 1861 .seq_show = cfq_print_weight,
1877 .write_u64 = cfq_set_weight, 1862 .write_u64 = cfq_set_weight,
1878 }, 1863 },
1879 1864
1880 { 1865 {
1881 .name = "leaf_weight_device", 1866 .name = "leaf_weight_device",
1882 .read_seq_string = cfqg_print_leaf_weight_device, 1867 .seq_show = cfqg_print_leaf_weight_device,
1883 .write_string = cfqg_set_leaf_weight_device, 1868 .write_string = cfqg_set_leaf_weight_device,
1884 .max_write_len = 256, 1869 .max_write_len = 256,
1885 }, 1870 },
1886 { 1871 {
1887 .name = "leaf_weight", 1872 .name = "leaf_weight",
1888 .read_seq_string = cfq_print_leaf_weight, 1873 .seq_show = cfq_print_leaf_weight,
1889 .write_u64 = cfq_set_leaf_weight, 1874 .write_u64 = cfq_set_leaf_weight,
1890 }, 1875 },
1891 1876
@@ -1893,114 +1878,114 @@ static struct cftype cfq_blkcg_files[] = {
1893 { 1878 {
1894 .name = "time", 1879 .name = "time",
1895 .private = offsetof(struct cfq_group, stats.time), 1880 .private = offsetof(struct cfq_group, stats.time),
1896 .read_seq_string = cfqg_print_stat, 1881 .seq_show = cfqg_print_stat,
1897 }, 1882 },
1898 { 1883 {
1899 .name = "sectors", 1884 .name = "sectors",
1900 .private = offsetof(struct cfq_group, stats.sectors), 1885 .private = offsetof(struct cfq_group, stats.sectors),
1901 .read_seq_string = cfqg_print_stat, 1886 .seq_show = cfqg_print_stat,
1902 }, 1887 },
1903 { 1888 {
1904 .name = "io_service_bytes", 1889 .name = "io_service_bytes",
1905 .private = offsetof(struct cfq_group, stats.service_bytes), 1890 .private = offsetof(struct cfq_group, stats.service_bytes),
1906 .read_seq_string = cfqg_print_rwstat, 1891 .seq_show = cfqg_print_rwstat,
1907 }, 1892 },
1908 { 1893 {
1909 .name = "io_serviced", 1894 .name = "io_serviced",
1910 .private = offsetof(struct cfq_group, stats.serviced), 1895 .private = offsetof(struct cfq_group, stats.serviced),
1911 .read_seq_string = cfqg_print_rwstat, 1896 .seq_show = cfqg_print_rwstat,
1912 }, 1897 },
1913 { 1898 {
1914 .name = "io_service_time", 1899 .name = "io_service_time",
1915 .private = offsetof(struct cfq_group, stats.service_time), 1900 .private = offsetof(struct cfq_group, stats.service_time),
1916 .read_seq_string = cfqg_print_rwstat, 1901 .seq_show = cfqg_print_rwstat,
1917 }, 1902 },
1918 { 1903 {
1919 .name = "io_wait_time", 1904 .name = "io_wait_time",
1920 .private = offsetof(struct cfq_group, stats.wait_time), 1905 .private = offsetof(struct cfq_group, stats.wait_time),
1921 .read_seq_string = cfqg_print_rwstat, 1906 .seq_show = cfqg_print_rwstat,
1922 }, 1907 },
1923 { 1908 {
1924 .name = "io_merged", 1909 .name = "io_merged",
1925 .private = offsetof(struct cfq_group, stats.merged), 1910 .private = offsetof(struct cfq_group, stats.merged),
1926 .read_seq_string = cfqg_print_rwstat, 1911 .seq_show = cfqg_print_rwstat,
1927 }, 1912 },
1928 { 1913 {
1929 .name = "io_queued", 1914 .name = "io_queued",
1930 .private = offsetof(struct cfq_group, stats.queued), 1915 .private = offsetof(struct cfq_group, stats.queued),
1931 .read_seq_string = cfqg_print_rwstat, 1916 .seq_show = cfqg_print_rwstat,
1932 }, 1917 },
1933 1918
1934 /* the same statictics which cover the cfqg and its descendants */ 1919 /* the same statictics which cover the cfqg and its descendants */
1935 { 1920 {
1936 .name = "time_recursive", 1921 .name = "time_recursive",
1937 .private = offsetof(struct cfq_group, stats.time), 1922 .private = offsetof(struct cfq_group, stats.time),
1938 .read_seq_string = cfqg_print_stat_recursive, 1923 .seq_show = cfqg_print_stat_recursive,
1939 }, 1924 },
1940 { 1925 {
1941 .name = "sectors_recursive", 1926 .name = "sectors_recursive",
1942 .private = offsetof(struct cfq_group, stats.sectors), 1927 .private = offsetof(struct cfq_group, stats.sectors),
1943 .read_seq_string = cfqg_print_stat_recursive, 1928 .seq_show = cfqg_print_stat_recursive,
1944 }, 1929 },
1945 { 1930 {
1946 .name = "io_service_bytes_recursive", 1931 .name = "io_service_bytes_recursive",
1947 .private = offsetof(struct cfq_group, stats.service_bytes), 1932 .private = offsetof(struct cfq_group, stats.service_bytes),
1948 .read_seq_string = cfqg_print_rwstat_recursive, 1933 .seq_show = cfqg_print_rwstat_recursive,
1949 }, 1934 },
1950 { 1935 {
1951 .name = "io_serviced_recursive", 1936 .name = "io_serviced_recursive",
1952 .private = offsetof(struct cfq_group, stats.serviced), 1937 .private = offsetof(struct cfq_group, stats.serviced),
1953 .read_seq_string = cfqg_print_rwstat_recursive, 1938 .seq_show = cfqg_print_rwstat_recursive,
1954 }, 1939 },
1955 { 1940 {
1956 .name = "io_service_time_recursive", 1941 .name = "io_service_time_recursive",
1957 .private = offsetof(struct cfq_group, stats.service_time), 1942 .private = offsetof(struct cfq_group, stats.service_time),
1958 .read_seq_string = cfqg_print_rwstat_recursive, 1943 .seq_show = cfqg_print_rwstat_recursive,
1959 }, 1944 },
1960 { 1945 {
1961 .name = "io_wait_time_recursive", 1946 .name = "io_wait_time_recursive",
1962 .private = offsetof(struct cfq_group, stats.wait_time), 1947 .private = offsetof(struct cfq_group, stats.wait_time),
1963 .read_seq_string = cfqg_print_rwstat_recursive, 1948 .seq_show = cfqg_print_rwstat_recursive,
1964 }, 1949 },
1965 { 1950 {
1966 .name = "io_merged_recursive", 1951 .name = "io_merged_recursive",
1967 .private = offsetof(struct cfq_group, stats.merged), 1952 .private = offsetof(struct cfq_group, stats.merged),
1968 .read_seq_string = cfqg_print_rwstat_recursive, 1953 .seq_show = cfqg_print_rwstat_recursive,
1969 }, 1954 },
1970 { 1955 {
1971 .name = "io_queued_recursive", 1956 .name = "io_queued_recursive",
1972 .private = offsetof(struct cfq_group, stats.queued), 1957 .private = offsetof(struct cfq_group, stats.queued),
1973 .read_seq_string = cfqg_print_rwstat_recursive, 1958 .seq_show = cfqg_print_rwstat_recursive,
1974 }, 1959 },
1975#ifdef CONFIG_DEBUG_BLK_CGROUP 1960#ifdef CONFIG_DEBUG_BLK_CGROUP
1976 { 1961 {
1977 .name = "avg_queue_size", 1962 .name = "avg_queue_size",
1978 .read_seq_string = cfqg_print_avg_queue_size, 1963 .seq_show = cfqg_print_avg_queue_size,
1979 }, 1964 },
1980 { 1965 {
1981 .name = "group_wait_time", 1966 .name = "group_wait_time",
1982 .private = offsetof(struct cfq_group, stats.group_wait_time), 1967 .private = offsetof(struct cfq_group, stats.group_wait_time),
1983 .read_seq_string = cfqg_print_stat, 1968 .seq_show = cfqg_print_stat,
1984 }, 1969 },
1985 { 1970 {
1986 .name = "idle_time", 1971 .name = "idle_time",
1987 .private = offsetof(struct cfq_group, stats.idle_time), 1972 .private = offsetof(struct cfq_group, stats.idle_time),
1988 .read_seq_string = cfqg_print_stat, 1973 .seq_show = cfqg_print_stat,
1989 }, 1974 },
1990 { 1975 {
1991 .name = "empty_time", 1976 .name = "empty_time",
1992 .private = offsetof(struct cfq_group, stats.empty_time), 1977 .private = offsetof(struct cfq_group, stats.empty_time),
1993 .read_seq_string = cfqg_print_stat, 1978 .seq_show = cfqg_print_stat,
1994 }, 1979 },
1995 { 1980 {
1996 .name = "dequeue", 1981 .name = "dequeue",
1997 .private = offsetof(struct cfq_group, stats.dequeue), 1982 .private = offsetof(struct cfq_group, stats.dequeue),
1998 .read_seq_string = cfqg_print_stat, 1983 .seq_show = cfqg_print_stat,
1999 }, 1984 },
2000 { 1985 {
2001 .name = "unaccounted_time", 1986 .name = "unaccounted_time",
2002 .private = offsetof(struct cfq_group, stats.unaccounted_time), 1987 .private = offsetof(struct cfq_group, stats.unaccounted_time),
2003 .read_seq_string = cfqg_print_stat, 1988 .seq_show = cfqg_print_stat,
2004 }, 1989 },
2005#endif /* CONFIG_DEBUG_BLK_CGROUP */ 1990#endif /* CONFIG_DEBUG_BLK_CGROUP */
2006 { } /* terminate */ 1991 { } /* terminate */
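
The cfq and blk-throttle hunks above convert every cftype read handler from .read_seq_string to .seq_show: the handler now takes only the seq_file and recovers its css and cftype with seq_css() and seq_cft(). A hedged sketch of that shape with a hypothetical file and handler (demo_show, "demo"); the wiring mirrors the entries in cfq_blkcg_files[]:

#include <linux/cgroup.h>
#include <linux/seq_file.h>

static int demo_show(struct seq_file *sf, void *v)
{
        /* the cftype is recovered from the seq_file itself now */
        seq_printf(sf, "%s\n", seq_cft(sf)->name);
        /* a blkcg handler would go on to use css_to_blkcg(seq_css(sf)) */
        return 0;
}

static struct cftype demo_files[] = {
        {
                .name = "demo",
                .seq_show = demo_show,
        },
        { }     /* terminate */
};
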
diff --git a/block/cmdline-parser.c b/block/cmdline-parser.c
index cc2637f8674e..9dbc67e42a99 100644
--- a/block/cmdline-parser.c
+++ b/block/cmdline-parser.c
@@ -4,8 +4,7 @@
4 * Written by Cai Zhiyong <caizhiyong@huawei.com> 4 * Written by Cai Zhiyong <caizhiyong@huawei.com>
5 * 5 *
6 */ 6 */
7#include <linux/buffer_head.h> 7#include <linux/export.h>
8#include <linux/module.h>
9#include <linux/cmdline-parser.h> 8#include <linux/cmdline-parser.h>
10 9
11static int parse_subpart(struct cmdline_subpart **subpart, char *partdef) 10static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
@@ -159,6 +158,7 @@ void cmdline_parts_free(struct cmdline_parts **parts)
159 *parts = next_parts; 158 *parts = next_parts;
160 } 159 }
161} 160}
161EXPORT_SYMBOL(cmdline_parts_free);
162 162
163int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline) 163int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline)
164{ 164{
@@ -206,6 +206,7 @@ fail:
206 cmdline_parts_free(parts); 206 cmdline_parts_free(parts);
207 goto done; 207 goto done;
208} 208}
209EXPORT_SYMBOL(cmdline_parts_parse);
209 210
210struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts, 211struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
211 const char *bdev) 212 const char *bdev)
@@ -214,17 +215,17 @@ struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
214 parts = parts->next_parts; 215 parts = parts->next_parts;
215 return parts; 216 return parts;
216} 217}
218EXPORT_SYMBOL(cmdline_parts_find);
217 219
218/* 220/*
219 * add_part() 221 * add_part()
220 * 0 success. 222 * 0 success.
221 * 1 can not add so many partitions. 223 * 1 can not add so many partitions.
222 */ 224 */
223void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, 225int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
224 int slot, 226 int slot,
225 int (*add_part)(int, struct cmdline_subpart *, void *), 227 int (*add_part)(int, struct cmdline_subpart *, void *),
226 void *param) 228 void *param)
227
228{ 229{
229 sector_t from = 0; 230 sector_t from = 0;
230 struct cmdline_subpart *subpart; 231 struct cmdline_subpart *subpart;
@@ -247,4 +248,7 @@ void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
247 if (add_part(slot, subpart, param)) 248 if (add_part(slot, subpart, param))
248 break; 249 break;
249 } 250 }
251
252 return slot;
250} 253}
254EXPORT_SYMBOL(cmdline_parts_set);
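
With the cmdline-parser entry points exported and cmdline_parts_set() now returning the next free slot instead of void, modular partition code can drive the parser directly. A hedged sketch of such a caller; demo_apply and add_demo_part are hypothetical, and the callback only illustrates the stop-on-nonzero convention noted in the comment above:

#include <linux/cmdline-parser.h>
#include <linux/types.h>

static int add_demo_part(int slot, struct cmdline_subpart *subpart, void *param)
{
        /* a real driver would register subpart as partition number "slot" here */
        return 0;       /* non-zero stops the walk, as in the loop above */
}

static int demo_apply(const char *cmdline, const char *bdev, sector_t disk_size)
{
        struct cmdline_parts *parts, *match;
        int next_slot = 1;
        int ret;

        ret = cmdline_parts_parse(&parts, cmdline);
        if (ret)
                return ret;

        match = cmdline_parts_find(parts, bdev);
        if (match)
                /* returns the first slot still free after the walk */
                next_slot = cmdline_parts_set(match, disk_size, next_slot,
                                              add_demo_part, NULL);

        cmdline_parts_free(&parts);
        return next_slot;
}
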
diff --git a/block/elevator.c b/block/elevator.c
index b7ff2861b6bd..42c45a7d6714 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -440,7 +440,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
440 /* 440 /*
441 * See if our hash lookup can find a potential backmerge. 441 * See if our hash lookup can find a potential backmerge.
442 */ 442 */
443 __rq = elv_rqhash_find(q, bio->bi_sector); 443 __rq = elv_rqhash_find(q, bio->bi_iter.bi_sector);
444 if (__rq && elv_rq_merge_ok(__rq, bio)) { 444 if (__rq && elv_rq_merge_ok(__rq, bio)) {
445 *req = __rq; 445 *req = __rq;
446 return ELEVATOR_BACK_MERGE; 446 return ELEVATOR_BACK_MERGE;
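
The elevator hash lookup above is keyed by where the bio starts, which after the immutable-biovec conversion is bio->bi_iter.bi_sector. A back-merge candidate is a queued request ending exactly where the bio begins; a minimal sketch of that test using standard request accessors (demo_back_merge_candidate is a hypothetical helper, not the elevator's own check):

#include <linux/bio.h>
#include <linux/blkdev.h>

static bool demo_back_merge_candidate(struct request *rq, struct bio *bio)
{
        /* request end sector must line up with the bio's start sector */
        return blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector;
}
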
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 625e3e471d65..26487972ac54 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -323,12 +323,14 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
323 323
324 if (hdr->iovec_count) { 324 if (hdr->iovec_count) {
325 size_t iov_data_len; 325 size_t iov_data_len;
326 struct iovec *iov; 326 struct iovec *iov = NULL;
327 327
328 ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count, 328 ret = rw_copy_check_uvector(-1, hdr->dxferp, hdr->iovec_count,
329 0, NULL, &iov); 329 0, NULL, &iov);
330 if (ret < 0) 330 if (ret < 0) {
331 kfree(iov);
331 goto out; 332 goto out;
333 }
332 334
333 iov_data_len = ret; 335 iov_data_len = ret;
334 ret = 0; 336 ret = 0;