author		Linus Torvalds <torvalds@linux-foundation.org>	2014-12-13 17:14:23 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-12-13 17:14:23 -0500
commit		caf292ae5bb9d57198ce001d8b762f7abae3a94d (patch)
tree		5fd5d6d971503818ab2824407134cf36a80c53d0
parent		8f4385d590d4296ec38e228d17b1d002f6031dd2 (diff)
parent		fcbf6a087a7e4d3f03d28333678a1010810a53c3 (diff)
Merge branch 'for-3.19/core' of git://git.kernel.dk/linux-block
Pull block driver core update from Jens Axboe:
 "This is the pull request for the core block IO changes for 3.19.  Not
  a huge round this time, mostly lots of little good fixes:

   - Fix a bug in sysfs blktrace interface causing a NULL pointer
     dereference, when enabled/disabled through that API.  From Arianna
     Avanzini.

   - Various updates/fixes/improvements for blk-mq:

       - A set of updates from Bart, mostly fixing bugs in the tag
         handling.

       - Cleanup/code consolidation from Christoph.

       - Extend queue_rq API to be able to handle batching issues of IO
         requests.  NVMe will utilize this shortly.  From me.

       - A few tag and request handling updates from me.

       - Cleanup of the preempt handling for running queues from Paolo.

       - Prevent running of unmapped hardware queues from Ming Lei.

       - Move the kdump memory limiting check to be in the correct
         location, from Shaohua.

       - Initialize all software queues at init time from Takashi.  This
         prevents a kobject warning when CPUs are brought online that
         weren't online when a queue was registered.

   - Single writeback fix for I_DIRTY clearing from Tejun.  Queued with
     the core IO changes, since it's just a single fix.

   - Version X of the __bio_add_page() segment addition retry from
     Maurizio.  Hope the Xth time is the charm.

   - Documentation fixup for IO scheduler merging from Jan.

   - Introduce (and use) generic IO stat accounting helpers for non-rq
     drivers, from Gu Zheng.

   - Kill off artificial limiting of max sectors in a request from
     Christoph"

* 'for-3.19/core' of git://git.kernel.dk/linux-block: (26 commits)
  bio: modify __bio_add_page() to accept pages that don't start a new segment
  blk-mq: Fix uninitialized kobject at CPU hotplugging
  blktrace: don't let the sysfs interface remove trace from running list
  blk-mq: Use all available hardware queues
  blk-mq: Micro-optimize bt_get()
  blk-mq: Fix a race between bt_clear_tag() and bt_get()
  blk-mq: Avoid that __bt_get_word() wraps multiple times
  blk-mq: Fix a use-after-free
  blk-mq: prevent unmapped hw queue from being scheduled
  blk-mq: re-check for available tags after running the hardware queue
  blk-mq: fix hang in bt_get()
  blk-mq: move the kdump check to blk_mq_alloc_tag_set
  blk-mq: cleanup tag free handling
  blk-mq: use 'nr_cpu_ids' as highest CPU ID count for hwq <-> cpu map
  blk: introduce generic io stat accounting help function
  blk-mq: handle the single queue case in blk_mq_hctx_next_cpu
  genhd: check for int overflow in disk_expand_part_tbl()
  blk-mq: add blk_mq_free_hctx_request()
  blk-mq: export blk_mq_free_request()
  blk-mq: use get_cpu/put_cpu instead of preempt_disable/preempt_enable
  ...
-rw-r--r--  Documentation/block/biodoc.txt        6
-rw-r--r--  block/bio.c                          82
-rw-r--r--  block/blk-core.c                      3
-rw-r--r--  block/blk-mq-cpumap.c                 4
-rw-r--r--  block/blk-mq-sysfs.c                  9
-rw-r--r--  block/blk-mq-tag.c                   60
-rw-r--r--  block/blk-mq.c                      126
-rw-r--r--  block/blk-mq.h                        5
-rw-r--r--  block/blk-settings.c                  4
-rw-r--r--  block/blk-sysfs.c                    12
-rw-r--r--  block/genhd.c                        11
-rw-r--r--  drivers/block/aoe/aoeblk.c            2
-rw-r--r--  drivers/block/mtip32xx/mtip32xx.c     5
-rw-r--r--  drivers/block/null_blk.c             10
-rw-r--r--  drivers/block/virtio_blk.c            7
-rw-r--r--  drivers/scsi/scsi_lib.c               5
-rw-r--r--  fs/fs-writeback.c                    29
-rw-r--r--  include/linux/bio.h                   5
-rw-r--r--  include/linux/blk-mq.h               10
-rw-r--r--  include/linux/blkdev.h                1
-rw-r--r--  kernel/trace/blktrace.c               3
21 files changed, 254 insertions, 145 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6b972b287795..5aabc08de811 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -942,7 +942,11 @@ elevator_allow_merge_fn called whenever the block layer determines
942 request safely. The io scheduler may still 942 request safely. The io scheduler may still
943 want to stop a merge at this point if it 943 want to stop a merge at this point if it
944 results in some sort of conflict internally, 944 results in some sort of conflict internally,
945 this hook allows it to do that. 945 this hook allows it to do that. Note however
946 that two *requests* can still be merged at later
947 time. Currently the io scheduler has no way to
948 prevent that. It can only learn about the fact
949 from elevator_merge_req_fn callback.
946 950
947elevator_dispatch_fn* fills the dispatch queue with ready requests. 951elevator_dispatch_fn* fills the dispatch queue with ready requests.
948 I/O schedulers are free to postpone requests by 952 I/O schedulers are free to postpone requests by
diff --git a/block/bio.c b/block/bio.c
index 3e6e1986a5b2..471d7382c7d1 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -748,6 +748,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
748 } 748 }
749 } 749 }
750 750
751 bio->bi_iter.bi_size += len;
751 goto done; 752 goto done;
752 } 753 }
753 754
@@ -764,29 +765,32 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
764 return 0; 765 return 0;
765 766
766 /* 767 /*
767 * we might lose a segment or two here, but rather that than 768 * setup the new entry, we might clear it again later if we
768 * make this too complex. 769 * cannot add the page
770 */
771 bvec = &bio->bi_io_vec[bio->bi_vcnt];
772 bvec->bv_page = page;
773 bvec->bv_len = len;
774 bvec->bv_offset = offset;
775 bio->bi_vcnt++;
776 bio->bi_phys_segments++;
777 bio->bi_iter.bi_size += len;
778
779 /*
780 * Perform a recount if the number of segments is greater
781 * than queue_max_segments(q).
769 */ 782 */
770 783
771 while (bio->bi_phys_segments >= queue_max_segments(q)) { 784 while (bio->bi_phys_segments > queue_max_segments(q)) {
772 785
773 if (retried_segments) 786 if (retried_segments)
774 return 0; 787 goto failed;
775 788
776 retried_segments = 1; 789 retried_segments = 1;
777 blk_recount_segments(q, bio); 790 blk_recount_segments(q, bio);
778 } 791 }
779 792
780 /* 793 /*
781 * setup the new entry, we might clear it again later if we
782 * cannot add the page
783 */
784 bvec = &bio->bi_io_vec[bio->bi_vcnt];
785 bvec->bv_page = page;
786 bvec->bv_len = len;
787 bvec->bv_offset = offset;
788
789 /*
790 * if queue has other restrictions (eg varying max sector size 794 * if queue has other restrictions (eg varying max sector size
791 * depending on offset), it can specify a merge_bvec_fn in the 795 * depending on offset), it can specify a merge_bvec_fn in the
792 * queue to get further control 796 * queue to get further control
@@ -795,7 +799,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
795 struct bvec_merge_data bvm = { 799 struct bvec_merge_data bvm = {
796 .bi_bdev = bio->bi_bdev, 800 .bi_bdev = bio->bi_bdev,
797 .bi_sector = bio->bi_iter.bi_sector, 801 .bi_sector = bio->bi_iter.bi_sector,
798 .bi_size = bio->bi_iter.bi_size, 802 .bi_size = bio->bi_iter.bi_size - len,
799 .bi_rw = bio->bi_rw, 803 .bi_rw = bio->bi_rw,
800 }; 804 };
801 805
@@ -803,23 +807,25 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
803 * merge_bvec_fn() returns number of bytes it can accept 807 * merge_bvec_fn() returns number of bytes it can accept
804 * at this offset 808 * at this offset
805 */ 809 */
806 if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) { 810 if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
807 bvec->bv_page = NULL; 811 goto failed;
808 bvec->bv_len = 0;
809 bvec->bv_offset = 0;
810 return 0;
811 }
812 } 812 }
813 813
814 /* If we may be able to merge these biovecs, force a recount */ 814 /* If we may be able to merge these biovecs, force a recount */
815 if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) 815 if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
816 bio->bi_flags &= ~(1 << BIO_SEG_VALID); 816 bio->bi_flags &= ~(1 << BIO_SEG_VALID);
817 817
818 bio->bi_vcnt++;
819 bio->bi_phys_segments++;
820 done: 818 done:
821 bio->bi_iter.bi_size += len;
822 return len; 819 return len;
820
821 failed:
822 bvec->bv_page = NULL;
823 bvec->bv_len = 0;
824 bvec->bv_offset = 0;
825 bio->bi_vcnt--;
826 bio->bi_iter.bi_size -= len;
827 blk_recount_segments(q, bio);
828 return 0;
823} 829}
824 830
825/** 831/**
@@ -1739,6 +1745,34 @@ void bio_check_pages_dirty(struct bio *bio)
1739 } 1745 }
1740} 1746}
1741 1747
1748void generic_start_io_acct(int rw, unsigned long sectors,
1749 struct hd_struct *part)
1750{
1751 int cpu = part_stat_lock();
1752
1753 part_round_stats(cpu, part);
1754 part_stat_inc(cpu, part, ios[rw]);
1755 part_stat_add(cpu, part, sectors[rw], sectors);
1756 part_inc_in_flight(part, rw);
1757
1758 part_stat_unlock();
1759}
1760EXPORT_SYMBOL(generic_start_io_acct);
1761
1762void generic_end_io_acct(int rw, struct hd_struct *part,
1763 unsigned long start_time)
1764{
1765 unsigned long duration = jiffies - start_time;
1766 int cpu = part_stat_lock();
1767
1768 part_stat_add(cpu, part, ticks[rw], duration);
1769 part_round_stats(cpu, part);
1770 part_dec_in_flight(part, rw);
1771
1772 part_stat_unlock();
1773}
1774EXPORT_SYMBOL(generic_end_io_acct);
1775
1742#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1776#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
1743void bio_flush_dcache_pages(struct bio *bi) 1777void bio_flush_dcache_pages(struct bio *bi)
1744{ 1778{
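[Editor's note] The generic_start_io_acct()/generic_end_io_acct() helpers exported above let bio-based (non-request) drivers feed the same per-partition ios/sectors/ticks/in-flight statistics that the request layer keeps for request-based drivers. A minimal sketch of how a driver might call them follows; the driver and its make_request function are hypothetical, and only the two accounting calls come from this patch.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/genhd.h>
#include <linux/jiffies.h>

/* Hypothetical bio-based driver: account each bio against the whole disk. */
static void example_make_request(struct request_queue *q, struct bio *bio)
{
        struct gendisk *disk = bio->bi_bdev->bd_disk;
        int rw = bio_data_dir(bio);
        unsigned long start_time = jiffies;

        generic_start_io_acct(rw, bio_sectors(bio), &disk->part0);

        /* ... driver-specific handling of the bio would go here ... */

        generic_end_io_acct(rw, &disk->part0, start_time);
        bio_endio(bio, 0);
}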
diff --git a/block/blk-core.c b/block/blk-core.c
index ea1c4d0d7a44..30f6153a40c2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -525,6 +525,9 @@ void blk_cleanup_queue(struct request_queue *q)
525 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer); 525 del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
526 blk_sync_queue(q); 526 blk_sync_queue(q);
527 527
528 if (q->mq_ops)
529 blk_mq_free_queue(q);
530
528 spin_lock_irq(lock); 531 spin_lock_irq(lock);
529 if (q->queue_lock != &q->__queue_lock) 532 if (q->queue_lock != &q->__queue_lock)
530 q->queue_lock = &q->__queue_lock; 533 q->queue_lock = &q->__queue_lock;
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 1065d7c65fa1..5f13f4d0bcce 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -17,7 +17,7 @@
17static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues, 17static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
18 const int cpu) 18 const int cpu)
19{ 19{
20 return cpu / ((nr_cpus + nr_queues - 1) / nr_queues); 20 return cpu * nr_queues / nr_cpus;
21} 21}
22 22
23static int get_first_sibling(unsigned int cpu) 23static int get_first_sibling(unsigned int cpu)
@@ -90,7 +90,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
90 unsigned int *map; 90 unsigned int *map;
91 91
92 /* If cpus are offline, map them to first hctx */ 92 /* If cpus are offline, map them to first hctx */
93 map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL, 93 map = kzalloc_node(sizeof(*map) * nr_cpu_ids, GFP_KERNEL,
94 set->numa_node); 94 set->numa_node);
95 if (!map) 95 if (!map)
96 return NULL; 96 return NULL;
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 371d8800b48a..1630a20d5dcf 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -390,16 +390,15 @@ static void blk_mq_sysfs_init(struct request_queue *q)
390{ 390{
391 struct blk_mq_hw_ctx *hctx; 391 struct blk_mq_hw_ctx *hctx;
392 struct blk_mq_ctx *ctx; 392 struct blk_mq_ctx *ctx;
393 int i, j; 393 int i;
394 394
395 kobject_init(&q->mq_kobj, &blk_mq_ktype); 395 kobject_init(&q->mq_kobj, &blk_mq_ktype);
396 396
397 queue_for_each_hw_ctx(q, hctx, i) { 397 queue_for_each_hw_ctx(q, hctx, i)
398 kobject_init(&hctx->kobj, &blk_mq_hw_ktype); 398 kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
399 399
400 hctx_for_each_ctx(hctx, ctx, j) 400 queue_for_each_ctx(q, ctx, i)
401 kobject_init(&ctx->kobj, &blk_mq_ctx_ktype); 401 kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
402 }
403} 402}
404 403
405/* see blk_register_queue() */ 404/* see blk_register_queue() */
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 728b9a4d5f56..e3d4e4043b49 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -137,6 +137,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
137static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag) 137static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
138{ 138{
139 int tag, org_last_tag, end; 139 int tag, org_last_tag, end;
140 bool wrap = last_tag != 0;
140 141
141 org_last_tag = last_tag; 142 org_last_tag = last_tag;
142 end = bm->depth; 143 end = bm->depth;
@@ -148,15 +149,16 @@ restart:
148 * We started with an offset, start from 0 to 149 * We started with an offset, start from 0 to
149 * exhaust the map. 150 * exhaust the map.
150 */ 151 */
151 if (org_last_tag && last_tag) { 152 if (wrap) {
152 end = last_tag; 153 wrap = false;
154 end = org_last_tag;
153 last_tag = 0; 155 last_tag = 0;
154 goto restart; 156 goto restart;
155 } 157 }
156 return -1; 158 return -1;
157 } 159 }
158 last_tag = tag + 1; 160 last_tag = tag + 1;
159 } while (test_and_set_bit_lock(tag, &bm->word)); 161 } while (test_and_set_bit(tag, &bm->word));
160 162
161 return tag; 163 return tag;
162} 164}
@@ -246,14 +248,29 @@ static int bt_get(struct blk_mq_alloc_data *data,
246 if (!(data->gfp & __GFP_WAIT)) 248 if (!(data->gfp & __GFP_WAIT))
247 return -1; 249 return -1;
248 250
249 bs = bt_wait_ptr(bt, hctx);
250 do { 251 do {
252 bs = bt_wait_ptr(bt, hctx);
251 prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE); 253 prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
252 254
253 tag = __bt_get(hctx, bt, last_tag); 255 tag = __bt_get(hctx, bt, last_tag);
254 if (tag != -1) 256 if (tag != -1)
255 break; 257 break;
256 258
259 /*
260 * We're out of tags on this hardware queue, kick any
261 * pending IO submits before going to sleep waiting for
262 * some to complete.
263 */
264 blk_mq_run_hw_queue(hctx, false);
265
266 /*
267 * Retry tag allocation after running the hardware queue,
268 * as running the queue may also have found completions.
269 */
270 tag = __bt_get(hctx, bt, last_tag);
271 if (tag != -1)
272 break;
273
257 blk_mq_put_ctx(data->ctx); 274 blk_mq_put_ctx(data->ctx);
258 275
259 io_schedule(); 276 io_schedule();
@@ -268,8 +285,6 @@ static int bt_get(struct blk_mq_alloc_data *data,
268 hctx = data->hctx; 285 hctx = data->hctx;
269 bt = &hctx->tags->bitmap_tags; 286 bt = &hctx->tags->bitmap_tags;
270 } 287 }
271 finish_wait(&bs->wait, &wait);
272 bs = bt_wait_ptr(bt, hctx);
273 } while (1); 288 } while (1);
274 289
275 finish_wait(&bs->wait, &wait); 290 finish_wait(&bs->wait, &wait);
@@ -340,11 +355,10 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
340 struct bt_wait_state *bs; 355 struct bt_wait_state *bs;
341 int wait_cnt; 356 int wait_cnt;
342 357
343 /* 358 clear_bit(TAG_TO_BIT(bt, tag), &bt->map[index].word);
344 * The unlock memory barrier need to order access to req in free 359
345 * path and clearing tag bit 360 /* Ensure that the wait list checks occur after clear_bit(). */
346 */ 361 smp_mb();
347 clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word);
348 362
349 bs = bt_wake_ptr(bt); 363 bs = bt_wake_ptr(bt);
350 if (!bs) 364 if (!bs)
@@ -360,21 +374,6 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
360 } 374 }
361} 375}
362 376
363static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
364{
365 BUG_ON(tag >= tags->nr_tags);
366
367 bt_clear_tag(&tags->bitmap_tags, tag);
368}
369
370static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
371 unsigned int tag)
372{
373 BUG_ON(tag >= tags->nr_reserved_tags);
374
375 bt_clear_tag(&tags->breserved_tags, tag);
376}
377
378void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag, 377void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
379 unsigned int *last_tag) 378 unsigned int *last_tag)
380{ 379{
@@ -383,10 +382,13 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
383 if (tag >= tags->nr_reserved_tags) { 382 if (tag >= tags->nr_reserved_tags) {
384 const int real_tag = tag - tags->nr_reserved_tags; 383 const int real_tag = tag - tags->nr_reserved_tags;
385 384
386 __blk_mq_put_tag(tags, real_tag); 385 BUG_ON(real_tag >= tags->nr_tags);
386 bt_clear_tag(&tags->bitmap_tags, real_tag);
387 *last_tag = real_tag; 387 *last_tag = real_tag;
388 } else 388 } else {
389 __blk_mq_put_reserved_tag(tags, tag); 389 BUG_ON(tag >= tags->nr_reserved_tags);
390 bt_clear_tag(&tags->breserved_tags, tag);
391 }
390} 392}
391 393
392static void bt_for_each(struct blk_mq_hw_ctx *hctx, 394static void bt_for_each(struct blk_mq_hw_ctx *hctx,
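[Editor's note] Read as a whole, the patched tag-word search follows the shape sketched below: scan from the cached last_tag, and if that fails wrap to the start of the word exactly once, covering only the region skipped on the first pass. This is an illustration of the logic, not a verbatim copy of __bt_get_word() (the real function operates on a struct blk_align_bitmap).

#include <linux/bitops.h>
#include <linux/types.h>

static int sketch_get_word(unsigned long *word, unsigned int depth,
                           unsigned int last_tag)
{
        unsigned int org_last_tag = last_tag;
        unsigned int end = depth;
        bool wrap = last_tag != 0;
        int tag;

        do {
restart:
                tag = find_next_zero_bit(word, end, last_tag);
                if (tag >= end) {
                        if (wrap) {             /* wrap around at most once */
                                wrap = false;
                                end = org_last_tag;
                                last_tag = 0;
                                goto restart;
                        }
                        return -1;              /* no free bit in this word */
                }
                last_tag = tag + 1;
        } while (test_and_set_bit(tag, word));  /* raced with another CPU, retry */

        return tag;
}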
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 92ceef0d2ab9..da1ab5641227 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -279,17 +279,25 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
279 blk_mq_queue_exit(q); 279 blk_mq_queue_exit(q);
280} 280}
281 281
282void blk_mq_free_request(struct request *rq) 282void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
283{ 283{
284 struct blk_mq_ctx *ctx = rq->mq_ctx; 284 struct blk_mq_ctx *ctx = rq->mq_ctx;
285 struct blk_mq_hw_ctx *hctx;
286 struct request_queue *q = rq->q;
287 285
288 ctx->rq_completed[rq_is_sync(rq)]++; 286 ctx->rq_completed[rq_is_sync(rq)]++;
289
290 hctx = q->mq_ops->map_queue(q, ctx->cpu);
291 __blk_mq_free_request(hctx, ctx, rq); 287 __blk_mq_free_request(hctx, ctx, rq);
288
289}
290EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);
291
292void blk_mq_free_request(struct request *rq)
293{
294 struct blk_mq_hw_ctx *hctx;
295 struct request_queue *q = rq->q;
296
297 hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
298 blk_mq_free_hctx_request(hctx, rq);
292} 299}
300EXPORT_SYMBOL_GPL(blk_mq_free_request);
293 301
294inline void __blk_mq_end_request(struct request *rq, int error) 302inline void __blk_mq_end_request(struct request *rq, int error)
295{ 303{
@@ -591,7 +599,7 @@ static void blk_mq_rq_timer(unsigned long priv)
591 * If not software queues are currently mapped to this 599 * If not software queues are currently mapped to this
592 * hardware queue, there's nothing to check 600 * hardware queue, there's nothing to check
593 */ 601 */
594 if (!hctx->nr_ctx || !hctx->tags) 602 if (!blk_mq_hw_queue_mapped(hctx))
595 continue; 603 continue;
596 604
597 blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data); 605 blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data);
@@ -690,6 +698,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
690 struct request_queue *q = hctx->queue; 698 struct request_queue *q = hctx->queue;
691 struct request *rq; 699 struct request *rq;
692 LIST_HEAD(rq_list); 700 LIST_HEAD(rq_list);
701 LIST_HEAD(driver_list);
702 struct list_head *dptr;
693 int queued; 703 int queued;
694 704
695 WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask)); 705 WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
@@ -716,16 +726,27 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
716 } 726 }
717 727
718 /* 728 /*
729 * Start off with dptr being NULL, so we start the first request
730 * immediately, even if we have more pending.
731 */
732 dptr = NULL;
733
734 /*
719 * Now process all the entries, sending them to the driver. 735 * Now process all the entries, sending them to the driver.
720 */ 736 */
721 queued = 0; 737 queued = 0;
722 while (!list_empty(&rq_list)) { 738 while (!list_empty(&rq_list)) {
739 struct blk_mq_queue_data bd;
723 int ret; 740 int ret;
724 741
725 rq = list_first_entry(&rq_list, struct request, queuelist); 742 rq = list_first_entry(&rq_list, struct request, queuelist);
726 list_del_init(&rq->queuelist); 743 list_del_init(&rq->queuelist);
727 744
728 ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list)); 745 bd.rq = rq;
746 bd.list = dptr;
747 bd.last = list_empty(&rq_list);
748
749 ret = q->mq_ops->queue_rq(hctx, &bd);
729 switch (ret) { 750 switch (ret) {
730 case BLK_MQ_RQ_QUEUE_OK: 751 case BLK_MQ_RQ_QUEUE_OK:
731 queued++; 752 queued++;
@@ -744,6 +765,13 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
744 765
745 if (ret == BLK_MQ_RQ_QUEUE_BUSY) 766 if (ret == BLK_MQ_RQ_QUEUE_BUSY)
746 break; 767 break;
768
769 /*
770 * We've done the first request. If we have more than 1
771 * left in the list, set dptr to defer issue.
772 */
773 if (!dptr && rq_list.next != rq_list.prev)
774 dptr = &driver_list;
747 } 775 }
748 776
749 if (!queued) 777 if (!queued)
@@ -770,10 +798,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
770 */ 798 */
771static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) 799static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
772{ 800{
773 int cpu = hctx->next_cpu; 801 if (hctx->queue->nr_hw_queues == 1)
802 return WORK_CPU_UNBOUND;
774 803
775 if (--hctx->next_cpu_batch <= 0) { 804 if (--hctx->next_cpu_batch <= 0) {
776 int next_cpu; 805 int cpu = hctx->next_cpu, next_cpu;
777 806
778 next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask); 807 next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
779 if (next_cpu >= nr_cpu_ids) 808 if (next_cpu >= nr_cpu_ids)
@@ -781,26 +810,32 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
781 810
782 hctx->next_cpu = next_cpu; 811 hctx->next_cpu = next_cpu;
783 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH; 812 hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
813
814 return cpu;
784 } 815 }
785 816
786 return cpu; 817 return hctx->next_cpu;
787} 818}
788 819
789void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) 820void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
790{ 821{
791 if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state))) 822 if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state) ||
823 !blk_mq_hw_queue_mapped(hctx)))
792 return; 824 return;
793 825
794 if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask)) 826 if (!async) {
795 __blk_mq_run_hw_queue(hctx); 827 int cpu = get_cpu();
796 else if (hctx->queue->nr_hw_queues == 1) 828 if (cpumask_test_cpu(cpu, hctx->cpumask)) {
797 kblockd_schedule_delayed_work(&hctx->run_work, 0); 829 __blk_mq_run_hw_queue(hctx);
798 else { 830 put_cpu();
799 unsigned int cpu; 831 return;
832 }
800 833
801 cpu = blk_mq_hctx_next_cpu(hctx); 834 put_cpu();
802 kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0);
803 } 835 }
836
837 kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
838 &hctx->run_work, 0);
804} 839}
805 840
806void blk_mq_run_queues(struct request_queue *q, bool async) 841void blk_mq_run_queues(struct request_queue *q, bool async)
@@ -814,9 +849,7 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
814 test_bit(BLK_MQ_S_STOPPED, &hctx->state)) 849 test_bit(BLK_MQ_S_STOPPED, &hctx->state))
815 continue; 850 continue;
816 851
817 preempt_disable();
818 blk_mq_run_hw_queue(hctx, async); 852 blk_mq_run_hw_queue(hctx, async);
819 preempt_enable();
820 } 853 }
821} 854}
822EXPORT_SYMBOL(blk_mq_run_queues); 855EXPORT_SYMBOL(blk_mq_run_queues);
@@ -843,9 +876,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
843{ 876{
844 clear_bit(BLK_MQ_S_STOPPED, &hctx->state); 877 clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
845 878
846 preempt_disable();
847 blk_mq_run_hw_queue(hctx, false); 879 blk_mq_run_hw_queue(hctx, false);
848 preempt_enable();
849} 880}
850EXPORT_SYMBOL(blk_mq_start_hw_queue); 881EXPORT_SYMBOL(blk_mq_start_hw_queue);
851 882
@@ -870,9 +901,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
870 continue; 901 continue;
871 902
872 clear_bit(BLK_MQ_S_STOPPED, &hctx->state); 903 clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
873 preempt_disable();
874 blk_mq_run_hw_queue(hctx, async); 904 blk_mq_run_hw_queue(hctx, async);
875 preempt_enable();
876 } 905 }
877} 906}
878EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues); 907EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
@@ -898,16 +927,11 @@ static void blk_mq_delay_work_fn(struct work_struct *work)
898 927
899void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) 928void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
900{ 929{
901 unsigned long tmo = msecs_to_jiffies(msecs); 930 if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
902 931 return;
903 if (hctx->queue->nr_hw_queues == 1)
904 kblockd_schedule_delayed_work(&hctx->delay_work, tmo);
905 else {
906 unsigned int cpu;
907 932
908 cpu = blk_mq_hctx_next_cpu(hctx); 933 kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
909 kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo); 934 &hctx->delay_work, msecs_to_jiffies(msecs));
910 }
911} 935}
912EXPORT_SYMBOL(blk_mq_delay_queue); 936EXPORT_SYMBOL(blk_mq_delay_queue);
913 937
@@ -1162,7 +1186,17 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
1162 goto run_queue; 1186 goto run_queue;
1163 } 1187 }
1164 1188
1165 if (is_sync) { 1189 /*
1190 * If the driver supports defer issued based on 'last', then
1191 * queue it up like normal since we can potentially save some
1192 * CPU this way.
1193 */
1194 if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
1195 struct blk_mq_queue_data bd = {
1196 .rq = rq,
1197 .list = NULL,
1198 .last = 1
1199 };
1166 int ret; 1200 int ret;
1167 1201
1168 blk_mq_bio_to_request(rq, bio); 1202 blk_mq_bio_to_request(rq, bio);
@@ -1172,7 +1206,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
1172 * error (busy), just add it to our list as we previously 1206 * error (busy), just add it to our list as we previously
1173 * would have done 1207 * would have done
1174 */ 1208 */
1175 ret = q->mq_ops->queue_rq(data.hctx, rq, true); 1209 ret = q->mq_ops->queue_rq(data.hctx, &bd);
1176 if (ret == BLK_MQ_RQ_QUEUE_OK) 1210 if (ret == BLK_MQ_RQ_QUEUE_OK)
1177 goto done; 1211 goto done;
1178 else { 1212 else {
@@ -1784,16 +1818,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
1784 if (!ctx) 1818 if (!ctx)
1785 return ERR_PTR(-ENOMEM); 1819 return ERR_PTR(-ENOMEM);
1786 1820
1787 /*
1788 * If a crashdump is active, then we are potentially in a very
1789 * memory constrained environment. Limit us to 1 queue and
1790 * 64 tags to prevent using too much memory.
1791 */
1792 if (is_kdump_kernel()) {
1793 set->nr_hw_queues = 1;
1794 set->queue_depth = min(64U, set->queue_depth);
1795 }
1796
1797 hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL, 1821 hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
1798 set->numa_node); 1822 set->numa_node);
1799 1823
@@ -2067,6 +2091,16 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
2067 set->queue_depth = BLK_MQ_MAX_DEPTH; 2091 set->queue_depth = BLK_MQ_MAX_DEPTH;
2068 } 2092 }
2069 2093
2094 /*
2095 * If a crashdump is active, then we are potentially in a very
2096 * memory constrained environment. Limit us to 1 queue and
2097 * 64 tags to prevent using too much memory.
2098 */
2099 if (is_kdump_kernel()) {
2100 set->nr_hw_queues = 1;
2101 set->queue_depth = min(64U, set->queue_depth);
2102 }
2103
2070 set->tags = kmalloc_node(set->nr_hw_queues * 2104 set->tags = kmalloc_node(set->nr_hw_queues *
2071 sizeof(struct blk_mq_tags *), 2105 sizeof(struct blk_mq_tags *),
2072 GFP_KERNEL, set->numa_node); 2106 GFP_KERNEL, set->numa_node);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d567d5283ffa..206230e64f79 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -115,4 +115,9 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
115 data->hctx = hctx; 115 data->hctx = hctx;
116} 116}
117 117
118static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
119{
120 return hctx->nr_ctx && hctx->tags;
121}
122
118#endif 123#endif
diff --git a/block/blk-settings.c b/block/blk-settings.c
index aa02247d227e..6ed2cbe5e8c9 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -257,9 +257,7 @@ void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_
257 __func__, max_hw_sectors); 257 __func__, max_hw_sectors);
258 } 258 }
259 259
260 limits->max_hw_sectors = max_hw_sectors; 260 limits->max_sectors = limits->max_hw_sectors = max_hw_sectors;
261 limits->max_sectors = min_t(unsigned int, max_hw_sectors,
262 BLK_DEF_MAX_SECTORS);
263} 261}
264EXPORT_SYMBOL(blk_limits_max_hw_sectors); 262EXPORT_SYMBOL(blk_limits_max_hw_sectors);
265 263
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 1fac43408911..935ea2aa0730 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -492,17 +492,15 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
492 * Currently, its primary task it to free all the &struct request 492 * Currently, its primary task it to free all the &struct request
493 * structures that were allocated to the queue and the queue itself. 493 * structures that were allocated to the queue and the queue itself.
494 * 494 *
495 * Caveat: 495 * Note:
496 * Hopefully the low level driver will have finished any 496 * The low level driver must have finished any outstanding requests first
497 * outstanding requests first... 497 * via blk_cleanup_queue().
498 **/ 498 **/
499static void blk_release_queue(struct kobject *kobj) 499static void blk_release_queue(struct kobject *kobj)
500{ 500{
501 struct request_queue *q = 501 struct request_queue *q =
502 container_of(kobj, struct request_queue, kobj); 502 container_of(kobj, struct request_queue, kobj);
503 503
504 blk_sync_queue(q);
505
506 blkcg_exit_queue(q); 504 blkcg_exit_queue(q);
507 505
508 if (q->elevator) { 506 if (q->elevator) {
@@ -517,9 +515,7 @@ static void blk_release_queue(struct kobject *kobj)
517 if (q->queue_tags) 515 if (q->queue_tags)
518 __blk_queue_free_tags(q); 516 __blk_queue_free_tags(q);
519 517
520 if (q->mq_ops) 518 if (!q->mq_ops)
521 blk_mq_free_queue(q);
522 else
523 blk_free_flush_queue(q->fq); 519 blk_free_flush_queue(q->fq);
524 520
525 blk_trace_shutdown(q); 521 blk_trace_shutdown(q);
diff --git a/block/genhd.c b/block/genhd.c
index bd3060684ab2..0a536dc05f3b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1070,9 +1070,16 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
1070 struct disk_part_tbl *old_ptbl = disk->part_tbl; 1070 struct disk_part_tbl *old_ptbl = disk->part_tbl;
1071 struct disk_part_tbl *new_ptbl; 1071 struct disk_part_tbl *new_ptbl;
1072 int len = old_ptbl ? old_ptbl->len : 0; 1072 int len = old_ptbl ? old_ptbl->len : 0;
1073 int target = partno + 1; 1073 int i, target;
1074 size_t size; 1074 size_t size;
1075 int i; 1075
1076 /*
1077 * check for int overflow, since we can get here from blkpg_ioctl()
1078 * with a user passed 'partno'.
1079 */
1080 target = partno + 1;
1081 if (target < 0)
1082 return -EINVAL;
1076 1083
1077 /* disk_max_parts() is zero during initialization, ignore if so */ 1084 /* disk_max_parts() is zero during initialization, ignore if so */
1078 if (disk_max_parts(disk) && target > disk_max_parts(disk)) 1085 if (disk_max_parts(disk) && target > disk_max_parts(disk))
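[Editor's note] The new check guards arithmetic on a user-controlled value: partno reaches disk_expand_part_tbl() from the BLKPG ioctl, so "partno + 1" can wrap negative. A worked illustration of the case the early return now rejects (illustration only, userspace-style C; the kernel builds with -fno-strict-overflow, so the addition wraps rather than being undefined):

#include <limits.h>
#include <stdio.h>

int main(void)
{
        int partno = INT_MAX;           /* attacker-chosen value via BLKPG */
        int target = partno + 1;        /* wraps to a negative number */

        if (target < 0)
                printf("rejected with -EINVAL, target=%d\n", target);
        return 0;
}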
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index dd73e1ff1759..46c282fff104 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -395,7 +395,7 @@ aoeblk_gdalloc(void *vp)
395 WARN_ON(d->flags & DEVFL_TKILL); 395 WARN_ON(d->flags & DEVFL_TKILL);
396 WARN_ON(d->gd); 396 WARN_ON(d->gd);
397 WARN_ON(d->flags & DEVFL_UP); 397 WARN_ON(d->flags & DEVFL_UP);
398 blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS); 398 blk_queue_max_hw_sectors(q, 1024);
399 q->backing_dev_info.name = "aoe"; 399 q->backing_dev_info.name = "aoe";
400 q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE; 400 q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
401 d->bufpool = mp; 401 d->bufpool = mp;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 1bd5f523f8fd..3bd7ca9853a8 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3775,9 +3775,10 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
3775 return false; 3775 return false;
3776} 3776}
3777 3777
3778static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq, 3778static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
3779 bool last) 3779 const struct blk_mq_queue_data *bd)
3780{ 3780{
3781 struct request *rq = bd->rq;
3781 int ret; 3782 int ret;
3782 3783
3783 if (unlikely(mtip_check_unal_depth(hctx, rq))) 3784 if (unlikely(mtip_check_unal_depth(hctx, rq)))
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 8001e812018b..caa61212fdb5 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -313,15 +313,15 @@ static void null_request_fn(struct request_queue *q)
313 } 313 }
314} 314}
315 315
316static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq, 316static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
317 bool last) 317 const struct blk_mq_queue_data *bd)
318{ 318{
319 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq); 319 struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
320 320
321 cmd->rq = rq; 321 cmd->rq = bd->rq;
322 cmd->nq = hctx->driver_data; 322 cmd->nq = hctx->driver_data;
323 323
324 blk_mq_start_request(rq); 324 blk_mq_start_request(bd->rq);
325 325
326 null_handle_cmd(cmd); 326 null_handle_cmd(cmd);
327 return BLK_MQ_RQ_QUEUE_OK; 327 return BLK_MQ_RQ_QUEUE_OK;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1fb9e09fbbc5..7ef7c098708f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -159,10 +159,11 @@ static void virtblk_done(struct virtqueue *vq)
159 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 159 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
160} 160}
161 161
162static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req, 162static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
163 bool last) 163 const struct blk_mq_queue_data *bd)
164{ 164{
165 struct virtio_blk *vblk = hctx->queue->queuedata; 165 struct virtio_blk *vblk = hctx->queue->queuedata;
166 struct request *req = bd->rq;
166 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); 167 struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
167 unsigned long flags; 168 unsigned long flags;
168 unsigned int num; 169 unsigned int num;
@@ -223,7 +224,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
223 return BLK_MQ_RQ_QUEUE_ERROR; 224 return BLK_MQ_RQ_QUEUE_ERROR;
224 } 225 }
225 226
226 if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq)) 227 if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
227 notify = true; 228 notify = true;
228 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); 229 spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
229 230
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7e3d954c9cac..43318d556cbc 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1947,9 +1947,10 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
1947 blk_mq_complete_request(cmd->request); 1947 blk_mq_complete_request(cmd->request);
1948} 1948}
1949 1949
1950static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req, 1950static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
1951 bool last) 1951 const struct blk_mq_queue_data *bd)
1952{ 1952{
1953 struct request *req = bd->rq;
1953 struct request_queue *q = req->q; 1954 struct request_queue *q = req->q;
1954 struct scsi_device *sdev = q->queuedata; 1955 struct scsi_device *sdev = q->queuedata;
1955 struct Scsi_Host *shost = sdev->host; 1956 struct Scsi_Host *shost = sdev->host;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ef9bef118342..2d609a5fbfea 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -479,12 +479,28 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
479 * write_inode() 479 * write_inode()
480 */ 480 */
481 spin_lock(&inode->i_lock); 481 spin_lock(&inode->i_lock);
482 /* Clear I_DIRTY_PAGES if we've written out all dirty pages */ 482
483 if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
484 inode->i_state &= ~I_DIRTY_PAGES;
485 dirty = inode->i_state & I_DIRTY; 483 dirty = inode->i_state & I_DIRTY;
486 inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC); 484 inode->i_state &= ~I_DIRTY;
485
486 /*
487 * Paired with smp_mb() in __mark_inode_dirty(). This allows
488 * __mark_inode_dirty() to test i_state without grabbing i_lock -
489 * either they see the I_DIRTY bits cleared or we see the dirtied
490 * inode.
491 *
492 * I_DIRTY_PAGES is always cleared together above even if @mapping
493 * still has dirty pages. The flag is reinstated after smp_mb() if
494 * necessary. This guarantees that either __mark_inode_dirty()
495 * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
496 */
497 smp_mb();
498
499 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
500 inode->i_state |= I_DIRTY_PAGES;
501
487 spin_unlock(&inode->i_lock); 502 spin_unlock(&inode->i_lock);
503
488 /* Don't write the inode if only I_DIRTY_PAGES was set */ 504 /* Don't write the inode if only I_DIRTY_PAGES was set */
489 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { 505 if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
490 int err = write_inode(inode, wbc); 506 int err = write_inode(inode, wbc);
@@ -1148,12 +1164,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
1148 } 1164 }
1149 1165
1150 /* 1166 /*
1151 * make sure that changes are seen by all cpus before we test i_state 1167 * Paired with smp_mb() in __writeback_single_inode() for the
1152 * -- mikulas 1168 * following lockless i_state test. See there for details.
1153 */ 1169 */
1154 smp_mb(); 1170 smp_mb();
1155 1171
1156 /* avoid the locking if we can */
1157 if ((inode->i_state & flags) == flags) 1172 if ((inode->i_state & flags) == flags)
1158 return; 1173 return;
1159 1174
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7347f486ceca..efead0b532c4 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -443,6 +443,11 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
443extern void bio_set_pages_dirty(struct bio *bio); 443extern void bio_set_pages_dirty(struct bio *bio);
444extern void bio_check_pages_dirty(struct bio *bio); 444extern void bio_check_pages_dirty(struct bio *bio);
445 445
446void generic_start_io_acct(int rw, unsigned long sectors,
447 struct hd_struct *part);
448void generic_end_io_acct(int rw, struct hd_struct *part,
449 unsigned long start_time);
450
446#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 451#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
447# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" 452# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
448#endif 453#endif
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 15f7034aa377..8aded9ab2e4e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -79,7 +79,13 @@ struct blk_mq_tag_set {
79 struct list_head tag_list; 79 struct list_head tag_list;
80}; 80};
81 81
82typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool); 82struct blk_mq_queue_data {
83 struct request *rq;
84 struct list_head *list;
85 bool last;
86};
87
88typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
83typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); 89typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
84typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); 90typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
85typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); 91typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
@@ -140,6 +146,7 @@ enum {
140 BLK_MQ_F_TAG_SHARED = 1 << 1, 146 BLK_MQ_F_TAG_SHARED = 1 << 1,
141 BLK_MQ_F_SG_MERGE = 1 << 2, 147 BLK_MQ_F_SG_MERGE = 1 << 2,
142 BLK_MQ_F_SYSFS_UP = 1 << 3, 148 BLK_MQ_F_SYSFS_UP = 1 << 3,
149 BLK_MQ_F_DEFER_ISSUE = 1 << 4,
143 150
144 BLK_MQ_S_STOPPED = 0, 151 BLK_MQ_S_STOPPED = 0,
145 BLK_MQ_S_TAG_ACTIVE = 1, 152 BLK_MQ_S_TAG_ACTIVE = 1,
@@ -162,6 +169,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
162void blk_mq_insert_request(struct request *, bool, bool, bool); 169void blk_mq_insert_request(struct request *, bool, bool, bool);
163void blk_mq_run_queues(struct request_queue *q, bool async); 170void blk_mq_run_queues(struct request_queue *q, bool async);
164void blk_mq_free_request(struct request *rq); 171void blk_mq_free_request(struct request *rq);
172void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
165bool blk_mq_can_queue(struct blk_mq_hw_ctx *); 173bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
166struct request *blk_mq_alloc_request(struct request_queue *q, int rw, 174struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
167 gfp_t gfp, bool reserved); 175 gfp_t gfp, bool reserved);
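[Editor's note] With the queue_rq prototype changed to take a struct blk_mq_queue_data, a driver conversion looks roughly like the sketch below. example_submit() and example_kick_hw() are placeholders for driver-specific code; only the bd->rq / bd->last usage reflects the new API, where bd->last lets an expensive doorbell kick be deferred until the final request of a dispatch batch (as virtio_blk does above).

#include <linux/blk-mq.h>

/* Placeholders for driver-specific submission and doorbell code. */
static int example_submit(void *driver_data, struct request *rq);
static void example_kick_hw(void *driver_data);

static int example_queue_rq(struct blk_mq_hw_ctx *hctx,
                            const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;

        blk_mq_start_request(rq);

        if (example_submit(hctx->driver_data, rq))
                return BLK_MQ_RQ_QUEUE_BUSY;

        /* Kick the hardware only once per batch of queued requests. */
        if (bd->last)
                example_kick_hw(hctx->driver_data);

        return BLK_MQ_RQ_QUEUE_OK;
}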
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0495e3854247..92f4b4b288dd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1184,7 +1184,6 @@ extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
1184enum blk_default_limits { 1184enum blk_default_limits {
1185 BLK_MAX_SEGMENTS = 128, 1185 BLK_MAX_SEGMENTS = 128,
1186 BLK_SAFE_MAX_SECTORS = 255, 1186 BLK_SAFE_MAX_SECTORS = 255,
1187 BLK_DEF_MAX_SECTORS = 1024,
1188 BLK_MAX_SEGMENT_SIZE = 65536, 1187 BLK_MAX_SEGMENT_SIZE = 65536,
1189 BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL, 1188 BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL,
1190}; 1189};
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 11b9cb36092b..483cecfa5c17 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1477,9 +1477,6 @@ static int blk_trace_remove_queue(struct request_queue *q)
1477 if (atomic_dec_and_test(&blk_probes_ref)) 1477 if (atomic_dec_and_test(&blk_probes_ref))
1478 blk_unregister_tracepoints(); 1478 blk_unregister_tracepoints();
1479 1479
1480 spin_lock_irq(&running_trace_lock);
1481 list_del(&bt->running_list);
1482 spin_unlock_irq(&running_trace_lock);
1483 blk_trace_free(bt); 1480 blk_trace_free(bt);
1484 return 0; 1481 return 0;
1485} 1482}