author      Linus Torvalds <torvalds@linux-foundation.org>    2014-12-13 17:14:23 -0500
committer   Linus Torvalds <torvalds@linux-foundation.org>    2014-12-13 17:14:23 -0500
commit      caf292ae5bb9d57198ce001d8b762f7abae3a94d (patch)
tree        5fd5d6d971503818ab2824407134cf36a80c53d0
parent      8f4385d590d4296ec38e228d17b1d002f6031dd2 (diff)
parent      fcbf6a087a7e4d3f03d28333678a1010810a53c3 (diff)
Merge branch 'for-3.19/core' of git://git.kernel.dk/linux-block
Pull block driver core update from Jens Axboe:
"This is the pull request for the core block IO changes for 3.19. Not
a huge round this time, mostly lots of little good fixes:
- Fix a bug in sysfs blktrace interface causing a NULL pointer
dereference, when enabled/disabled through that API. From Arianna
Avanzini.
- Various updates/fixes/improvements for blk-mq:
- A set of updates from Bart, mostly fixing bugs in the tag
handling.
- Cleanup/code consolidation from Christoph.
- Extend queue_rq API to be able to handle batching issues of IO
requests. NVMe will utilize this shortly. From me.
- A few tag and request handling updates from me.
- Cleanup of the preempt handling for running queues from Paolo.
- Prevent running of unmapped hardware queues from Ming Lei.
- Move the kdump memory limiting check to be in the correct
location, from Shaohua.
- Initialize all software queues at init time from Takashi. This
prevents a kobject warning when CPUs are brought online that
weren't online when a queue was registered.
- Single writeback fix for I_DIRTY clearing from Tejun. Queued with
the core IO changes, since it's just a single fix.
- Version X of the __bio_add_page() segment addition retry from
Maurizio. Hope the Xth time is the charm.
- Documentation fixup for IO scheduler merging from Jan.
- Introduce (and use) generic IO stat accounting helpers for non-rq
drivers, from Gu Zheng.
- Kill off artificial limiting of max sectors in a request from
Christoph"
* 'for-3.19/core' of git://git.kernel.dk/linux-block: (26 commits)
bio: modify __bio_add_page() to accept pages that don't start a new segment
blk-mq: Fix uninitialized kobject at CPU hotplugging
blktrace: don't let the sysfs interface remove trace from running list
blk-mq: Use all available hardware queues
blk-mq: Micro-optimize bt_get()
blk-mq: Fix a race between bt_clear_tag() and bt_get()
blk-mq: Avoid that __bt_get_word() wraps multiple times
blk-mq: Fix a use-after-free
blk-mq: prevent unmapped hw queue from being scheduled
blk-mq: re-check for available tags after running the hardware queue
blk-mq: fix hang in bt_get()
blk-mq: move the kdump check to blk_mq_alloc_tag_set
blk-mq: cleanup tag free handling
blk-mq: use 'nr_cpu_ids' as highest CPU ID count for hwq <-> cpu map
blk: introduce generic io stat accounting help function
blk-mq: handle the single queue case in blk_mq_hctx_next_cpu
genhd: check for int overflow in disk_expand_part_tbl()
blk-mq: add blk_mq_free_hctx_request()
blk-mq: export blk_mq_free_request()
blk-mq: use get_cpu/put_cpu instead of preempt_disable/preempt_enable
...
-rw-r--r--   Documentation/block/biodoc.txt      |   6
-rw-r--r--   block/bio.c                         |  82
-rw-r--r--   block/blk-core.c                    |   3
-rw-r--r--   block/blk-mq-cpumap.c               |   4
-rw-r--r--   block/blk-mq-sysfs.c                |   9
-rw-r--r--   block/blk-mq-tag.c                  |  60
-rw-r--r--   block/blk-mq.c                      | 126
-rw-r--r--   block/blk-mq.h                      |   5
-rw-r--r--   block/blk-settings.c                |   4
-rw-r--r--   block/blk-sysfs.c                   |  12
-rw-r--r--   block/genhd.c                       |  11
-rw-r--r--   drivers/block/aoe/aoeblk.c          |   2
-rw-r--r--   drivers/block/mtip32xx/mtip32xx.c   |   5
-rw-r--r--   drivers/block/null_blk.c            |  10
-rw-r--r--   drivers/block/virtio_blk.c          |   7
-rw-r--r--   drivers/scsi/scsi_lib.c             |   5
-rw-r--r--   fs/fs-writeback.c                   |  29
-rw-r--r--   include/linux/bio.h                 |   5
-rw-r--r--   include/linux/blk-mq.h              |  10
-rw-r--r--   include/linux/blkdev.h              |   1
-rw-r--r--   kernel/trace/blktrace.c             |   3
21 files changed, 254 insertions(+), 145 deletions(-)
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6b972b287795..5aabc08de811 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -942,7 +942,11 @@ elevator_allow_merge_fn called whenever the block layer determines
 				request safely. The io scheduler may still
 				want to stop a merge at this point if it
 				results in some sort of conflict internally,
-				this hook allows it to do that.
+				this hook allows it to do that. Note however
+				that two *requests* can still be merged at later
+				time. Currently the io scheduler has no way to
+				prevent that. It can only learn about the fact
+				from elevator_merge_req_fn callback.
 
 elevator_dispatch_fn*		fills the dispatch queue with ready requests.
 				I/O schedulers are free to postpone requests by
diff --git a/block/bio.c b/block/bio.c
index 3e6e1986a5b2..471d7382c7d1 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -748,6 +748,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 				}
 			}
 
+			bio->bi_iter.bi_size += len;
 			goto done;
 		}
 
@@ -764,29 +765,32 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 		return 0;
 
 	/*
-	 * we might lose a segment or two here, but rather that than
-	 * make this too complex.
+	 * setup the new entry, we might clear it again later if we
+	 * cannot add the page
+	 */
+	bvec = &bio->bi_io_vec[bio->bi_vcnt];
+	bvec->bv_page = page;
+	bvec->bv_len = len;
+	bvec->bv_offset = offset;
+	bio->bi_vcnt++;
+	bio->bi_phys_segments++;
+	bio->bi_iter.bi_size += len;
+
+	/*
+	 * Perform a recount if the number of segments is greater
+	 * than queue_max_segments(q).
 	 */
 
-	while (bio->bi_phys_segments >= queue_max_segments(q)) {
+	while (bio->bi_phys_segments > queue_max_segments(q)) {
 
 		if (retried_segments)
-			return 0;
+			goto failed;
 
 		retried_segments = 1;
 		blk_recount_segments(q, bio);
 	}
 
 	/*
-	 * setup the new entry, we might clear it again later if we
-	 * cannot add the page
-	 */
-	bvec = &bio->bi_io_vec[bio->bi_vcnt];
-	bvec->bv_page = page;
-	bvec->bv_len = len;
-	bvec->bv_offset = offset;
-
-	/*
 	 * if queue has other restrictions (eg varying max sector size
 	 * depending on offset), it can specify a merge_bvec_fn in the
 	 * queue to get further control
@@ -795,7 +799,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 		struct bvec_merge_data bvm = {
 			.bi_bdev = bio->bi_bdev,
 			.bi_sector = bio->bi_iter.bi_sector,
-			.bi_size = bio->bi_iter.bi_size,
+			.bi_size = bio->bi_iter.bi_size - len,
 			.bi_rw = bio->bi_rw,
 		};
 
@@ -803,23 +807,25 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
 		 * merge_bvec_fn() returns number of bytes it can accept
 		 * at this offset
 		 */
-		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
-			bvec->bv_page = NULL;
-			bvec->bv_len = 0;
-			bvec->bv_offset = 0;
-			return 0;
-		}
+		if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
+			goto failed;
 	}
 
 	/* If we may be able to merge these biovecs, force a recount */
-	if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
+	if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
 		bio->bi_flags &= ~(1 << BIO_SEG_VALID);
 
-	bio->bi_vcnt++;
-	bio->bi_phys_segments++;
 done:
-	bio->bi_iter.bi_size += len;
 	return len;
+
+failed:
+	bvec->bv_page = NULL;
+	bvec->bv_len = 0;
+	bvec->bv_offset = 0;
+	bio->bi_vcnt--;
+	bio->bi_iter.bi_size -= len;
+	blk_recount_segments(q, bio);
+	return 0;
 }
 
 /**
@@ -1739,6 +1745,34 @@ void bio_check_pages_dirty(struct bio *bio)
 	}
 }
 
+void generic_start_io_acct(int rw, unsigned long sectors,
+			   struct hd_struct *part)
+{
+	int cpu = part_stat_lock();
+
+	part_round_stats(cpu, part);
+	part_stat_inc(cpu, part, ios[rw]);
+	part_stat_add(cpu, part, sectors[rw], sectors);
+	part_inc_in_flight(part, rw);
+
+	part_stat_unlock();
+}
+EXPORT_SYMBOL(generic_start_io_acct);
+
+void generic_end_io_acct(int rw, struct hd_struct *part,
+			 unsigned long start_time)
+{
+	unsigned long duration = jiffies - start_time;
+	int cpu = part_stat_lock();
+
+	part_stat_add(cpu, part, ticks[rw], duration);
+	part_round_stats(cpu, part);
+	part_dec_in_flight(part, rw);
+
+	part_stat_unlock();
+}
+EXPORT_SYMBOL(generic_end_io_acct);
+
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 void bio_flush_dcache_pages(struct bio *bi)
 {
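The two accounting helpers added above are intended for bio-based drivers that never go through the request layer. A kernel-style sketch (not a standalone program) of how such a driver might call them is below; my_dev, my_make_request and the completion plumbing are hypothetical, only the generic_start_io_acct()/generic_end_io_acct() calls come from this series.

/* Hypothetical bio-based driver using the new accounting helpers. */
struct my_dev {
	struct gendisk *gd;
};

static void my_make_request(struct request_queue *q, struct bio *bio)
{
	struct my_dev *dev = q->queuedata;
	int rw = bio_data_dir(bio);
	unsigned long start_time = jiffies;

	/* account the I/O as started against the whole-disk partition */
	generic_start_io_acct(rw, bio_sectors(bio), &dev->gd->part0);

	/* ... submit the bio to the hardware here ... */

	/* on completion (normally from the bio's bi_end_io callback) */
	generic_end_io_acct(rw, &dev->gd->part0, start_time);
}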
diff --git a/block/blk-core.c b/block/blk-core.c
index ea1c4d0d7a44..30f6153a40c2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -525,6 +525,9 @@ void blk_cleanup_queue(struct request_queue *q)
 	del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
 	blk_sync_queue(q);
 
+	if (q->mq_ops)
+		blk_mq_free_queue(q);
+
 	spin_lock_irq(lock);
 	if (q->queue_lock != &q->__queue_lock)
 		q->queue_lock = &q->__queue_lock;
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 1065d7c65fa1..5f13f4d0bcce 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -17,7 +17,7 @@
 static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
 			      const int cpu)
 {
-	return cpu / ((nr_cpus + nr_queues - 1) / nr_queues);
+	return cpu * nr_queues / nr_cpus;
 }
 
 static int get_first_sibling(unsigned int cpu)
@@ -90,7 +90,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
 	unsigned int *map;
 
 	/* If cpus are offline, map them to first hctx */
-	map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL,
+	map = kzalloc_node(sizeof(*map) * nr_cpu_ids, GFP_KERNEL,
 			   set->numa_node);
 	if (!map)
 		return NULL;
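A quick way to see why the new formula uses all available hardware queues: with the old rounding, the highest-numbered queues can never be selected. The small standalone program below compares the two mappings for an assumed 4 CPUs and 3 queues.

#include <stdio.h>

int main(void)
{
	unsigned int nr_cpus = 4, nr_queues = 3;

	for (unsigned int cpu = 0; cpu < nr_cpus; cpu++) {
		/* old: cpu / ceil(nr_cpus / nr_queues) */
		unsigned int old = cpu / ((nr_cpus + nr_queues - 1) / nr_queues);
		/* new: cpu * nr_queues / nr_cpus */
		unsigned int new = cpu * nr_queues / nr_cpus;

		printf("cpu %u -> old queue %u, new queue %u\n", cpu, old, new);
	}
	/* old mapping: 0 0 1 1 (queue 2 never used); new mapping: 0 0 1 2 */
	return 0;
}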
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 371d8800b48a..1630a20d5dcf 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -390,16 +390,15 @@ static void blk_mq_sysfs_init(struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
-	int i, j;
+	int i;
 
 	kobject_init(&q->mq_kobj, &blk_mq_ktype);
 
-	queue_for_each_hw_ctx(q, hctx, i) {
+	queue_for_each_hw_ctx(q, hctx, i)
 		kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
 
-		hctx_for_each_ctx(hctx, ctx, j)
-			kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
-	}
+	queue_for_each_ctx(q, ctx, i)
+		kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
 }
 
 /* see blk_register_queue() */
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 728b9a4d5f56..e3d4e4043b49 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -137,6 +137,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
 {
 	int tag, org_last_tag, end;
+	bool wrap = last_tag != 0;
 
 	org_last_tag = last_tag;
 	end = bm->depth;
@@ -148,15 +149,16 @@ restart:
 			 * We started with an offset, start from 0 to
 			 * exhaust the map.
 			 */
-			if (org_last_tag && last_tag) {
-				end = last_tag;
+			if (wrap) {
+				wrap = false;
+				end = org_last_tag;
 				last_tag = 0;
 				goto restart;
 			}
 			return -1;
 		}
 		last_tag = tag + 1;
-	} while (test_and_set_bit_lock(tag, &bm->word));
+	} while (test_and_set_bit(tag, &bm->word));
 
 	return tag;
 }
@@ -246,14 +248,29 @@ static int bt_get(struct blk_mq_alloc_data *data,
 	if (!(data->gfp & __GFP_WAIT))
 		return -1;
 
-	bs = bt_wait_ptr(bt, hctx);
 	do {
+		bs = bt_wait_ptr(bt, hctx);
 		prepare_to_wait(&bs->wait, &wait, TASK_UNINTERRUPTIBLE);
 
 		tag = __bt_get(hctx, bt, last_tag);
 		if (tag != -1)
 			break;
 
+		/*
+		 * We're out of tags on this hardware queue, kick any
+		 * pending IO submits before going to sleep waiting for
+		 * some to complete.
+		 */
+		blk_mq_run_hw_queue(hctx, false);
+
+		/*
+		 * Retry tag allocation after running the hardware queue,
+		 * as running the queue may also have found completions.
+		 */
+		tag = __bt_get(hctx, bt, last_tag);
+		if (tag != -1)
+			break;
+
 		blk_mq_put_ctx(data->ctx);
 
 		io_schedule();
@@ -268,8 +285,6 @@ static int bt_get(struct blk_mq_alloc_data *data,
 			hctx = data->hctx;
 			bt = &hctx->tags->bitmap_tags;
 		}
-		finish_wait(&bs->wait, &wait);
-		bs = bt_wait_ptr(bt, hctx);
 	} while (1);
 
 	finish_wait(&bs->wait, &wait);
@@ -340,11 +355,10 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
 	struct bt_wait_state *bs;
 	int wait_cnt;
 
-	/*
-	 * The unlock memory barrier need to order access to req in free
-	 * path and clearing tag bit
-	 */
-	clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word);
+	clear_bit(TAG_TO_BIT(bt, tag), &bt->map[index].word);
+
+	/* Ensure that the wait list checks occur after clear_bit(). */
+	smp_mb();
 
 	bs = bt_wake_ptr(bt);
 	if (!bs)
@@ -360,21 +374,6 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
 	}
 }
 
-static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
-{
-	BUG_ON(tag >= tags->nr_tags);
-
-	bt_clear_tag(&tags->bitmap_tags, tag);
-}
-
-static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
-				      unsigned int tag)
-{
-	BUG_ON(tag >= tags->nr_reserved_tags);
-
-	bt_clear_tag(&tags->breserved_tags, tag);
-}
-
 void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 		    unsigned int *last_tag)
 {
@@ -383,10 +382,13 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
 	if (tag >= tags->nr_reserved_tags) {
 		const int real_tag = tag - tags->nr_reserved_tags;
 
-		__blk_mq_put_tag(tags, real_tag);
+		BUG_ON(real_tag >= tags->nr_tags);
+		bt_clear_tag(&tags->bitmap_tags, real_tag);
 		*last_tag = real_tag;
-	} else
-		__blk_mq_put_reserved_tag(tags, tag);
+	} else {
+		BUG_ON(tag >= tags->nr_reserved_tags);
+		bt_clear_tag(&tags->breserved_tags, tag);
+	}
 }
 
 static void bt_for_each(struct blk_mq_hw_ctx *hctx,
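The __bt_get_word() change above makes the bitmap search wrap around at most once. A simplified userspace model of that search follows; find_next_zero_bit() here is a stand-in for the kernel helper, and the atomic test_and_set race-retry of the real loop is deliberately omitted.

/* Simplified model of the "wrap at most once" tag search. */
static int find_next_zero_bit(unsigned long word, int size, int offset)
{
	for (int bit = offset; bit < size; bit++)
		if (!(word & (1UL << bit)))
			return bit;
	return size;
}

static int bt_get_word(unsigned long *word, int depth, int last_tag)
{
	int org_last_tag = last_tag, end = depth, tag;
	int wrap = last_tag != 0;

restart:
	tag = find_next_zero_bit(*word, end, last_tag);
	if (tag >= end) {
		if (wrap) {
			/* started at an offset: scan [0, org_last_tag) once */
			wrap = 0;
			end = org_last_tag;
			last_tag = 0;
			goto restart;
		}
		return -1;		/* map exhausted */
	}
	*word |= 1UL << tag;		/* test_and_set_bit() in the kernel */
	return tag;
}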
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 92ceef0d2ab9..da1ab5641227 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -279,17 +279,25 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
 	blk_mq_queue_exit(q);
 }
 
-void blk_mq_free_request(struct request *rq)
+void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
-	struct blk_mq_hw_ctx *hctx;
-	struct request_queue *q = rq->q;
 
 	ctx->rq_completed[rq_is_sync(rq)]++;
-
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 	__blk_mq_free_request(hctx, ctx, rq);
+
+}
+EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);
+
+void blk_mq_free_request(struct request *rq)
+{
+	struct blk_mq_hw_ctx *hctx;
+	struct request_queue *q = rq->q;
+
+	hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
+	blk_mq_free_hctx_request(hctx, rq);
 }
+EXPORT_SYMBOL_GPL(blk_mq_free_request);
 
 inline void __blk_mq_end_request(struct request *rq, int error)
 {
@@ -591,7 +599,7 @@ static void blk_mq_rq_timer(unsigned long priv)
 		 * If not software queues are currently mapped to this
 		 * hardware queue, there's nothing to check
 		 */
-		if (!hctx->nr_ctx || !hctx->tags)
+		if (!blk_mq_hw_queue_mapped(hctx))
 			continue;
 
 		blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data);
@@ -690,6 +698,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	struct request_queue *q = hctx->queue;
 	struct request *rq;
 	LIST_HEAD(rq_list);
+	LIST_HEAD(driver_list);
+	struct list_head *dptr;
 	int queued;
 
 	WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
@@ -716,16 +726,27 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	}
 
 	/*
+	 * Start off with dptr being NULL, so we start the first request
+	 * immediately, even if we have more pending.
+	 */
+	dptr = NULL;
+
+	/*
 	 * Now process all the entries, sending them to the driver.
 	 */
 	queued = 0;
 	while (!list_empty(&rq_list)) {
+		struct blk_mq_queue_data bd;
 		int ret;
 
 		rq = list_first_entry(&rq_list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
 
-		ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list));
+		bd.rq = rq;
+		bd.list = dptr;
+		bd.last = list_empty(&rq_list);
+
+		ret = q->mq_ops->queue_rq(hctx, &bd);
 		switch (ret) {
 		case BLK_MQ_RQ_QUEUE_OK:
 			queued++;
@@ -744,6 +765,13 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 
 		if (ret == BLK_MQ_RQ_QUEUE_BUSY)
 			break;
+
+		/*
+		 * We've done the first request. If we have more than 1
+		 * left in the list, set dptr to defer issue.
+		 */
+		if (!dptr && rq_list.next != rq_list.prev)
+			dptr = &driver_list;
 	}
 
 	if (!queued)
@@ -770,10 +798,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
  */
 static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 {
-	int cpu = hctx->next_cpu;
+	if (hctx->queue->nr_hw_queues == 1)
+		return WORK_CPU_UNBOUND;
 
 	if (--hctx->next_cpu_batch <= 0) {
-		int next_cpu;
+		int cpu = hctx->next_cpu, next_cpu;
 
 		next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
 		if (next_cpu >= nr_cpu_ids)
@@ -781,26 +810,32 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
 
 		hctx->next_cpu = next_cpu;
 		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+
+		return cpu;
 	}
 
-	return cpu;
+	return hctx->next_cpu;
 }
 
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 {
-	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
+	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state) ||
+	    !blk_mq_hw_queue_mapped(hctx)))
 		return;
 
-	if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask))
-		__blk_mq_run_hw_queue(hctx);
-	else if (hctx->queue->nr_hw_queues == 1)
-		kblockd_schedule_delayed_work(&hctx->run_work, 0);
-	else {
-		unsigned int cpu;
+	if (!async) {
+		int cpu = get_cpu();
+		if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+			__blk_mq_run_hw_queue(hctx);
+			put_cpu();
+			return;
+		}
 
-		cpu = blk_mq_hctx_next_cpu(hctx);
-		kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0);
+		put_cpu();
 	}
+
+	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+			&hctx->run_work, 0);
 }
 
 void blk_mq_run_queues(struct request_queue *q, bool async)
@@ -814,9 +849,7 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
 		    test_bit(BLK_MQ_S_STOPPED, &hctx->state))
 			continue;
 
-		preempt_disable();
 		blk_mq_run_hw_queue(hctx, async);
-		preempt_enable();
 	}
 }
 EXPORT_SYMBOL(blk_mq_run_queues);
@@ -843,9 +876,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
 {
 	clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
 
-	preempt_disable();
 	blk_mq_run_hw_queue(hctx, false);
-	preempt_enable();
 }
 EXPORT_SYMBOL(blk_mq_start_hw_queue);
 
@@ -870,9 +901,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
 			continue;
 
 		clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
-		preempt_disable();
 		blk_mq_run_hw_queue(hctx, async);
-		preempt_enable();
 	}
 }
 EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
@@ -898,16 +927,11 @@ static void blk_mq_delay_work_fn(struct work_struct *work)
 
 void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 {
-	unsigned long tmo = msecs_to_jiffies(msecs);
-
-	if (hctx->queue->nr_hw_queues == 1)
-		kblockd_schedule_delayed_work(&hctx->delay_work, tmo);
-	else {
-		unsigned int cpu;
+	if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
+		return;
 
-		cpu = blk_mq_hctx_next_cpu(hctx);
-		kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo);
-	}
+	kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+			&hctx->delay_work, msecs_to_jiffies(msecs));
 }
 EXPORT_SYMBOL(blk_mq_delay_queue);
 
@@ -1162,7 +1186,17 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		goto run_queue;
 	}
 
-	if (is_sync) {
+	/*
+	 * If the driver supports defer issued based on 'last', then
+	 * queue it up like normal since we can potentially save some
+	 * CPU this way.
+	 */
+	if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
+		struct blk_mq_queue_data bd = {
+			.rq = rq,
+			.list = NULL,
+			.last = 1
+		};
 		int ret;
 
 		blk_mq_bio_to_request(rq, bio);
@@ -1172,7 +1206,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		 * error (busy), just add it to our list as we previously
 		 * would have done
 		 */
-		ret = q->mq_ops->queue_rq(data.hctx, rq, true);
+		ret = q->mq_ops->queue_rq(data.hctx, &bd);
 		if (ret == BLK_MQ_RQ_QUEUE_OK)
 			goto done;
 		else {
@@ -1784,16 +1818,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (!ctx)
 		return ERR_PTR(-ENOMEM);
 
-	/*
-	 * If a crashdump is active, then we are potentially in a very
-	 * memory constrained environment. Limit us to 1 queue and
-	 * 64 tags to prevent using too much memory.
-	 */
-	if (is_kdump_kernel()) {
-		set->nr_hw_queues = 1;
-		set->queue_depth = min(64U, set->queue_depth);
-	}
-
 	hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
 			set->numa_node);
 
@@ -2067,6 +2091,16 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 		set->queue_depth = BLK_MQ_MAX_DEPTH;
 	}
 
+	/*
+	 * If a crashdump is active, then we are potentially in a very
+	 * memory constrained environment. Limit us to 1 queue and
+	 * 64 tags to prevent using too much memory.
+	 */
+	if (is_kdump_kernel()) {
+		set->nr_hw_queues = 1;
+		set->queue_depth = min(64U, set->queue_depth);
+	}
+
 	set->tags = kmalloc_node(set->nr_hw_queues *
 				 sizeof(struct blk_mq_tags *),
 				 GFP_KERNEL, set->numa_node);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d567d5283ffa..206230e64f79 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -115,4 +115,9 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
 	data->hctx = hctx;
 }
 
+static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
+{
+	return hctx->nr_ctx && hctx->tags;
+}
+
 #endif
diff --git a/block/blk-settings.c b/block/blk-settings.c
index aa02247d227e..6ed2cbe5e8c9 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -257,9 +257,7 @@ void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_
 		       __func__, max_hw_sectors);
 	}
 
-	limits->max_hw_sectors = max_hw_sectors;
-	limits->max_sectors = min_t(unsigned int, max_hw_sectors,
-				    BLK_DEF_MAX_SECTORS);
+	limits->max_sectors = limits->max_hw_sectors = max_hw_sectors;
 }
 EXPORT_SYMBOL(blk_limits_max_hw_sectors);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 1fac43408911..935ea2aa0730 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -492,17 +492,15 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
  * Currently, its primary task it to free all the &struct request
  * structures that were allocated to the queue and the queue itself.
  *
- * Caveat:
- *     Hopefully the low level driver will have finished any
- *     outstanding requests first...
+ * Note:
+ *     The low level driver must have finished any outstanding requests first
+ *     via blk_cleanup_queue().
  **/
 static void blk_release_queue(struct kobject *kobj)
 {
 	struct request_queue *q =
 		container_of(kobj, struct request_queue, kobj);
 
-	blk_sync_queue(q);
-
 	blkcg_exit_queue(q);
 
 	if (q->elevator) {
@@ -517,9 +515,7 @@ static void blk_release_queue(struct kobject *kobj)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
-	if (q->mq_ops)
-		blk_mq_free_queue(q);
-	else
+	if (!q->mq_ops)
 		blk_free_flush_queue(q->fq);
 
 	blk_trace_shutdown(q);
diff --git a/block/genhd.c b/block/genhd.c
index bd3060684ab2..0a536dc05f3b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1070,9 +1070,16 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
 	struct disk_part_tbl *old_ptbl = disk->part_tbl;
 	struct disk_part_tbl *new_ptbl;
 	int len = old_ptbl ? old_ptbl->len : 0;
-	int target = partno + 1;
+	int i, target;
 	size_t size;
-	int i;
+
+	/*
+	 * check for int overflow, since we can get here from blkpg_ioctl()
+	 * with a user passed 'partno'.
+	 */
+	target = partno + 1;
+	if (target < 0)
+		return -EINVAL;
 
 	/* disk_max_parts() is zero during initialization, ignore if so */
 	if (disk_max_parts(disk) && target > disk_max_parts(disk))
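The value being checked above comes straight from the BLKPG ioctl. A small standalone demonstration of what the check rejects is below; the long long widening exists only to keep this userspace demo free of signed-overflow undefined behaviour, whereas in the kernel the addition simply wraps negative and the new test catches it.

#include <limits.h>
#include <stdio.h>

/* Mirrors the new target check from disk_expand_part_tbl(). */
static int part_tbl_target(int partno)
{
	long long target = (long long)partno + 1;

	if (target < 0 || target > INT_MAX)
		return -1;			/* -EINVAL in the kernel */
	return (int)target;
}

int main(void)
{
	printf("partno 5       -> target %d\n", part_tbl_target(5));
	printf("partno INT_MAX -> target %d\n", part_tbl_target(INT_MAX));
	return 0;
}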
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index dd73e1ff1759..46c282fff104 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -395,7 +395,7 @@ aoeblk_gdalloc(void *vp)
 	WARN_ON(d->flags & DEVFL_TKILL);
 	WARN_ON(d->gd);
 	WARN_ON(d->flags & DEVFL_UP);
-	blk_queue_max_hw_sectors(q, BLK_DEF_MAX_SECTORS);
+	blk_queue_max_hw_sectors(q, 1024);
 	q->backing_dev_info.name = "aoe";
 	q->backing_dev_info.ra_pages = READ_AHEAD / PAGE_CACHE_SIZE;
 	d->bufpool = mp;
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 1bd5f523f8fd..3bd7ca9853a8 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3775,9 +3775,10 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
-		bool last)
+static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
 {
+	struct request *rq = bd->rq;
 	int ret;
 
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 8001e812018b..caa61212fdb5 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -313,15 +313,15 @@ static void null_request_fn(struct request_queue *q)
 	}
 }
 
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
-		bool last)
+static int null_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
 {
-	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
 
-	cmd->rq = rq;
+	cmd->rq = bd->rq;
 	cmd->nq = hctx->driver_data;
 
-	blk_mq_start_request(rq);
+	blk_mq_start_request(bd->rq);
 
 	null_handle_cmd(cmd);
 	return BLK_MQ_RQ_QUEUE_OK;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 1fb9e09fbbc5..7ef7c098708f 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -159,10 +159,11 @@ static void virtblk_done(struct virtqueue *vq)
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
-		bool last)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
+			   const struct blk_mq_queue_data *bd)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
+	struct request *req = bd->rq;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	unsigned long flags;
 	unsigned int num;
@@ -223,7 +224,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
 		return BLK_MQ_RQ_QUEUE_ERROR;
 	}
 
-	if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
+	if (bd->last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
 		notify = true;
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7e3d954c9cac..43318d556cbc 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1947,9 +1947,10 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
 	blk_mq_complete_request(cmd->request);
 }
 
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
-		bool last)
+static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
+			 const struct blk_mq_queue_data *bd)
 {
+	struct request *req = bd->rq;
 	struct request_queue *q = req->q;
 	struct scsi_device *sdev = q->queuedata;
 	struct Scsi_Host *shost = sdev->host;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ef9bef118342..2d609a5fbfea 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -479,12 +479,28 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 	 * write_inode()
 	 */
 	spin_lock(&inode->i_lock);
-	/* Clear I_DIRTY_PAGES if we've written out all dirty pages */
-	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
-		inode->i_state &= ~I_DIRTY_PAGES;
+
 	dirty = inode->i_state & I_DIRTY;
-	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+	inode->i_state &= ~I_DIRTY;
+
+	/*
+	 * Paired with smp_mb() in __mark_inode_dirty().  This allows
+	 * __mark_inode_dirty() to test i_state without grabbing i_lock -
+	 * either they see the I_DIRTY bits cleared or we see the dirtied
+	 * inode.
+	 *
+	 * I_DIRTY_PAGES is always cleared together above even if @mapping
+	 * still has dirty pages.  The flag is reinstated after smp_mb() if
+	 * necessary.  This guarantees that either __mark_inode_dirty()
+	 * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
+	 */
+	smp_mb();
+
+	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+		inode->i_state |= I_DIRTY_PAGES;
+
 	spin_unlock(&inode->i_lock);
+
 	/* Don't write the inode if only I_DIRTY_PAGES was set */
 	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
 		int err = write_inode(inode, wbc);
@@ -1148,12 +1164,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 	}
 
 	/*
-	 * make sure that changes are seen by all cpus before we test i_state
-	 * -- mikulas
+	 * Paired with smp_mb() in __writeback_single_inode() for the
+	 * following lockless i_state test.  See there for details.
 	 */
 	smp_mb();
 
-	/* avoid the locking if we can */
 	if ((inode->i_state & flags) == flags)
 		return;
 
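The comment block added above describes a classic store-buffering pairing. The toy model below (compilable with GCC or Clang; __sync_synchronize() stands in for smp_mb(), and the two plain ints stand in for i_state and the page-cache dirty tag) shows the invariant the pairing provides: the two sides cannot both miss each other's update, so a redirty racing with writeback is never lost.

/* Toy model of the barrier pairing; not kernel code. */
#define smp_mb()	__sync_synchronize()

static int i_dirty = 1;		/* stands in for the I_DIRTY bits    */
static int tag_dirty;		/* stands in for PAGECACHE_TAG_DIRTY */

static void writeback_side(void)	/* __writeback_single_inode() */
{
	i_dirty = 0;			/* clear I_DIRTY under i_lock */
	smp_mb();
	if (tag_dirty)			/* racing redirty observed?   */
		i_dirty = 1;		/* reinstate I_DIRTY_PAGES    */
}

static void dirtying_side(void)		/* __mark_inode_dirty()       */
{
	tag_dirty = 1;			/* page already tagged dirty  */
	smp_mb();
	if (i_dirty)			/* writeback has not cleared  */
		return;			/* it yet, nothing more to do */
	/* otherwise fall through, take i_lock and redirty the inode */
}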
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 7347f486ceca..efead0b532c4 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -443,6 +443,11 @@ extern struct bio *bio_copy_kern(struct request_queue *, void *, unsigned int,
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 
+void generic_start_io_acct(int rw, unsigned long sectors,
+		struct hd_struct *part);
+void generic_end_io_acct(int rw, struct hd_struct *part,
+		unsigned long start_time);
+
 #ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 # error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
 #endif
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 15f7034aa377..8aded9ab2e4e 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -79,7 +79,13 @@ struct blk_mq_tag_set {
 	struct list_head tag_list;
 };
 
-typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool);
+struct blk_mq_queue_data {
+	struct request *rq;
+	struct list_head *list;
+	bool last;
+};
+
+typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *);
 typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
 typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
@@ -140,6 +146,7 @@ enum {
 	BLK_MQ_F_TAG_SHARED = 1 << 1,
 	BLK_MQ_F_SG_MERGE = 1 << 2,
 	BLK_MQ_F_SYSFS_UP = 1 << 3,
+	BLK_MQ_F_DEFER_ISSUE = 1 << 4,
 
 	BLK_MQ_S_STOPPED = 0,
 	BLK_MQ_S_TAG_ACTIVE = 1,
@@ -162,6 +169,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);
 void blk_mq_insert_request(struct request *, bool, bool, bool);
 void blk_mq_run_queues(struct request_queue *q, bool async);
 void blk_mq_free_request(struct request *rq);
+void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *, struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		gfp_t gfp, bool reserved);
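For reference, a minimal kernel-style sketch (not a standalone program) of a driver ->queue_rq() written against the new prototype; mydrv_issue() and mydrv_ring_doorbell() are hypothetical stand-ins for driver internals, while blk_mq_queue_data and the return codes come from this interface.

/* Hypothetical driver queue_rq against the new blk_mq_queue_data API. */
static int mydrv_queue_rq(struct blk_mq_hw_ctx *hctx,
			  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);

	if (mydrv_issue(hctx->driver_data, rq))
		return BLK_MQ_RQ_QUEUE_BUSY;	/* ring full, retry later */

	/*
	 * bd->last is false when more requests follow immediately, so the
	 * doorbell/notify can be batched across the whole burst.
	 */
	if (bd->last)
		mydrv_ring_doorbell(hctx->driver_data);

	return BLK_MQ_RQ_QUEUE_OK;
}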
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0495e3854247..92f4b4b288dd 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1184,7 +1184,6 @@ extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
 enum blk_default_limits {
 	BLK_MAX_SEGMENTS = 128,
 	BLK_SAFE_MAX_SECTORS = 255,
-	BLK_DEF_MAX_SECTORS = 1024,
 	BLK_MAX_SEGMENT_SIZE = 65536,
 	BLK_SEG_BOUNDARY_MASK = 0xFFFFFFFFUL,
 };
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 11b9cb36092b..483cecfa5c17 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1477,9 +1477,6 @@ static int blk_trace_remove_queue(struct request_queue *q)
 	if (atomic_dec_and_test(&blk_probes_ref))
 		blk_unregister_tracepoints();
 
-	spin_lock_irq(&running_trace_lock);
-	list_del(&bt->running_list);
-	spin_unlock_irq(&running_trace_lock);
 	blk_trace_free(bt);
 	return 0;
 }