author    Linus Torvalds <torvalds@linux-foundation.org>  2015-11-04 23:28:10 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-11-04 23:28:10 -0500
commit    d9734e0d1ccf87e828ad172c58a96dff97cfc0ba (patch)
tree      fcce824c45f17dbcc954bc55b8b642a967b962e0
parent    0d51ce9ca1116e8f4dc87cb51db8dd250327e9bb (diff)
parent    2404e607a9ee36db361bebe32787dafa1f7d6c00 (diff)
Merge branch 'for-4.4/core' of git://git.kernel.dk/linux-block
Pull core block updates from Jens Axboe:
 "This is the core block pull request for 4.4. I've got a few more topic
  branches this time around, some of them will layer on top of the
  core+drivers changes and will come in a separate round. So not a huge
  chunk of changes in this round. This pull request contains:

   - Enable blk-mq page allocation tracking with kmemleak, from Catalin.

   - Unused prototype removal in blk-mq from Christoph.

   - Cleanup of the q->blk_trace exchange, using cmpxchg instead of two
     xchg()'s, from Davidlohr.

   - A plug flush fix from Jeff.

   - Also from Jeff, a fix that means we don't have to update shared tag
     sets at init time unless we do a state change. This cuts down boot
     times on thousands of devices a lot with scsi/blk-mq.

   - blk-mq waitqueue barrier fix from Kosuke.

   - Various fixes from Ming:

       - Fixes for segment merging and splitting, and checks, for the
         old core and blk-mq.

       - Potential blk-mq speedup by marking ctx pending at the end of
         a plug insertion batch in blk-mq.

   - direct-io no page dirty on kernel direct reads.

   - A WRITE_SYNC fix for mpage from Roman"

* 'for-4.4/core' of git://git.kernel.dk/linux-block:
  blk-mq: avoid excessive boot delays with large lun counts
  blktrace: re-write setting q->blk_trace
  blk-mq: mark ctx as pending at batch in flush plug path
  blk-mq: fix for trace_block_plug()
  block: check bio_mergeable() early before merging
  blk-mq: check bio_mergeable() early before merging
  block: avoid to merge splitted bio
  block: setup bi_phys_segments after splitting
  block: fix plug list flushing for nomerge queues
  blk-mq: remove unused blk_mq_clone_flush_request prototype
  blk-mq: fix waitqueue_active without memory barrier in block/blk-mq-tag.c
  fs: direct-io: don't dirtying pages for ITER_BVEC/ITER_KVEC direct read
  fs/mpage.c: forgotten WRITE_SYNC in case of data integrity write
  block: kmemleak: Track the page allocations for struct request
-rw-r--r--  block/blk-core.c        | 32
-rw-r--r--  block/blk-merge.c       | 32
-rw-r--r--  block/blk-mq-tag.c      |  4
-rw-r--r--  block/blk-mq.c          | 89
-rw-r--r--  block/blk-mq.h          |  2
-rw-r--r--  block/blk.h             |  1
-rw-r--r--  block/elevator.c        |  2
-rw-r--r--  fs/direct-io.c          |  9
-rw-r--r--  fs/mpage.c              | 23
-rw-r--r--  kernel/trace/blktrace.c | 16
10 files changed, 148 insertions(+), 62 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 18e92a6645e2..f888f23d796f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1594,6 +1594,30 @@ out:
 	return ret;
 }
 
+unsigned int blk_plug_queued_count(struct request_queue *q)
+{
+	struct blk_plug *plug;
+	struct request *rq;
+	struct list_head *plug_list;
+	unsigned int ret = 0;
+
+	plug = current->plug;
+	if (!plug)
+		goto out;
+
+	if (q->mq_ops)
+		plug_list = &plug->mq_list;
+	else
+		plug_list = &plug->list;
+
+	list_for_each_entry(rq, plug_list, queuelist) {
+		if (rq->q == q)
+			ret++;
+	}
+out:
+	return ret;
+}
+
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
 	req->cmd_type = REQ_TYPE_FS;
@@ -1641,9 +1665,11 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
 	 * Check if we can merge with the plugged list before grabbing
 	 * any locks.
 	 */
-	if (!blk_queue_nomerges(q) &&
-	    blk_attempt_plug_merge(q, bio, &request_count, NULL))
-		return;
+	if (!blk_queue_nomerges(q)) {
+		if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
+			return;
+	} else
+		request_count = blk_plug_queued_count(q);
 
 	spin_lock_irq(q->queue_lock);
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index c4e9c37f3e38..de5716d8e525 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -11,13 +11,16 @@
 
 static struct bio *blk_bio_discard_split(struct request_queue *q,
 					 struct bio *bio,
-					 struct bio_set *bs)
+					 struct bio_set *bs,
+					 unsigned *nsegs)
 {
 	unsigned int max_discard_sectors, granularity;
 	int alignment;
 	sector_t tmp;
 	unsigned split_sectors;
 
+	*nsegs = 1;
+
 	/* Zero-sector (unknown) and one-sector granularities are the same. */
 	granularity = max(q->limits.discard_granularity >> 9, 1U);
 
@@ -51,8 +54,11 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
 
 static struct bio *blk_bio_write_same_split(struct request_queue *q,
 					    struct bio *bio,
-					    struct bio_set *bs)
+					    struct bio_set *bs,
+					    unsigned *nsegs)
 {
+	*nsegs = 1;
+
 	if (!q->limits.max_write_same_sectors)
 		return NULL;
 
@@ -64,7 +70,8 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
 
 static struct bio *blk_bio_segment_split(struct request_queue *q,
 					 struct bio *bio,
-					 struct bio_set *bs)
+					 struct bio_set *bs,
+					 unsigned *segs)
 {
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
 	struct bvec_iter iter;
@@ -106,24 +113,35 @@ new_segment:
 		sectors += bv.bv_len >> 9;
 	}
 
+	*segs = nsegs;
 	return NULL;
 split:
+	*segs = nsegs;
 	return bio_split(bio, sectors, GFP_NOIO, bs);
 }
 
 void blk_queue_split(struct request_queue *q, struct bio **bio,
 		     struct bio_set *bs)
 {
-	struct bio *split;
+	struct bio *split, *res;
+	unsigned nsegs;
 
 	if ((*bio)->bi_rw & REQ_DISCARD)
-		split = blk_bio_discard_split(q, *bio, bs);
+		split = blk_bio_discard_split(q, *bio, bs, &nsegs);
 	else if ((*bio)->bi_rw & REQ_WRITE_SAME)
-		split = blk_bio_write_same_split(q, *bio, bs);
+		split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
 	else
-		split = blk_bio_segment_split(q, *bio, q->bio_split);
+		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
+
+	/* physical segments can be figured out during splitting */
+	res = split ? split : *bio;
+	res->bi_phys_segments = nsegs;
+	bio_set_flag(res, BIO_SEG_VALID);
 
 	if (split) {
+		/* there isn't chance to merge the splitted bio */
+		split->bi_rw |= REQ_NOMERGE;
+
 		bio_chain(split, *bio);
 		generic_make_request(*bio);
 		*bio = split;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index ec2d11915142..60ac684c8b8c 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -75,6 +75,10 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 	struct blk_mq_bitmap_tags *bt;
 	int i, wake_index;
 
+	/*
+	 * Make sure all changes prior to this are visible from other CPUs.
+	 */
+	smp_mb();
 	bt = &tags->bitmap_tags;
 	wake_index = atomic_read(&bt->wake_index);
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
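The smp_mb() above addresses the classic waitqueue_active-without-barrier
race: the state change that lets waiters make progress must be ordered
before the "is anyone waiting?" check. Below is a small userspace sketch
of the same pattern using C11 atomics; the variable and function names
are illustrative only, not kernel APIs:

/* Publish a state change, then issue a full barrier before checking for
 * sleepers, so the check cannot be reordered before the store. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int tag_available = 0;   /* stands in for a freed tag */
static atomic_int nr_waiters    = 0;   /* stands in for waitqueue_active() */

static void release_tag_and_wake(void)
{
	/* Make the freed tag visible to would-be waiters ... */
	atomic_store_explicit(&tag_available, 1, memory_order_relaxed);

	/* ... and force a full ordering point, like smp_mb() in the patch,
	 * before deciding whether anyone needs waking. */
	atomic_thread_fence(memory_order_seq_cst);

	if (atomic_load_explicit(&nr_waiters, memory_order_relaxed))
		printf("wake up a waiter\n");
	else
		printf("nobody waiting, skip wakeup\n");
}

int main(void)
{
	atomic_fetch_add(&nr_waiters, 1);  /* pretend one task went to sleep */
	release_tag_and_wake();
	return 0;
}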
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 85f014327342..309e41087e6b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -9,6 +9,7 @@
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/kmemleak.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -989,18 +990,25 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 }
 EXPORT_SYMBOL(blk_mq_delay_queue);
 
-static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
-				    struct request *rq, bool at_head)
+static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
+					    struct blk_mq_ctx *ctx,
+					    struct request *rq,
+					    bool at_head)
 {
-	struct blk_mq_ctx *ctx = rq->mq_ctx;
-
 	trace_block_rq_insert(hctx->queue, rq);
 
 	if (at_head)
 		list_add(&rq->queuelist, &ctx->rq_list);
 	else
 		list_add_tail(&rq->queuelist, &ctx->rq_list);
+}
 
+static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
+				    struct request *rq, bool at_head)
+{
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+
+	__blk_mq_insert_req_list(hctx, ctx, rq, at_head);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 }
 
@@ -1056,8 +1064,9 @@ static void blk_mq_insert_requests(struct request_queue *q,
 		rq = list_first_entry(list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
 		rq->mq_ctx = ctx;
-		__blk_mq_insert_request(hctx, rq, false);
+		__blk_mq_insert_req_list(hctx, ctx, rq, false);
 	}
+	blk_mq_hctx_mark_pending(hctx, ctx);
 	spin_unlock(&ctx->lock);
 
 	blk_mq_run_hw_queue(hctx, from_schedule);
@@ -1139,7 +1148,7 @@ static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
 					  struct blk_mq_ctx *ctx,
 					  struct request *rq, struct bio *bio)
 {
-	if (!hctx_allow_merges(hctx)) {
+	if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
 		blk_mq_bio_to_request(rq, bio);
 		spin_lock(&ctx->lock);
 insert_rq:
@@ -1267,9 +1276,12 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio, q->bio_split);
 
-	if (!is_flush_fua && !blk_queue_nomerges(q) &&
-	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
-		return;
+	if (!is_flush_fua && !blk_queue_nomerges(q)) {
+		if (blk_attempt_plug_merge(q, bio, &request_count,
+					   &same_queue_rq))
+			return;
+	} else
+		request_count = blk_plug_queued_count(q);
 
 	rq = blk_mq_map_request(q, bio, &data);
 	if (unlikely(!rq))
@@ -1376,7 +1388,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	plug = current->plug;
 	if (plug) {
 		blk_mq_bio_to_request(rq, bio);
-		if (list_empty(&plug->mq_list))
+		if (!request_count)
 			trace_block_plug(q);
 		else if (request_count >= BLK_MAX_REQUEST_COUNT) {
 			blk_flush_plug_list(plug, false);
@@ -1430,6 +1442,11 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
 	while (!list_empty(&tags->page_list)) {
 		page = list_first_entry(&tags->page_list, struct page, lru);
 		list_del_init(&page->lru);
+		/*
+		 * Remove kmemleak object previously allocated in
+		 * blk_mq_init_rq_map().
+		 */
+		kmemleak_free(page_address(page));
 		__free_pages(page, page->private);
 	}
 
1435 1452
@@ -1502,6 +1519,11 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 		list_add_tail(&page->lru, &tags->page_list);
 
 		p = page_address(page);
+		/*
+		 * Allow kmemleak to scan these pages as they contain pointers
+		 * to additional allocations like via ops->init_request().
+		 */
+		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL);
 		entries_per_page = order_to_size(this_order) / rq_size;
 		to_do = min(entries_per_page, set->queue_depth - i);
 		left -= to_do * rq_size;
@@ -1673,7 +1695,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	INIT_LIST_HEAD(&hctx->dispatch);
 	hctx->queue = q;
 	hctx->queue_num = hctx_idx;
-	hctx->flags = set->flags;
+	hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
 
 	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
 					blk_mq_hctx_notify, hctx);
@@ -1860,27 +1882,26 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 }
 
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
+static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
-	struct request_queue *q;
-	bool shared;
 	int i;
 
-	if (set->tag_list.next == set->tag_list.prev)
-		shared = false;
-	else
-		shared = true;
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (shared)
+			hctx->flags |= BLK_MQ_F_TAG_SHARED;
+		else
+			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+	}
+}
+
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+{
+	struct request_queue *q;
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_freeze_queue(q);
-
-		queue_for_each_hw_ctx(q, hctx, i) {
-			if (shared)
-				hctx->flags |= BLK_MQ_F_TAG_SHARED;
-			else
-				hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
-		}
+		queue_set_hctx_shared(q, shared);
 		blk_mq_unfreeze_queue(q);
 	}
 }
@@ -1891,7 +1912,12 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 
 	mutex_lock(&set->tag_list_lock);
 	list_del_init(&q->tag_set_list);
-	blk_mq_update_tag_set_depth(set);
+	if (list_is_singular(&set->tag_list)) {
+		/* just transitioned to unshared */
+		set->flags &= ~BLK_MQ_F_TAG_SHARED;
+		/* update existing queue */
+		blk_mq_update_tag_set_depth(set, false);
+	}
 	mutex_unlock(&set->tag_list_lock);
 }
 
@@ -1901,8 +1927,17 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
 	q->tag_set = set;
 
 	mutex_lock(&set->tag_list_lock);
+
+	/* Check to see if we're transitioning to shared (from 1 to 2 queues). */
+	if (!list_empty(&set->tag_list) && !(set->flags & BLK_MQ_F_TAG_SHARED)) {
+		set->flags |= BLK_MQ_F_TAG_SHARED;
+		/* update existing queue */
+		blk_mq_update_tag_set_depth(set, true);
+	}
+	if (set->flags & BLK_MQ_F_TAG_SHARED)
+		queue_set_hctx_shared(q, true);
 	list_add_tail(&q->tag_set_list, &set->tag_list);
-	blk_mq_update_tag_set_depth(set);
+
 	mutex_unlock(&set->tag_list_lock);
 }
 
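The two hunks above implement the boot-time fix from the commit message:
the shared-tag flag is only recomputed on the 1-to-2 and 2-to-1 queue
transitions instead of re-walking every queue on every add or delete.
The following userspace sketch illustrates that bookkeeping idea; all
names (fake_tag_set, add_queue, del_queue) are invented for illustration
and are not kernel APIs:

#include <stdbool.h>
#include <stdio.h>

#define MAX_QUEUES 8

struct fake_tag_set {
	bool shared;             /* stands in for BLK_MQ_F_TAG_SHARED */
	int  nr_queues;
	int  queues[MAX_QUEUES]; /* queue ids, stand-ins for request queues */
};

static void add_queue(struct fake_tag_set *set, int q)
{
	/* Flip to shared only when going from one user to two or more. */
	if (set->nr_queues > 0 && !set->shared) {
		set->shared = true;
		printf("marking existing queues shared\n");
	}
	set->queues[set->nr_queues++] = q;
	if (set->shared)
		printf("queue %d starts out shared\n", q);
}

static void del_queue(struct fake_tag_set *set, int q)
{
	int i;

	for (i = 0; i < set->nr_queues; i++)
		if (set->queues[i] == q)
			break;
	if (i == set->nr_queues)
		return;
	set->queues[i] = set->queues[--set->nr_queues];

	/* Flip back to unshared only when a single user remains. */
	if (set->nr_queues == 1 && set->shared) {
		set->shared = false;
		printf("last remaining queue goes back to unshared\n");
	}
}

int main(void)
{
	struct fake_tag_set set = { .shared = false, .nr_queues = 0 };

	add_queue(&set, 1);   /* single user, stays unshared */
	add_queue(&set, 2);   /* 1 -> 2: flip to shared once  */
	del_queue(&set, 2);   /* 2 -> 1: flip back once       */
	return 0;
}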
diff --git a/block/blk-mq.h b/block/blk-mq.h
index f4fea7964910..b44dce165761 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -29,8 +29,6 @@ void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
-void blk_mq_clone_flush_request(struct request *flush_rq,
-		struct request *orig_rq);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
 
diff --git a/block/blk.h b/block/blk.h
index 98614ad37c81..aa27d0292af1 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -86,6 +86,7 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 			    unsigned int *request_count,
 			    struct request **same_queue_rq);
+unsigned int blk_plug_queued_count(struct request_queue *q);
 
 void blk_account_io_start(struct request *req, bool new_io);
 void blk_account_io_completion(struct request *req, unsigned int bytes);
diff --git a/block/elevator.c b/block/elevator.c
index 84d63943f2de..c3555c9c672f 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -420,7 +420,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 	 * noxmerges: Only simple one-hit cache try
 	 * merges:    All merge tries attempted
 	 */
-	if (blk_queue_nomerges(q))
+	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
 		return ELEVATOR_NO_MERGE;
 
 	/*
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 11256291642e..3ae0e0427191 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -120,6 +120,7 @@ struct dio {
 	int page_errors;		/* errno from get_user_pages() */
 	int is_async;			/* is IO async ? */
 	bool defer_completion;		/* defer AIO completion to workqueue? */
+	bool should_dirty;		/* if pages should be dirtied */
 	int io_error;			/* IO error in completion path */
 	unsigned long refcount;		/* direct_io_worker() and bios */
 	struct bio *bio_list;		/* singly linked via bi_private */
@@ -393,7 +394,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
 	dio->refcount++;
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 
-	if (dio->is_async && dio->rw == READ)
+	if (dio->is_async && dio->rw == READ && dio->should_dirty)
 		bio_set_pages_dirty(bio);
 
 	if (sdio->submit_io)
@@ -464,14 +465,15 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 	if (bio->bi_error)
 		dio->io_error = -EIO;
 
-	if (dio->is_async && dio->rw == READ) {
+	if (dio->is_async && dio->rw == READ && dio->should_dirty) {
 		bio_check_pages_dirty(bio);	/* transfers ownership */
 		err = bio->bi_error;
 	} else {
 		bio_for_each_segment_all(bvec, bio, i) {
 			struct page *page = bvec->bv_page;
 
-			if (dio->rw == READ && !PageCompound(page))
+			if (dio->rw == READ && !PageCompound(page) &&
+			    dio->should_dirty)
 				set_page_dirty_lock(page);
 			page_cache_release(page);
 		}
@@ -1219,6 +1221,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
 
+	dio->should_dirty = (iter->type == ITER_IOVEC);
 	sdio.iter = iter;
 	sdio.final_block_in_request =
 		(offset + iov_iter_count(iter)) >> blkbits;
diff --git a/fs/mpage.c b/fs/mpage.c
index a7c34274f207..09abba7653aa 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -485,6 +485,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
 	struct buffer_head map_bh;
 	loff_t i_size = i_size_read(inode);
 	int ret = 0;
+	int wr = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
 
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
@@ -593,7 +594,7 @@ page_is_mapped:
 	 * This page will go to BIO. Do we need to send this BIO off first?
 	 */
 	if (bio && mpd->last_block_in_bio != blocks[0] - 1)
-		bio = mpage_bio_submit(WRITE, bio);
+		bio = mpage_bio_submit(wr, bio);
 
 alloc_new:
 	if (bio == NULL) {
@@ -620,7 +621,7 @@ alloc_new:
 	wbc_account_io(wbc, page, PAGE_SIZE);
 	length = first_unmapped << blkbits;
 	if (bio_add_page(bio, page, length, 0) < length) {
-		bio = mpage_bio_submit(WRITE, bio);
+		bio = mpage_bio_submit(wr, bio);
 		goto alloc_new;
 	}
 
@@ -630,7 +631,7 @@ alloc_new:
 	set_page_writeback(page);
 	unlock_page(page);
 	if (boundary || (first_unmapped != blocks_per_page)) {
-		bio = mpage_bio_submit(WRITE, bio);
+		bio = mpage_bio_submit(wr, bio);
 		if (boundary_block) {
 			write_boundary_block(boundary_bdev,
 					boundary_block, 1 << blkbits);
@@ -642,7 +643,7 @@ alloc_new:
 
 confused:
 	if (bio)
-		bio = mpage_bio_submit(WRITE, bio);
+		bio = mpage_bio_submit(wr, bio);
 
 	if (mpd->use_writepage) {
 		ret = mapping->a_ops->writepage(page, wbc);
@@ -698,8 +699,11 @@ mpage_writepages(struct address_space *mapping,
 		};
 
 		ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
-		if (mpd.bio)
-			mpage_bio_submit(WRITE, mpd.bio);
+		if (mpd.bio) {
+			int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+				  WRITE_SYNC : WRITE);
+			mpage_bio_submit(wr, mpd.bio);
+		}
 	}
 	blk_finish_plug(&plug);
 	return ret;
@@ -716,8 +720,11 @@ int mpage_writepage(struct page *page, get_block_t get_block,
 		.use_writepage = 0,
 	};
 	int ret = __mpage_writepage(page, wbc, &mpd);
-	if (mpd.bio)
-		mpage_bio_submit(WRITE, mpd.bio);
+	if (mpd.bio) {
+		int wr = (wbc->sync_mode == WB_SYNC_ALL ?
+			  WRITE_SYNC : WRITE);
+		mpage_bio_submit(wr, mpd.bio);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(mpage_writepage);
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 90e72a0c3047..e3a26188b95e 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -437,7 +437,7 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 			struct block_device *bdev,
 			struct blk_user_trace_setup *buts)
 {
-	struct blk_trace *old_bt, *bt = NULL;
+	struct blk_trace *bt = NULL;
 	struct dentry *dir = NULL;
 	int ret;
 
@@ -519,11 +519,8 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 	bt->trace_state = Blktrace_setup;
 
 	ret = -EBUSY;
-	old_bt = xchg(&q->blk_trace, bt);
-	if (old_bt) {
-		(void) xchg(&q->blk_trace, old_bt);
+	if (cmpxchg(&q->blk_trace, NULL, bt))
 		goto err;
-	}
 
 	if (atomic_inc_return(&blk_probes_ref) == 1)
 		blk_register_tracepoints();
@@ -1481,7 +1478,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
 static int blk_trace_setup_queue(struct request_queue *q,
 				 struct block_device *bdev)
 {
-	struct blk_trace *old_bt, *bt = NULL;
+	struct blk_trace *bt = NULL;
 	int ret = -ENOMEM;
 
 	bt = kzalloc(sizeof(*bt), GFP_KERNEL);
@@ -1497,12 +1494,9 @@ static int blk_trace_setup_queue(struct request_queue *q,
 
 	blk_trace_setup_lba(bt, bdev);
 
-	old_bt = xchg(&q->blk_trace, bt);
-	if (old_bt != NULL) {
-		(void)xchg(&q->blk_trace, old_bt);
-		ret = -EBUSY;
+	ret = -EBUSY;
+	if (cmpxchg(&q->blk_trace, NULL, bt))
 		goto free_bt;
-	}
 
 	if (atomic_inc_return(&blk_probes_ref) == 1)
 		blk_register_tracepoints();
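The blktrace hunks above replace an xchg()-and-swap-back dance with a
single compare-and-swap that claims q->blk_trace only while it is still
NULL. A small userspace sketch of the same claim-once idea, using C11
atomics rather than the kernel's cmpxchg(); the struct and function
names are illustrative only:

#include <stdatomic.h>
#include <stdio.h>

struct trace { int id; };

static _Atomic(struct trace *) active_trace = NULL;

/* Returns 0 on success, -1 if a trace was already installed (EBUSY-like).
 * Equivalent in spirit to: if (cmpxchg(&q->blk_trace, NULL, bt)) goto err; */
static int install_trace(struct trace *bt)
{
	struct trace *expected = NULL;

	if (!atomic_compare_exchange_strong(&active_trace, &expected, bt))
		return -1;
	return 0;
}

int main(void)
{
	struct trace a = { .id = 1 }, b = { .id = 2 };

	printf("first install:  %d\n", install_trace(&a));   /*  0: claimed */
	printf("second install: %d\n", install_trace(&b));   /* -1: busy    */
	return 0;
}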