author		Tejun Heo <tj@kernel.org>	2010-09-03 05:56:17 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2010-09-10 06:35:37 -0400
commit		4fed947cb311e5aa51781d316cefca836352f6ce (patch)
tree		eada83d5bf503244628e3c190e97e8c7af847e35
parent		dd4c133f387c48f526022860ad70354637a80f4c (diff)
block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests
Now that the backend conversion is complete, export sequenced
FLUSH/FUA capability through REQ_FLUSH/FUA flags.  REQ_FLUSH means
the device cache should be flushed before executing the request.
REQ_FUA means that the data in the request should be on non-volatile
media on completion.

The block layer will choose the correct way of implementing the
semantics and execute it.  The request may be passed to the device
directly if the device can handle it; otherwise, it will be sequenced
using one or more proxy requests.  Devices will never see REQ_FLUSH
and/or REQ_FUA flags they don't support.

Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are
never failed with -EOPNOTSUPP.  If the underlying device doesn't
support FLUSH/FUA, the block layer simply makes them noops.  IOW, it
no longer distinguishes between a writeback cache which doesn't
support cache flush and writethrough/no cache.  Devices which have a
WB cache w/o flush are very difficult to come by these days and
there's nothing much we can do anyway, so it doesn't make sense to
require everyone to implement -EOPNOTSUPP handling.  This will
simplify filesystems and block drivers as they can drop the
-EOPNOTSUPP retry logic for barriers.

* QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into
  blk-flush.c.

* REQ_FLUSH w/o data can also be directly passed to drivers without
  sequencing, but some drivers assume that zero length requests don't
  have rq->bio, which isn't true for these requests, requiring the
  use of proxy requests.

* REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are
  copied from bio to request.

* WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and
  WRITE_FLUSH_FUA are added.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
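For illustration, a minimal sketch (not part of the commit) of how a filesystem can use the new interface. submit_bio() and the flag macros match this era of the block layer; the helper itself is hypothetical and the bio setup is elided.

#include <linux/bio.h>
#include <linux/fs.h>

/* Issue @bio so the device cache is flushed before the write and the
 * data is on non-volatile media on completion.  On devices without
 * FLUSH/FUA support the block layer degrades this to a plain write
 * instead of failing with -EOPNOTSUPP, so no retry path is needed. */
static void submit_commit_block(struct bio *bio)
{
	submit_bio(WRITE_FLUSH_FUA, bio);
}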
-rw-r--r--	block/blk-core.c		2
-rw-r--r--	block/blk-flush.c		85
-rw-r--r--	block/blk.h			3
-rw-r--r--	include/linux/blk_types.h	2
-rw-r--r--	include/linux/blkdev.h		38
-rw-r--r--	include/linux/buffer_head.h	2
-rw-r--r--	include/linux/fs.h		19
7 files changed, 67 insertions, 84 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 8870ae40179d..18455c4f618a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1204,7 +1204,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 	spin_lock_irq(q->queue_lock);
 
-	if (bio->bi_rw & REQ_HARDBARRIER) {
+	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
 		where = ELEVATOR_INSERT_FRONT;
 		goto get_rq;
 	}
diff --git a/block/blk-flush.c b/block/blk-flush.c
index dd873225da97..452c552e9ead 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -1,5 +1,5 @@
 /*
- * Functions related to barrier IO handling
+ * Functions to sequence FLUSH and FUA writes.
  */
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -9,6 +9,15 @@
 
 #include "blk.h"
 
+/* FLUSH/FUA sequences */
+enum {
+	QUEUE_FSEQ_STARTED	= (1 << 0), /* flushing in progress */
+	QUEUE_FSEQ_PREFLUSH	= (1 << 1), /* pre-flushing in progress */
+	QUEUE_FSEQ_DATA		= (1 << 2), /* data write in progress */
+	QUEUE_FSEQ_POSTFLUSH	= (1 << 3), /* post-flushing in progress */
+	QUEUE_FSEQ_DONE		= (1 << 4),
+};
+
 static struct request *queue_next_fseq(struct request_queue *q);
 
 unsigned blk_flush_cur_seq(struct request_queue *q)
@@ -79,6 +88,7 @@ static void queue_flush(struct request_queue *q, struct request *rq,
 
 static struct request *queue_next_fseq(struct request_queue *q)
 {
+	struct request *orig_rq = q->orig_flush_rq;
 	struct request *rq = &q->flush_rq;
 
 	switch (blk_flush_cur_seq(q)) {
@@ -87,12 +97,11 @@ static struct request *queue_next_fseq(struct request_queue *q)
 		break;
 
 	case QUEUE_FSEQ_DATA:
-		/* initialize proxy request and queue it */
+		/* initialize proxy request, inherit FLUSH/FUA and queue it */
 		blk_rq_init(q, rq);
-		init_request_from_bio(rq, q->orig_flush_rq->bio);
-		rq->cmd_flags &= ~REQ_HARDBARRIER;
-		if (q->ordered & QUEUE_ORDERED_DO_FUA)
-			rq->cmd_flags |= REQ_FUA;
+		init_request_from_bio(rq, orig_rq->bio);
+		rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
+		rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
 		rq->end_io = flush_data_end_io;
 
 		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
@@ -110,60 +119,58 @@ static struct request *queue_next_fseq(struct request_queue *q)
 
 struct request *blk_do_flush(struct request_queue *q, struct request *rq)
 {
+	unsigned int fflags = q->flush_flags; /* may change, cache it */
+	bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
+	bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
+	bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
 	unsigned skip = 0;
 
-	if (!(rq->cmd_flags & REQ_HARDBARRIER))
+	/*
+	 * Special case.  If there's data but flush is not necessary,
+	 * the request can be issued directly.
+	 *
+	 * Flush w/o data should be able to be issued directly too but
+	 * currently some drivers assume that rq->bio contains
+	 * non-zero data if it isn't NULL and empty FLUSH requests
+	 * getting here usually have bio's without data.
+	 */
+	if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
+		rq->cmd_flags &= ~REQ_FLUSH;
+		if (!has_fua)
+			rq->cmd_flags &= ~REQ_FUA;
 		return rq;
+	}
 
+	/*
+	 * Sequenced flushes can't be processed in parallel.  If
+	 * another one is already in progress, queue for later
+	 * processing.
+	 */
 	if (q->flush_seq) {
-		/*
-		 * Sequenced flush is already in progress and they
-		 * can't be processed in parallel.  Queue for later
-		 * processing.
-		 */
 		list_move_tail(&rq->queuelist, &q->pending_flushes);
 		return NULL;
 	}
 
-	if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
-		/*
-		 * Queue ordering not supported.  Terminate
-		 * with prejudice.
-		 */
-		blk_dequeue_request(rq);
-		__blk_end_request_all(rq, -EOPNOTSUPP);
-		return NULL;
-	}
-
 	/*
 	 * Start a new flush sequence
 	 */
 	q->flush_err = 0;
-	q->ordered = q->next_ordered;
 	q->flush_seq |= QUEUE_FSEQ_STARTED;
 
-	/*
-	 * For an empty barrier, there's no actual BAR request, which
-	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
-	 */
-	if (!blk_rq_sectors(rq))
-		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
-				QUEUE_ORDERED_DO_POSTFLUSH);
-
-	/* stash away the original request */
+	/* adjust FLUSH/FUA of the original request and stash it away */
+	rq->cmd_flags &= ~REQ_FLUSH;
+	if (!has_fua)
+		rq->cmd_flags &= ~REQ_FUA;
 	blk_dequeue_request(rq);
 	q->orig_flush_rq = rq;
 
-	if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
+	/* skip unneeded sequences and return the first one */
+	if (!do_preflush)
 		skip |= QUEUE_FSEQ_PREFLUSH;
-
-	if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
+	if (!blk_rq_sectors(rq))
 		skip |= QUEUE_FSEQ_DATA;
-
-	if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
+	if (!do_postflush)
 		skip |= QUEUE_FSEQ_POSTFLUSH;
-
-	/* complete skipped sequences and return the first sequence */
 	return blk_flush_complete_seq(q, skip, 0);
 }
 
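As an aside, the sequencing decision above can be modeled as a pure function, which makes the degradation rules easy to check in isolation. This is a standalone sketch, not kernel code: the flag values are simplified stand-ins and flush_skip_mask() is a hypothetical name.

#include <stdbool.h>
#include <stdio.h>

#define REQ_FLUSH		(1u << 0)	/* stand-in values */
#define REQ_FUA			(1u << 1)

#define QUEUE_FSEQ_PREFLUSH	(1u << 1)
#define QUEUE_FSEQ_DATA		(1u << 2)
#define QUEUE_FSEQ_POSTFLUSH	(1u << 3)

/* fflags: what the device supports; cmd_flags: what the request asks
 * for; has_data: whether the request carries a payload.  Returns the
 * QUEUE_FSEQ_* steps that can be skipped, mirroring blk_do_flush(). */
static unsigned int flush_skip_mask(unsigned int fflags,
				    unsigned int cmd_flags, bool has_data)
{
	bool has_flush = fflags & REQ_FLUSH;
	bool has_fua = fflags & REQ_FUA;
	bool do_preflush = has_flush && (cmd_flags & REQ_FLUSH);
	/* FUA on a flush-capable but non-FUA device becomes a post-flush */
	bool do_postflush = has_flush && !has_fua && (cmd_flags & REQ_FUA);
	unsigned int skip = 0;

	if (!do_preflush)
		skip |= QUEUE_FSEQ_PREFLUSH;
	if (!has_data)
		skip |= QUEUE_FSEQ_DATA;
	if (!do_postflush)
		skip |= QUEUE_FSEQ_POSTFLUSH;
	return skip;
}

int main(void)
{
	/* FUA write to a device with FLUSH but no FUA: only the data
	 * write and a post-flush remain (PREFLUSH is skipped). */
	printf("skip=%#x\n", flush_skip_mask(REQ_FLUSH, REQ_FUA, true));
	return 0;
}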
diff --git a/block/blk.h b/block/blk.h
index 24b92bd78f37..a09c18b19116 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -60,6 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 	while (1) {
 		while (!list_empty(&q->queue_head)) {
 			rq = list_entry_rq(q->queue_head.next);
+			if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) ||
+			    rq == &q->flush_rq)
+				return rq;
 			rq = blk_do_flush(q, rq);
 			if (rq)
 				return rq;
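The three added lines amount to the following predicate, restated here for clarity with a hypothetical helper name; __elv_next_request() open-codes this inline.

/* Ordinary requests and the queue's own proxy flush_rq dispatch as-is;
 * only other requests carrying FLUSH/FUA go through blk_do_flush(). */
static bool needs_flush_sequencing(struct request_queue *q,
				   struct request *rq)
{
	if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)))
		return false;		/* no sequencing needed */
	if (rq == &q->flush_rq)
		return false;		/* internal proxy, already sequenced */
	return true;			/* hand to blk_do_flush() */
}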
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 9192282b4259..179799479e6f 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -167,7 +167,7 @@ enum rq_flag_bits {
 	 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
 #define REQ_COMMON_MASK \
 	(REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \
-	 REQ_META| REQ_DISCARD | REQ_NOIDLE)
+	 REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
 
 #define REQ_UNPLUG		(1 << __REQ_UNPLUG)
 #define REQ_RAHEAD		(1 << __REQ_RAHEAD)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1cd83ec077db..8ef705f800ab 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -357,7 +357,6 @@ struct request_queue
 	/*
 	 * for flush operations
 	 */
-	unsigned int		ordered, next_ordered;
 	unsigned int		flush_flags;
 	unsigned int		flush_seq;
 	int			flush_err;
@@ -465,40 +464,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
 	__clear_bit(flag, &q->queue_flags);
 }
 
-enum {
-	/*
-	 * Hardbarrier is supported with one of the following methods.
-	 *
-	 * NONE		: hardbarrier unsupported
-	 * DRAIN	: ordering by draining is enough
-	 * DRAIN_FLUSH	: ordering by draining w/ pre and post flushes
-	 * DRAIN_FUA	: ordering by draining w/ pre flush and FUA write
-	 */
-	QUEUE_ORDERED_DO_PREFLUSH	= 0x10,
-	QUEUE_ORDERED_DO_BAR		= 0x20,
-	QUEUE_ORDERED_DO_POSTFLUSH	= 0x40,
-	QUEUE_ORDERED_DO_FUA		= 0x80,
-
-	QUEUE_ORDERED_NONE		= 0x00,
-
-	QUEUE_ORDERED_DRAIN		= QUEUE_ORDERED_DO_BAR,
-	QUEUE_ORDERED_DRAIN_FLUSH	= QUEUE_ORDERED_DRAIN |
-					  QUEUE_ORDERED_DO_PREFLUSH |
-					  QUEUE_ORDERED_DO_POSTFLUSH,
-	QUEUE_ORDERED_DRAIN_FUA		= QUEUE_ORDERED_DRAIN |
-					  QUEUE_ORDERED_DO_PREFLUSH |
-					  QUEUE_ORDERED_DO_FUA,
-
-	/*
-	 * FLUSH/FUA sequences.
-	 */
-	QUEUE_FSEQ_STARTED	= (1 << 0), /* flushing in progress */
-	QUEUE_FSEQ_PREFLUSH	= (1 << 1), /* pre-flushing in progress */
-	QUEUE_FSEQ_DATA		= (1 << 2), /* data write in progress */
-	QUEUE_FSEQ_POSTFLUSH	= (1 << 3), /* post-flushing in progress */
-	QUEUE_FSEQ_DONE		= (1 << 4),
-};
-
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
@@ -578,7 +543,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync)
  * it already be started by driver.
  */
 #define RQ_NOMERGE_FLAGS	\
-	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
+	(REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \
+	 REQ_FLUSH | REQ_FUA)
 #define rq_mergeable(rq)	\
 	(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
 	 (((rq)->cmd_flags & REQ_DISCARD) || \
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index ec94c12f21da..fc999f583fda 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -32,7 +32,7 @@ enum bh_state_bits {
 	BH_Delay,	/* Buffer is not yet allocated on disk */
 	BH_Boundary,	/* Block is followed by a discontiguity */
 	BH_Write_EIO,	/* I/O error on write */
-	BH_Eopnotsupp,	/* operation not supported (barrier) */
+	BH_Eopnotsupp,	/* DEPRECATED: operation not supported (barrier) */
 	BH_Unwritten,	/* Buffer is allocated on disk but not written */
 	BH_Quiet,	/* Buffer Error Printks to be quiet */
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 76041b614758..352c48627381 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -135,12 +135,13 @@ struct inodes_stat_t {
  *			immediately after submission. The write equivalent
  *			of READ_SYNC.
  * WRITE_ODIRECT_PLUG	Special case write for O_DIRECT only.
- * WRITE_BARRIER	Like WRITE_SYNC, but tells the block layer that all
- *			previously submitted writes must be safely on storage
- *			before this one is started. Also guarantees that when
- *			this write is complete, it itself is also safely on
- *			storage. Prevents reordering of writes on both sides
- *			of this IO.
+ * WRITE_BARRIER	DEPRECATED. Always fails. Use FLUSH/FUA instead.
+ * WRITE_FLUSH		Like WRITE_SYNC but with preceding cache flush.
+ * WRITE_FUA		Like WRITE_SYNC but data is guaranteed to be on
+ *			non-volatile media on completion.
+ * WRITE_FLUSH_FUA	Combination of WRITE_FLUSH and FUA. The IO is preceded
+ *			by a cache flush and data is guaranteed to be on
+ *			non-volatile media on completion.
  *
  */
 #define RW_MASK			REQ_WRITE
@@ -158,6 +159,12 @@ struct inodes_stat_t {
 #define WRITE_META		(WRITE | REQ_META)
 #define WRITE_BARRIER		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
 				 REQ_HARDBARRIER)
+#define WRITE_FLUSH		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
+				 REQ_FLUSH)
+#define WRITE_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
+				 REQ_FUA)
+#define WRITE_FLUSH_FUA	(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
+				 REQ_FLUSH | REQ_FUA)
 
 /*
  * These aren't really reads or writes, they pass down information about
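Finally, a before/after sketch of the simplification the commit message promises for filesystems. The flag macros are real; write_and_wait() is a hypothetical stand-in for a submit-and-wait helper.

/* Before: a barrier write could complete with -EOPNOTSUPP and had to
 * be reissued as a plain write. */
static int commit_super_old(struct buffer_head *bh)
{
	int err = write_and_wait(WRITE_BARRIER, bh);	/* hypothetical */
	if (err == -EOPNOTSUPP)
		err = write_and_wait(WRITE_SYNC, bh);	/* retry w/o barrier */
	return err;
}

/* After: REQ_FLUSH/FUA never fails with -EOPNOTSUPP; unsupported steps
 * become noops, so a single submission suffices. */
static int commit_super_new(struct buffer_head *bh)
{
	return write_and_wait(WRITE_FLUSH_FUA, bh);
}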