diff options
author | Tejun Heo <tj@kernel.org> | 2010-09-03 05:56:17 -0400 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-09-10 06:35:37 -0400 |
commit | 4fed947cb311e5aa51781d316cefca836352f6ce (patch) | |
tree | eada83d5bf503244628e3c190e97e8c7af847e35 | |
parent | dd4c133f387c48f526022860ad70354637a80f4c (diff) |
block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests
Now that the backend conversion is complete, export sequenced
FLUSH/FUA capability through REQ_FLUSH/FUA flags. REQ_FLUSH means the
device cache should be flushed before executing the request. REQ_FUA
means that the data in the request should be on non-volatile media on
completion.
Block layer will choose the correct way of implementing the semantics
and execute it. The request may be passed to the device directly if
the device can handle it; otherwise, it will be sequenced using one or
more proxy requests. Devices will never see REQ_FLUSH and/or FUA
which they don't support.
Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are
never failed with -EOPNOTSUPP. If the underlying device doesn't
support FLUSH/FUA, the block layer simply makes those noops. IOW, it no
longer distinguishes between writeback cache which doesn't support
cache flush and writethrough/no cache. Devices which have WB cache
w/o flush are very difficult to come by these days and there's nothing
much we can do anyway, so it doesn't make sense to require everyone to
implement -EOPNOTSUPP handling. This will simplify filesystems and
block drivers as they can drop -EOPNOTSUPP retry logic for barriers.
* QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into
blk-flush.c.
* REQ_FLUSH w/o data can also be directly passed to drivers without
sequencing but some drivers assume that zero length requests don't
have rq->bio which isn't true for these requests requiring the use
of proxy requests.
* REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are
copied from bio to request.
* WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and
WRITE_FLUSH_FUA are added.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
-rw-r--r-- | block/blk-core.c | 2 | ||||
-rw-r--r-- | block/blk-flush.c | 85 | ||||
-rw-r--r-- | block/blk.h | 3 | ||||
-rw-r--r-- | include/linux/blk_types.h | 2 | ||||
-rw-r--r-- | include/linux/blkdev.h | 38 | ||||
-rw-r--r-- | include/linux/buffer_head.h | 2 | ||||
-rw-r--r-- | include/linux/fs.h | 19 |
7 files changed, 67 insertions, 84 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 8870ae40179d..18455c4f618a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -1204,7 +1204,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1204 | 1204 | ||
1205 | spin_lock_irq(q->queue_lock); | 1205 | spin_lock_irq(q->queue_lock); |
1206 | 1206 | ||
1207 | if (bio->bi_rw & REQ_HARDBARRIER) { | 1207 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { |
1208 | where = ELEVATOR_INSERT_FRONT; | 1208 | where = ELEVATOR_INSERT_FRONT; |
1209 | goto get_rq; | 1209 | goto get_rq; |
1210 | } | 1210 | } |
diff --git a/block/blk-flush.c b/block/blk-flush.c index dd873225da97..452c552e9ead 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Functions related to barrier IO handling | 2 | * Functions to sequence FLUSH and FUA writes. |
3 | */ | 3 | */ |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
@@ -9,6 +9,15 @@ | |||
9 | 9 | ||
10 | #include "blk.h" | 10 | #include "blk.h" |
11 | 11 | ||
12 | /* FLUSH/FUA sequences */ | ||
13 | enum { | ||
14 | QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ | ||
15 | QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ | ||
16 | QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ | ||
17 | QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ | ||
18 | QUEUE_FSEQ_DONE = (1 << 4), | ||
19 | }; | ||
20 | |||
12 | static struct request *queue_next_fseq(struct request_queue *q); | 21 | static struct request *queue_next_fseq(struct request_queue *q); |
13 | 22 | ||
14 | unsigned blk_flush_cur_seq(struct request_queue *q) | 23 | unsigned blk_flush_cur_seq(struct request_queue *q) |
@@ -79,6 +88,7 @@ static void queue_flush(struct request_queue *q, struct request *rq, | |||
79 | 88 | ||
80 | static struct request *queue_next_fseq(struct request_queue *q) | 89 | static struct request *queue_next_fseq(struct request_queue *q) |
81 | { | 90 | { |
91 | struct request *orig_rq = q->orig_flush_rq; | ||
82 | struct request *rq = &q->flush_rq; | 92 | struct request *rq = &q->flush_rq; |
83 | 93 | ||
84 | switch (blk_flush_cur_seq(q)) { | 94 | switch (blk_flush_cur_seq(q)) { |
@@ -87,12 +97,11 @@ static struct request *queue_next_fseq(struct request_queue *q) | |||
87 | break; | 97 | break; |
88 | 98 | ||
89 | case QUEUE_FSEQ_DATA: | 99 | case QUEUE_FSEQ_DATA: |
90 | /* initialize proxy request and queue it */ | 100 | /* initialize proxy request, inherit FLUSH/FUA and queue it */ |
91 | blk_rq_init(q, rq); | 101 | blk_rq_init(q, rq); |
92 | init_request_from_bio(rq, q->orig_flush_rq->bio); | 102 | init_request_from_bio(rq, orig_rq->bio); |
93 | rq->cmd_flags &= ~REQ_HARDBARRIER; | 103 | rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); |
94 | if (q->ordered & QUEUE_ORDERED_DO_FUA) | 104 | rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); |
95 | rq->cmd_flags |= REQ_FUA; | ||
96 | rq->end_io = flush_data_end_io; | 105 | rq->end_io = flush_data_end_io; |
97 | 106 | ||
98 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); | 107 | elv_insert(q, rq, ELEVATOR_INSERT_FRONT); |
@@ -110,60 +119,58 @@ static struct request *queue_next_fseq(struct request_queue *q) | |||
110 | 119 | ||
111 | struct request *blk_do_flush(struct request_queue *q, struct request *rq) | 120 | struct request *blk_do_flush(struct request_queue *q, struct request *rq) |
112 | { | 121 | { |
122 | unsigned int fflags = q->flush_flags; /* may change, cache it */ | ||
123 | bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; | ||
124 | bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); | ||
125 | bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); | ||
113 | unsigned skip = 0; | 126 | unsigned skip = 0; |
114 | 127 | ||
115 | if (!(rq->cmd_flags & REQ_HARDBARRIER)) | 128 | /* |
129 | * Special case. If there's data but flush is not necessary, | ||
130 | * the request can be issued directly. | ||
131 | * | ||
132 | * Flush w/o data should be able to be issued directly too but | ||
133 | * currently some drivers assume that rq->bio contains | ||
134 | * non-zero data if it isn't NULL and empty FLUSH requests | ||
135 | * getting here usually have bio's without data. | ||
136 | */ | ||
137 | if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { | ||
138 | rq->cmd_flags &= ~REQ_FLUSH; | ||
139 | if (!has_fua) | ||
140 | rq->cmd_flags &= ~REQ_FUA; | ||
116 | return rq; | 141 | return rq; |
142 | } | ||
117 | 143 | ||
144 | /* | ||
145 | * Sequenced flushes can't be processed in parallel. If | ||
146 | * another one is already in progress, queue for later | ||
147 | * processing. | ||
148 | */ | ||
118 | if (q->flush_seq) { | 149 | if (q->flush_seq) { |
119 | /* | ||
120 | * Sequenced flush is already in progress and they | ||
121 | * can't be processed in parallel. Queue for later | ||
122 | * processing. | ||
123 | */ | ||
124 | list_move_tail(&rq->queuelist, &q->pending_flushes); | 150 | list_move_tail(&rq->queuelist, &q->pending_flushes); |
125 | return NULL; | 151 | return NULL; |
126 | } | 152 | } |
127 | 153 | ||
128 | if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { | ||
129 | /* | ||
130 | * Queue ordering not supported. Terminate | ||
131 | * with prejudice. | ||
132 | */ | ||
133 | blk_dequeue_request(rq); | ||
134 | __blk_end_request_all(rq, -EOPNOTSUPP); | ||
135 | return NULL; | ||
136 | } | ||
137 | |||
138 | /* | 154 | /* |
139 | * Start a new flush sequence | 155 | * Start a new flush sequence |
140 | */ | 156 | */ |
141 | q->flush_err = 0; | 157 | q->flush_err = 0; |
142 | q->ordered = q->next_ordered; | ||
143 | q->flush_seq |= QUEUE_FSEQ_STARTED; | 158 | q->flush_seq |= QUEUE_FSEQ_STARTED; |
144 | 159 | ||
145 | /* | 160 | /* adjust FLUSH/FUA of the original request and stash it away */ |
146 | * For an empty barrier, there's no actual BAR request, which | 161 | rq->cmd_flags &= ~REQ_FLUSH; |
147 | * in turn makes POSTFLUSH unnecessary. Mask them off. | 162 | if (!has_fua) |
148 | */ | 163 | rq->cmd_flags &= ~REQ_FUA; |
149 | if (!blk_rq_sectors(rq)) | ||
150 | q->ordered &= ~(QUEUE_ORDERED_DO_BAR | | ||
151 | QUEUE_ORDERED_DO_POSTFLUSH); | ||
152 | |||
153 | /* stash away the original request */ | ||
154 | blk_dequeue_request(rq); | 164 | blk_dequeue_request(rq); |
155 | q->orig_flush_rq = rq; | 165 | q->orig_flush_rq = rq; |
156 | 166 | ||
157 | if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) | 167 | /* skip unneded sequences and return the first one */ |
168 | if (!do_preflush) | ||
158 | skip |= QUEUE_FSEQ_PREFLUSH; | 169 | skip |= QUEUE_FSEQ_PREFLUSH; |
159 | 170 | if (!blk_rq_sectors(rq)) | |
160 | if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) | ||
161 | skip |= QUEUE_FSEQ_DATA; | 171 | skip |= QUEUE_FSEQ_DATA; |
162 | 172 | if (!do_postflush) | |
163 | if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) | ||
164 | skip |= QUEUE_FSEQ_POSTFLUSH; | 173 | skip |= QUEUE_FSEQ_POSTFLUSH; |
165 | |||
166 | /* complete skipped sequences and return the first sequence */ | ||
167 | return blk_flush_complete_seq(q, skip, 0); | 174 | return blk_flush_complete_seq(q, skip, 0); |
168 | } | 175 | } |
169 | 176 | ||
diff --git a/block/blk.h b/block/blk.h index 24b92bd78f37..a09c18b19116 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -60,6 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) | |||
60 | while (1) { | 60 | while (1) { |
61 | while (!list_empty(&q->queue_head)) { | 61 | while (!list_empty(&q->queue_head)) { |
62 | rq = list_entry_rq(q->queue_head.next); | 62 | rq = list_entry_rq(q->queue_head.next); |
63 | if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || | ||
64 | rq == &q->flush_rq) | ||
65 | return rq; | ||
63 | rq = blk_do_flush(q, rq); | 66 | rq = blk_do_flush(q, rq); |
64 | if (rq) | 67 | if (rq) |
65 | return rq; | 68 | return rq; |
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9192282b4259..179799479e6f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -167,7 +167,7 @@ enum rq_flag_bits { | |||
167 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) | 167 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) |
168 | #define REQ_COMMON_MASK \ | 168 | #define REQ_COMMON_MASK \ |
169 | (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ | 169 | (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ |
170 | REQ_META| REQ_DISCARD | REQ_NOIDLE) | 170 | REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) |
171 | 171 | ||
172 | #define REQ_UNPLUG (1 << __REQ_UNPLUG) | 172 | #define REQ_UNPLUG (1 << __REQ_UNPLUG) |
173 | #define REQ_RAHEAD (1 << __REQ_RAHEAD) | 173 | #define REQ_RAHEAD (1 << __REQ_RAHEAD) |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cd83ec077db..8ef705f800ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -357,7 +357,6 @@ struct request_queue | |||
357 | /* | 357 | /* |
358 | * for flush operations | 358 | * for flush operations |
359 | */ | 359 | */ |
360 | unsigned int ordered, next_ordered; | ||
361 | unsigned int flush_flags; | 360 | unsigned int flush_flags; |
362 | unsigned int flush_seq; | 361 | unsigned int flush_seq; |
363 | int flush_err; | 362 | int flush_err; |
@@ -465,40 +464,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | |||
465 | __clear_bit(flag, &q->queue_flags); | 464 | __clear_bit(flag, &q->queue_flags); |
466 | } | 465 | } |
467 | 466 | ||
468 | enum { | ||
469 | /* | ||
470 | * Hardbarrier is supported with one of the following methods. | ||
471 | * | ||
472 | * NONE : hardbarrier unsupported | ||
473 | * DRAIN : ordering by draining is enough | ||
474 | * DRAIN_FLUSH : ordering by draining w/ pre and post flushes | ||
475 | * DRAIN_FUA : ordering by draining w/ pre flush and FUA write | ||
476 | */ | ||
477 | QUEUE_ORDERED_DO_PREFLUSH = 0x10, | ||
478 | QUEUE_ORDERED_DO_BAR = 0x20, | ||
479 | QUEUE_ORDERED_DO_POSTFLUSH = 0x40, | ||
480 | QUEUE_ORDERED_DO_FUA = 0x80, | ||
481 | |||
482 | QUEUE_ORDERED_NONE = 0x00, | ||
483 | |||
484 | QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, | ||
485 | QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | | ||
486 | QUEUE_ORDERED_DO_PREFLUSH | | ||
487 | QUEUE_ORDERED_DO_POSTFLUSH, | ||
488 | QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | | ||
489 | QUEUE_ORDERED_DO_PREFLUSH | | ||
490 | QUEUE_ORDERED_DO_FUA, | ||
491 | |||
492 | /* | ||
493 | * FLUSH/FUA sequences. | ||
494 | */ | ||
495 | QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ | ||
496 | QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ | ||
497 | QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ | ||
498 | QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ | ||
499 | QUEUE_FSEQ_DONE = (1 << 4), | ||
500 | }; | ||
501 | |||
502 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) | 467 | #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) |
503 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) | 468 | #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) |
504 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) | 469 | #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) |
@@ -578,7 +543,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) | |||
578 | * it already be started by driver. | 543 | * it already be started by driver. |
579 | */ | 544 | */ |
580 | #define RQ_NOMERGE_FLAGS \ | 545 | #define RQ_NOMERGE_FLAGS \ |
581 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) | 546 | (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ |
547 | REQ_FLUSH | REQ_FUA) | ||
582 | #define rq_mergeable(rq) \ | 548 | #define rq_mergeable(rq) \ |
583 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ | 549 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ |
584 | (((rq)->cmd_flags & REQ_DISCARD) || \ | 550 | (((rq)->cmd_flags & REQ_DISCARD) || \ |
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ec94c12f21da..fc999f583fda 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h | |||
@@ -32,7 +32,7 @@ enum bh_state_bits { | |||
32 | BH_Delay, /* Buffer is not yet allocated on disk */ | 32 | BH_Delay, /* Buffer is not yet allocated on disk */ |
33 | BH_Boundary, /* Block is followed by a discontiguity */ | 33 | BH_Boundary, /* Block is followed by a discontiguity */ |
34 | BH_Write_EIO, /* I/O error on write */ | 34 | BH_Write_EIO, /* I/O error on write */ |
35 | BH_Eopnotsupp, /* operation not supported (barrier) */ | 35 | BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ |
36 | BH_Unwritten, /* Buffer is allocated on disk but not written */ | 36 | BH_Unwritten, /* Buffer is allocated on disk but not written */ |
37 | BH_Quiet, /* Buffer Error Prinks to be quiet */ | 37 | BH_Quiet, /* Buffer Error Prinks to be quiet */ |
38 | 38 | ||
diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..352c48627381 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -135,12 +135,13 @@ struct inodes_stat_t { | |||
135 | * immediately after submission. The write equivalent | 135 | * immediately after submission. The write equivalent |
136 | * of READ_SYNC. | 136 | * of READ_SYNC. |
137 | * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. | 137 | * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. |
138 | * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all | 138 | * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. |
139 | * previously submitted writes must be safely on storage | 139 | * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. |
140 | * before this one is started. Also guarantees that when | 140 | * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on |
141 | * this write is complete, it itself is also safely on | 141 | * non-volatile media on completion. |
142 | * storage. Prevents reordering of writes on both sides | 142 | * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded |
143 | * of this IO. | 143 | * by a cache flush and data is guaranteed to be on |
144 | * non-volatile media on completion. | ||
144 | * | 145 | * |
145 | */ | 146 | */ |
146 | #define RW_MASK REQ_WRITE | 147 | #define RW_MASK REQ_WRITE |
@@ -158,6 +159,12 @@ struct inodes_stat_t { | |||
158 | #define WRITE_META (WRITE | REQ_META) | 159 | #define WRITE_META (WRITE | REQ_META) |
159 | #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ | 160 | #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ |
160 | REQ_HARDBARRIER) | 161 | REQ_HARDBARRIER) |
162 | #define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ | ||
163 | REQ_FLUSH) | ||
164 | #define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ | ||
165 | REQ_FUA) | ||
166 | #define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ | ||
167 | REQ_FLUSH | REQ_FUA) | ||
161 | 168 | ||
162 | /* | 169 | /* |
163 | * These aren't really reads or writes, they pass down information about | 170 | * These aren't really reads or writes, they pass down information about |