author	Tejun Heo <tj@kernel.org>	2010-09-03 05:56:16 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2010-09-10 06:35:36 -0400
commit	28e7d1845216538303bb95d679d8fd4de50e2f1a (patch)
tree	0ef56dc0d7c894657c4ae71a3e8da6e1164fb933 /block/blk-barrier.c
parent	dd831006d5be7f74c3fe7aef82380c51c3637960 (diff)
block: drop barrier ordering by queue draining
Filesystems will take all responsibility for ordering requests around commit writes and will only indicate how the commit writes themselves should be handled by the block layer. This patch drops barrier ordering by queue draining from the block layer. The ordering-by-draining implementation was somewhat invasive to request handling. The notable changes follow.

* Each queue had a one-bit color which was flipped on each barrier issue and was used to track whether a given request was issued before the current barrier. The REQ_ORDERED_COLOR flag and the coloring implementation in __elv_add_request() are removed.

* Requests which shouldn't be processed yet for draining were stalled by returning -EAGAIN from blk_do_ordered() based on the comparison between blk_ordered_req_seq() and blk_ordered_cur_seq(). This logic is removed.

* The draining completion logic in elv_completed_request() is removed.

* All barrier sequence requests were queued to the request queue and then trickled to the lower layer according to progress, so request order had to be maintained during requeue. This is replaced by queueing the next request in the barrier sequence only after the current one completes, from blk_ordered_complete_seq(), which removes the need for multiple proxy requests in struct request_queue and for the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of elv_insert().

* As barriers no longer have ordering constraints, there's no need to dump the whole elevator onto the dispatch queue on each barrier. Barriers are inserted at the front of the dispatch queue instead.

* If other barrier requests reach the front of the dispatch queue while one is already in progress, they are stored on q->pending_barriers and restored to the dispatch queue one by one after each barrier completion, from blk_ordered_complete_seq().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
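The per-queue sequencing described above (issue the next barrier step only after the previous one completes, with steps that aren't needed marked complete up front) can be pictured with a small standalone model. The following is a minimal userspace sketch only: the enum values, helper names, and the single-step scenario are illustrative assumptions, not the kernel's definitions.

/*
 * Toy model of the ordered-sequence bitmask: each step owns a bit,
 * skipped steps are pre-marked complete, and the next step to issue
 * is the lowest still-unset bit (cf. blk_ordered_cur_seq()).
 * All names and values here are illustrative, not kernel code.
 */
#include <stdio.h>

enum {
	ORDSEQ_STARTED   = 1 << 0,
	ORDSEQ_PREFLUSH  = 1 << 1,
	ORDSEQ_BAR       = 1 << 2,
	ORDSEQ_POSTFLUSH = 1 << 3,
	ORDSEQ_DONE      = 1 << 4,
};

/* lowest still-unset bit, mirroring "1 << ffz(ordseq)" */
static unsigned cur_seq(unsigned ordseq)
{
	unsigned bit = 1;

	while (ordseq & bit)
		bit <<= 1;
	return bit;
}

static const char *seq_name(unsigned seq)
{
	switch (seq) {
	case ORDSEQ_PREFLUSH:  return "PREFLUSH";
	case ORDSEQ_BAR:       return "BAR";
	case ORDSEQ_POSTFLUSH: return "POSTFLUSH";
	default:               return "DONE";
	}
}

int main(void)
{
	/*
	 * Empty barrier on a queue whose ordered mode includes a preflush:
	 * BAR and POSTFLUSH are not needed, so they are marked complete
	 * ("skipped") before any step is issued.
	 */
	unsigned ordseq = ORDSEQ_STARTED | ORDSEQ_BAR | ORDSEQ_POSTFLUSH;

	while (cur_seq(ordseq) != ORDSEQ_DONE) {
		unsigned seq = cur_seq(ordseq);

		printf("issue step %s\n", seq_name(seq));
		ordseq |= seq;	/* the step's completion handler would set this */
	}
	printf("barrier sequence complete\n");
	return 0;
}

In this scenario only the PREFLUSH step is issued before the sequence reaches DONE, which mirrors how the patch masks off QUEUE_ORDERED_DO_BAR and QUEUE_ORDERED_DO_POSTFLUSH for an empty barrier and lets blk_ordered_complete_seq() skip straight over the unused steps.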
Diffstat (limited to 'block/blk-barrier.c')
-rw-r--r--	block/blk-barrier.c	220
1 file changed, 86 insertions, 134 deletions
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index f1be85ba2bb5..e8b2e5c091b1 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -9,6 +9,8 @@
 
 #include "blk.h"
 
+static struct request *queue_next_ordseq(struct request_queue *q);
+
 /*
  * Cache flushing for ordered writes handling
  */
@@ -19,38 +21,10 @@ unsigned blk_ordered_cur_seq(struct request_queue *q)
 	return 1 << ffz(q->ordseq);
 }
 
-unsigned blk_ordered_req_seq(struct request *rq)
-{
-	struct request_queue *q = rq->q;
-
-	BUG_ON(q->ordseq == 0);
-
-	if (rq == &q->pre_flush_rq)
-		return QUEUE_ORDSEQ_PREFLUSH;
-	if (rq == &q->bar_rq)
-		return QUEUE_ORDSEQ_BAR;
-	if (rq == &q->post_flush_rq)
-		return QUEUE_ORDSEQ_POSTFLUSH;
-
-	/*
-	 * !fs requests don't need to follow barrier ordering.  Always
-	 * put them at the front.  This fixes the following deadlock.
-	 *
-	 * http://thread.gmane.org/gmane.linux.kernel/537473
-	 */
-	if (rq->cmd_type != REQ_TYPE_FS)
-		return QUEUE_ORDSEQ_DRAIN;
-
-	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
-	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
-		return QUEUE_ORDSEQ_DRAIN;
-	else
-		return QUEUE_ORDSEQ_DONE;
-}
-
-bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+static struct request *blk_ordered_complete_seq(struct request_queue *q,
+						unsigned seq, int error)
 {
-	struct request *rq;
+	struct request *next_rq = NULL;
 
 	if (error && !q->orderr)
 		q->orderr = error;
@@ -58,16 +32,22 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 	BUG_ON(q->ordseq & seq);
 	q->ordseq |= seq;
 
-	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
-		return false;
-
-	/*
-	 * Okay, sequence complete.
-	 */
-	q->ordseq = 0;
-	rq = q->orig_bar_rq;
-	__blk_end_request_all(rq, q->orderr);
-	return true;
+	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) {
+		/* not complete yet, queue the next ordered sequence */
+		next_rq = queue_next_ordseq(q);
+	} else {
+		/* complete this barrier request */
+		__blk_end_request_all(q->orig_bar_rq, q->orderr);
+		q->orig_bar_rq = NULL;
+		q->ordseq = 0;
+
+		/* dispatch the next barrier if there's one */
+		if (!list_empty(&q->pending_barriers)) {
+			next_rq = list_entry_rq(q->pending_barriers.next);
+			list_move(&next_rq->queuelist, &q->queue_head);
+		}
+	}
+	return next_rq;
 }
 
 static void pre_flush_end_io(struct request *rq, int error)
@@ -88,133 +68,105 @@ static void post_flush_end_io(struct request *rq, int error)
 	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
 }
 
-static void queue_flush(struct request_queue *q, unsigned which)
+static void queue_flush(struct request_queue *q, struct request *rq,
+			rq_end_io_fn *end_io)
 {
-	struct request *rq;
-	rq_end_io_fn *end_io;
-
-	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
-		rq = &q->pre_flush_rq;
-		end_io = pre_flush_end_io;
-	} else {
-		rq = &q->post_flush_rq;
-		end_io = post_flush_end_io;
-	}
-
 	blk_rq_init(q, rq);
 	rq->cmd_type = REQ_TYPE_FS;
-	rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH;
+	rq->cmd_flags = REQ_FLUSH;
 	rq->rq_disk = q->orig_bar_rq->rq_disk;
 	rq->end_io = end_io;
 
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
-static inline struct request *start_ordered(struct request_queue *q,
-					    struct request *rq)
+static struct request *queue_next_ordseq(struct request_queue *q)
 {
-	unsigned skip = 0;
-
-	q->orderr = 0;
-	q->ordered = q->next_ordered;
-	q->ordseq |= QUEUE_ORDSEQ_STARTED;
-
-	/*
-	 * For an empty barrier, there's no actual BAR request, which
-	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
-	 */
-	if (!blk_rq_sectors(rq))
-		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
-				QUEUE_ORDERED_DO_POSTFLUSH);
-
-	/* stash away the original request */
-	blk_dequeue_request(rq);
-	q->orig_bar_rq = rq;
-	rq = NULL;
-
-	/*
-	 * Queue ordered sequence.  As we stack them at the head, we
-	 * need to queue in reverse order.  Note that we rely on that
-	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-	 * request gets inbetween ordered sequence.
-	 */
-	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
-		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
-		rq = &q->post_flush_rq;
-	} else
-		skip |= QUEUE_ORDSEQ_POSTFLUSH;
+	struct request *rq = &q->bar_rq;
 
-	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
-		rq = &q->bar_rq;
+	switch (blk_ordered_cur_seq(q)) {
+	case QUEUE_ORDSEQ_PREFLUSH:
+		queue_flush(q, rq, pre_flush_end_io);
+		break;
 
+	case QUEUE_ORDSEQ_BAR:
 		/* initialize proxy request and queue it */
 		blk_rq_init(q, rq);
 		init_request_from_bio(rq, q->orig_bar_rq->bio);
+		rq->cmd_flags &= ~REQ_HARDBARRIER;
 		if (q->ordered & QUEUE_ORDERED_DO_FUA)
 			rq->cmd_flags |= REQ_FUA;
 		rq->end_io = bar_end_io;
 
 		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
-	} else
-		skip |= QUEUE_ORDSEQ_BAR;
+		break;
 
-	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
-		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
-		rq = &q->pre_flush_rq;
-	} else
-		skip |= QUEUE_ORDSEQ_PREFLUSH;
+	case QUEUE_ORDSEQ_POSTFLUSH:
+		queue_flush(q, rq, post_flush_end_io);
+		break;
 
-	if (queue_in_flight(q))
-		rq = NULL;
-	else
-		skip |= QUEUE_ORDSEQ_DRAIN;
-
-	/*
-	 * Complete skipped sequences.  If whole sequence is complete,
-	 * return %NULL to tell elevator that this request is gone.
-	 */
-	if (blk_ordered_complete_seq(q, skip, 0))
-		rq = NULL;
+	default:
+		BUG();
+	}
 	return rq;
 }
 
 struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
 {
-	const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
-		(rq->cmd_flags & REQ_HARDBARRIER);
-
-	if (!q->ordseq) {
-		if (!is_barrier)
-			return rq;
-
-		if (q->next_ordered != QUEUE_ORDERED_NONE)
-			return start_ordered(q, rq);
-		else {
-			/*
-			 * Queue ordering not supported.  Terminate
-			 * with prejudice.
-			 */
-			blk_dequeue_request(rq);
-			__blk_end_request_all(rq, -EOPNOTSUPP);
-			return NULL;
-		}
+	unsigned skip = 0;
+
+	if (!(rq->cmd_flags & REQ_HARDBARRIER))
+		return rq;
+
+	if (q->ordseq) {
+		/*
+		 * Barrier is already in progress and they can't be
+		 * processed in parallel.  Queue for later processing.
+		 */
+		list_move_tail(&rq->queuelist, &q->pending_barriers);
+		return NULL;
+	}
+
+	if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
+		/*
+		 * Queue ordering not supported.  Terminate
+		 * with prejudice.
+		 */
+		blk_dequeue_request(rq);
+		__blk_end_request_all(rq, -EOPNOTSUPP);
+		return NULL;
 	}
 
 	/*
-	 * Ordered sequence in progress
+	 * Start a new ordered sequence
 	 */
+	q->orderr = 0;
+	q->ordered = q->next_ordered;
+	q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
-	/* Special requests are not subject to ordering rules. */
-	if (rq->cmd_type != REQ_TYPE_FS &&
-	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
-		return rq;
+	/*
+	 * For an empty barrier, there's no actual BAR request, which
+	 * in turn makes POSTFLUSH unnecessary.  Mask them off.
+	 */
+	if (!blk_rq_sectors(rq))
+		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+				QUEUE_ORDERED_DO_POSTFLUSH);
 
-	/* Ordered by draining.  Wait for turn. */
-	WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
-	if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
-		rq = ERR_PTR(-EAGAIN);
+	/* stash away the original request */
+	blk_dequeue_request(rq);
+	q->orig_bar_rq = rq;
 
-	return rq;
+	if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
+		skip |= QUEUE_ORDSEQ_PREFLUSH;
+
+	if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
+		skip |= QUEUE_ORDSEQ_BAR;
+
+	if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
+		skip |= QUEUE_ORDSEQ_POSTFLUSH;
+
+	/* complete skipped sequences and return the first sequence */
+	return blk_ordered_complete_seq(q, skip, 0);
 }
 
 static void bio_end_empty_barrier(struct bio *bio, int err)