Diffstat (limited to 'block')
-rw-r--r--  block/blk-core.c      85
-rw-r--r--  block/blk-flush.c    441
-rw-r--r--  block/blk.h           12
-rw-r--r--  block/cfq-iosched.c   20
-rw-r--r--  block/elevator.c       9

5 files changed, 376 insertions, 191 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 2f4002f79a24..3cc17e6064d6 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -149,39 +149,29 @@ EXPORT_SYMBOL(blk_rq_init);
 static void req_bio_endio(struct request *rq, struct bio *bio,
 			  unsigned int nbytes, int error)
 {
-	struct request_queue *q = rq->q;
-
-	if (&q->flush_rq != rq) {
-		if (error)
-			clear_bit(BIO_UPTODATE, &bio->bi_flags);
-		else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-			error = -EIO;
-
-		if (unlikely(nbytes > bio->bi_size)) {
-			printk(KERN_ERR "%s: want %u bytes done, %u left\n",
-			       __func__, nbytes, bio->bi_size);
-			nbytes = bio->bi_size;
-		}
+	if (error)
+		clear_bit(BIO_UPTODATE, &bio->bi_flags);
+	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
+		error = -EIO;
+
+	if (unlikely(nbytes > bio->bi_size)) {
+		printk(KERN_ERR "%s: want %u bytes done, %u left\n",
+		       __func__, nbytes, bio->bi_size);
+		nbytes = bio->bi_size;
+	}
 
-		if (unlikely(rq->cmd_flags & REQ_QUIET))
-			set_bit(BIO_QUIET, &bio->bi_flags);
+	if (unlikely(rq->cmd_flags & REQ_QUIET))
+		set_bit(BIO_QUIET, &bio->bi_flags);
 
-		bio->bi_size -= nbytes;
-		bio->bi_sector += (nbytes >> 9);
+	bio->bi_size -= nbytes;
+	bio->bi_sector += (nbytes >> 9);
 
-		if (bio_integrity(bio))
-			bio_integrity_advance(bio, nbytes);
+	if (bio_integrity(bio))
+		bio_integrity_advance(bio, nbytes);
 
-		if (bio->bi_size == 0)
-			bio_endio(bio, error);
-	} else {
-		/*
-		 * Okay, this is the sequenced flush request in
-		 * progress, just record the error;
-		 */
-		if (error && !q->flush_err)
-			q->flush_err = error;
-	}
+	/* don't actually finish bio if it's part of flush sequence */
+	if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
+		bio_endio(bio, error);
 }
 
 void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -540,7 +530,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	init_timer(&q->unplug_timer);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
-	INIT_LIST_HEAD(&q->pending_flushes);
+	INIT_LIST_HEAD(&q->flush_queue[0]);
+	INIT_LIST_HEAD(&q->flush_queue[1]);
+	INIT_LIST_HEAD(&q->flush_data_in_flight);
 	INIT_WORK(&q->unplug_work, blk_unplug_work);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
@@ -761,6 +753,25 @@ static void freed_request(struct request_queue *q, int sync, int priv)
 }
 
 /*
+ * Determine if elevator data should be initialized when allocating the
+ * request associated with @bio.
+ */
+static bool blk_rq_should_init_elevator(struct bio *bio)
+{
+	if (!bio)
+		return true;
+
+	/*
+	 * Flush requests do not use the elevator so skip initialization.
+	 * This allows a request to share the flush and elevator data.
+	 */
+	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA))
+		return false;
+
+	return true;
+}
+
+/*
  * Get a free request, queue_lock must be held.
  * Returns NULL on failure, with queue_lock held.
  * Returns !NULL on success, with queue_lock *not held*.
@@ -772,7 +783,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	struct request_list *rl = &q->rq;
 	struct io_context *ioc = NULL;
 	const bool is_sync = rw_is_sync(rw_flags) != 0;
-	int may_queue, priv;
+	int may_queue, priv = 0;
 
 	may_queue = elv_may_queue(q, rw_flags);
 	if (may_queue == ELV_MQUEUE_NO)
@@ -816,9 +827,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags,
 	rl->count[is_sync]++;
 	rl->starved[is_sync] = 0;
 
-	priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
-	if (priv)
-		rl->elvpriv++;
+	if (blk_rq_should_init_elevator(bio)) {
+		priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags);
+		if (priv)
+			rl->elvpriv++;
+	}
 
 	if (blk_queue_io_stat(q))
 		rw_flags |= REQ_IO_STAT;
@@ -1219,7 +1232,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	spin_lock_irq(q->queue_lock);
 
 	if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) {
-		where = ELEVATOR_INSERT_FRONT;
+		where = ELEVATOR_INSERT_FLUSH;
 		goto get_rq;
 	}
 
@@ -1804,7 +1817,7 @@ static void blk_account_io_done(struct request *req)
 	 * normal IO on queueing nor completion.  Accounting the
 	 * containing request is enough.
 	 */
-	if (blk_do_io_stat(req) && req != &req->q->flush_rq) {
+	if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) {
 		unsigned long duration = jiffies - req->start_time;
 		const int rw = rq_data_dir(req);
 		struct hd_struct *part;
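
Two of the blk-core.c changes above are simple gates: get_request() now skips elevator-private initialization for FLUSH/FUA bios, and req_bio_endio() no longer finishes a bio that is still part of a flush sequence. Below is a standalone userspace sketch of that logic; the X_REQ_* values and helper names are illustrative stand-ins, not the kernel's flag definitions.

/* Userspace sketch only: X_REQ_* values are stand-ins, not the kernel's
 * REQ_* definitions. */
#include <stdbool.h>
#include <stdio.h>

#define X_REQ_FLUSH	(1u << 0)	/* stand-in for REQ_FLUSH */
#define X_REQ_FUA	(1u << 1)	/* stand-in for REQ_FUA */
#define X_REQ_FLUSH_SEQ	(1u << 2)	/* stand-in for REQ_FLUSH_SEQ */

/* Mirrors blk_rq_should_init_elevator(): FLUSH/FUA bios skip elevator
 * data so the request can reuse that space for flush bookkeeping. */
static bool should_init_elevator(bool have_bio, unsigned int bio_rw)
{
	if (!have_bio)
		return true;
	return !(bio_rw & (X_REQ_FLUSH | X_REQ_FUA));
}

/* Mirrors the new test in req_bio_endio(): a fully-transferred bio is not
 * ended yet if its request is still going through the flush sequence. */
static bool should_end_bio_now(unsigned int rq_cmd_flags, unsigned int bytes_left)
{
	return bytes_left == 0 && !(rq_cmd_flags & X_REQ_FLUSH_SEQ);
}

int main(void)
{
	printf("plain write inits elevator data: %d\n",
	       should_init_elevator(true, 0));
	printf("FUA write inits elevator data:   %d\n",
	       should_init_elevator(true, X_REQ_FUA));
	printf("flush-seq data done ends bio:    %d\n",
	       should_end_bio_now(X_REQ_FLUSH_SEQ, 0));
	printf("normal request done ends bio:    %d\n",
	       should_end_bio_now(0, 0));
	return 0;
}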
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 54b123d6563e..a867e3f524f3 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -1,6 +1,69 @@
 /*
  * Functions to sequence FLUSH and FUA writes.
+ *
+ * Copyright (C) 2011		Max Planck Institute for Gravitational Physics
+ * Copyright (C) 2011		Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2.
+ *
+ * REQ_{FLUSH|FUA} requests are decomposed to sequences consisted of three
+ * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request
+ * properties and hardware capability.
+ *
+ * If a request doesn't have data, only REQ_FLUSH makes sense, which
+ * indicates a simple flush request.  If there is data, REQ_FLUSH indicates
+ * that the device cache should be flushed before the data is executed, and
+ * REQ_FUA means that the data must be on non-volatile media on request
+ * completion.
+ *
+ * If the device doesn't have writeback cache, FLUSH and FUA don't make any
+ * difference.  The requests are either completed immediately if there's no
+ * data or executed as normal requests otherwise.
+ *
+ * If the device has writeback cache and supports FUA, REQ_FLUSH is
+ * translated to PREFLUSH but REQ_FUA is passed down directly with DATA.
+ *
+ * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is
+ * translated to PREFLUSH and REQ_FUA to POSTFLUSH.
+ *
+ * The actual execution of flush is double buffered.  Whenever a request
+ * needs to execute PRE or POSTFLUSH, it queues at
+ * q->flush_queue[q->flush_pending_idx].  Once certain criteria are met, a
+ * flush is issued and the pending_idx is toggled.  When the flush
+ * completes, all the requests which were pending are proceeded to the next
+ * step.  This allows arbitrary merging of different types of FLUSH/FUA
+ * requests.
+ *
+ * Currently, the following conditions are used to determine when to issue
+ * flush.
+ *
+ * C1. At any given time, only one flush shall be in progress.  This makes
+ *     double buffering sufficient.
+ *
+ * C2. Flush is deferred if any request is executing DATA of its sequence.
+ *     This avoids issuing separate POSTFLUSHes for requests which shared
+ *     PREFLUSH.
+ *
+ * C3. The second condition is ignored if there is a request which has
+ *     waited longer than FLUSH_PENDING_TIMEOUT.  This is to avoid
+ *     starvation in the unlikely case where there are continuous stream of
+ *     FUA (without FLUSH) requests.
+ *
+ * For devices which support FUA, it isn't clear whether C2 (and thus C3)
+ * is beneficial.
+ *
+ * Note that a sequenced FLUSH/FUA request with DATA is completed twice.
+ * Once while executing DATA and again after the whole sequence is
+ * complete.  The first completion updates the contained bio but doesn't
+ * finish it so that the bio submitter is notified only after the whole
+ * sequence is complete.  This is implemented by testing REQ_FLUSH_SEQ in
+ * req_bio_endio().
+ *
+ * The above peculiarity requires that each FLUSH/FUA request has only one
+ * bio attached to it, which is guaranteed as they aren't allowed to be
+ * merged in the usual way.
  */
+
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/bio.h>
@@ -11,184 +74,290 @@
 
 /* FLUSH/FUA sequences */
 enum {
-	QUEUE_FSEQ_STARTED	= (1 << 0), /* flushing in progress */
-	QUEUE_FSEQ_PREFLUSH	= (1 << 1), /* pre-flushing in progress */
-	QUEUE_FSEQ_DATA		= (1 << 2), /* data write in progress */
-	QUEUE_FSEQ_POSTFLUSH	= (1 << 3), /* post-flushing in progress */
-	QUEUE_FSEQ_DONE		= (1 << 4),
+	REQ_FSEQ_PREFLUSH	= (1 << 0), /* pre-flushing in progress */
+	REQ_FSEQ_DATA		= (1 << 1), /* data write in progress */
+	REQ_FSEQ_POSTFLUSH	= (1 << 2), /* post-flushing in progress */
+	REQ_FSEQ_DONE		= (1 << 3),
+
+	REQ_FSEQ_ACTIONS	= REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA |
+				  REQ_FSEQ_POSTFLUSH,
+
+	/*
+	 * If flush has been pending longer than the following timeout,
+	 * it's issued even if flush_data requests are still in flight.
+	 */
+	FLUSH_PENDING_TIMEOUT	= 5 * HZ,
 };
 
-static struct request *queue_next_fseq(struct request_queue *q);
+static bool blk_kick_flush(struct request_queue *q);
 
-unsigned blk_flush_cur_seq(struct request_queue *q)
+static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq)
 {
-	if (!q->flush_seq)
-		return 0;
-	return 1 << ffz(q->flush_seq);
+	unsigned int policy = 0;
+
+	if (fflags & REQ_FLUSH) {
+		if (rq->cmd_flags & REQ_FLUSH)
+			policy |= REQ_FSEQ_PREFLUSH;
+		if (blk_rq_sectors(rq))
+			policy |= REQ_FSEQ_DATA;
+		if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA))
+			policy |= REQ_FSEQ_POSTFLUSH;
+	}
+	return policy;
 }
 
-static struct request *blk_flush_complete_seq(struct request_queue *q,
-					      unsigned seq, int error)
+static unsigned int blk_flush_cur_seq(struct request *rq)
 {
-	struct request *next_rq = NULL;
-
-	if (error && !q->flush_err)
-		q->flush_err = error;
-
-	BUG_ON(q->flush_seq & seq);
-	q->flush_seq |= seq;
-
-	if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) {
-		/* not complete yet, queue the next flush sequence */
-		next_rq = queue_next_fseq(q);
-	} else {
-		/* complete this flush request */
-		__blk_end_request_all(q->orig_flush_rq, q->flush_err);
-		q->orig_flush_rq = NULL;
-		q->flush_seq = 0;
-
-		/* dispatch the next flush if there's one */
-		if (!list_empty(&q->pending_flushes)) {
-			next_rq = list_entry_rq(q->pending_flushes.next);
-			list_move(&next_rq->queuelist, &q->queue_head);
-		}
-	}
-	return next_rq;
+	return 1 << ffz(rq->flush.seq);
 }
 
-static void blk_flush_complete_seq_end_io(struct request_queue *q,
-					  unsigned seq, int error)
+static void blk_flush_restore_request(struct request *rq)
 {
-	bool was_empty = elv_queue_empty(q);
-	struct request *next_rq;
-
-	next_rq = blk_flush_complete_seq(q, seq, error);
-
 	/*
-	 * Moving a request silently to empty queue_head may stall the
-	 * queue.  Kick the queue in those cases.
+	 * After flush data completion, @rq->bio is %NULL but we need to
+	 * complete the bio again.  @rq->biotail is guaranteed to equal the
+	 * original @rq->bio.  Restore it.
 	 */
-	if (was_empty && next_rq)
-		__blk_run_queue(q);
+	rq->bio = rq->biotail;
+
+	/* make @rq a normal request */
+	rq->cmd_flags &= ~REQ_FLUSH_SEQ;
+	rq->end_io = NULL;
 }
 
-static void pre_flush_end_io(struct request *rq, int error)
+/**
+ * blk_flush_complete_seq - complete flush sequence
+ * @rq: FLUSH/FUA request being sequenced
+ * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero)
+ * @error: whether an error occurred
+ *
+ * @rq just completed @seq part of its flush sequence, record the
+ * completion and trigger the next step.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ *
+ * RETURNS:
+ * %true if requests were added to the dispatch queue, %false otherwise.
+ */
+static bool blk_flush_complete_seq(struct request *rq, unsigned int seq,
+				   int error)
 {
-	elv_completed_request(rq->q, rq);
-	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error);
+	struct request_queue *q = rq->q;
+	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+	bool queued = false;
+
+	BUG_ON(rq->flush.seq & seq);
+	rq->flush.seq |= seq;
+
+	if (likely(!error))
+		seq = blk_flush_cur_seq(rq);
+	else
+		seq = REQ_FSEQ_DONE;
+
+	switch (seq) {
+	case REQ_FSEQ_PREFLUSH:
+	case REQ_FSEQ_POSTFLUSH:
+		/* queue for flush */
+		if (list_empty(pending))
+			q->flush_pending_since = jiffies;
+		list_move_tail(&rq->flush.list, pending);
+		break;
+
+	case REQ_FSEQ_DATA:
+		list_move_tail(&rq->flush.list, &q->flush_data_in_flight);
+		list_add(&rq->queuelist, &q->queue_head);
+		queued = true;
+		break;
+
+	case REQ_FSEQ_DONE:
+		/*
+		 * @rq was previously adjusted by blk_flush_issue() for
+		 * flush sequencing and may already have gone through the
+		 * flush data request completion path.  Restore @rq for
+		 * normal completion and end it.
+		 */
+		BUG_ON(!list_empty(&rq->queuelist));
+		list_del_init(&rq->flush.list);
+		blk_flush_restore_request(rq);
+		__blk_end_request_all(rq, error);
+		break;
+
+	default:
+		BUG();
+	}
+
+	return blk_kick_flush(q) | queued;
 }
 
-static void flush_data_end_io(struct request *rq, int error)
+static void flush_end_io(struct request *flush_rq, int error)
 {
-	elv_completed_request(rq->q, rq);
-	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error);
+	struct request_queue *q = flush_rq->q;
+	struct list_head *running = &q->flush_queue[q->flush_running_idx];
+	bool was_empty = elv_queue_empty(q);
+	bool queued = false;
+	struct request *rq, *n;
+
+	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
+
+	/* account completion of the flush request */
+	q->flush_running_idx ^= 1;
+	elv_completed_request(q, flush_rq);
+
+	/* and push the waiting requests to the next stage */
+	list_for_each_entry_safe(rq, n, running, flush.list) {
+		unsigned int seq = blk_flush_cur_seq(rq);
+
+		BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH);
+		queued |= blk_flush_complete_seq(rq, seq, error);
+	}
+
+	/* after populating an empty queue, kick it to avoid stall */
+	if (queued && was_empty)
+		__blk_run_queue(q);
 }
 
-static void post_flush_end_io(struct request *rq, int error)
+/**
+ * blk_kick_flush - consider issuing flush request
+ * @q: request_queue being kicked
+ *
+ * Flush related states of @q have changed, consider issuing flush request.
+ * Please read the comment at the top of this file for more info.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ *
+ * RETURNS:
+ * %true if flush was issued, %false otherwise.
+ */
+static bool blk_kick_flush(struct request_queue *q)
 {
-	elv_completed_request(rq->q, rq);
-	blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error);
+	struct list_head *pending = &q->flush_queue[q->flush_pending_idx];
+	struct request *first_rq =
+		list_first_entry(pending, struct request, flush.list);
+
+	/* C1 described at the top of this file */
+	if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending))
+		return false;
+
+	/* C2 and C3 */
+	if (!list_empty(&q->flush_data_in_flight) &&
+	    time_before(jiffies,
+			q->flush_pending_since + FLUSH_PENDING_TIMEOUT))
+		return false;
+
+	/*
+	 * Issue flush and toggle pending_idx.  This makes pending_idx
+	 * different from running_idx, which means flush is in flight.
+	 */
+	blk_rq_init(q, &q->flush_rq);
+	q->flush_rq.cmd_type = REQ_TYPE_FS;
+	q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ;
+	q->flush_rq.rq_disk = first_rq->rq_disk;
+	q->flush_rq.end_io = flush_end_io;
+
+	q->flush_pending_idx ^= 1;
+	elv_insert(q, &q->flush_rq, ELEVATOR_INSERT_FRONT);
+	return true;
 }
 
-static void init_flush_request(struct request *rq, struct gendisk *disk)
+static void flush_data_end_io(struct request *rq, int error)
 {
-	rq->cmd_type = REQ_TYPE_FS;
-	rq->cmd_flags = WRITE_FLUSH;
-	rq->rq_disk = disk;
+	struct request_queue *q = rq->q;
+	bool was_empty = elv_queue_empty(q);
+
+	/* after populating an empty queue, kick it to avoid stall */
+	if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error) && was_empty)
+		__blk_run_queue(q);
 }
 
-static struct request *queue_next_fseq(struct request_queue *q)
+/**
+ * blk_insert_flush - insert a new FLUSH/FUA request
+ * @rq: request to insert
+ *
+ * To be called from elv_insert() for %ELEVATOR_INSERT_FLUSH insertions.
+ * @rq is being submitted.  Analyze what needs to be done and put it on the
+ * right queue.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ */
+void blk_insert_flush(struct request *rq)
 {
-	struct request *orig_rq = q->orig_flush_rq;
-	struct request *rq = &q->flush_rq;
+	struct request_queue *q = rq->q;
+	unsigned int fflags = q->flush_flags;	/* may change, cache */
+	unsigned int policy = blk_flush_policy(fflags, rq);
 
-	blk_rq_init(q, rq);
+	BUG_ON(rq->end_io);
+	BUG_ON(!rq->bio || rq->bio != rq->biotail);
 
-	switch (blk_flush_cur_seq(q)) {
-	case QUEUE_FSEQ_PREFLUSH:
-		init_flush_request(rq, orig_rq->rq_disk);
-		rq->end_io = pre_flush_end_io;
-		break;
-	case QUEUE_FSEQ_DATA:
-		init_request_from_bio(rq, orig_rq->bio);
-		/*
-		 * orig_rq->rq_disk may be different from
-		 * bio->bi_bdev->bd_disk if orig_rq got here through
-		 * remapping drivers.  Make sure rq->rq_disk points
-		 * to the same one as orig_rq.
-		 */
-		rq->rq_disk = orig_rq->rq_disk;
-		rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA);
-		rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA);
-		rq->end_io = flush_data_end_io;
-		break;
-	case QUEUE_FSEQ_POSTFLUSH:
-		init_flush_request(rq, orig_rq->rq_disk);
-		rq->end_io = post_flush_end_io;
-		break;
-	default:
-		BUG();
+	/*
+	 * @policy now records what operations need to be done.  Adjust
+	 * REQ_FLUSH and FUA for the driver.
+	 */
+	rq->cmd_flags &= ~REQ_FLUSH;
+	if (!(fflags & REQ_FUA))
+		rq->cmd_flags &= ~REQ_FUA;
+
+	/*
+	 * If there's data but flush is not necessary, the request can be
+	 * processed directly without going through flush machinery.  Queue
+	 * for normal execution.
+	 */
+	if ((policy & REQ_FSEQ_DATA) &&
+	    !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) {
+		list_add(&rq->queuelist, &q->queue_head);
+		return;
 	}
 
-	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
-	return rq;
+	/*
+	 * @rq should go through flush machinery.  Mark it part of flush
+	 * sequence and submit for further processing.
+	 */
+	memset(&rq->flush, 0, sizeof(rq->flush));
+	INIT_LIST_HEAD(&rq->flush.list);
+	rq->cmd_flags |= REQ_FLUSH_SEQ;
+	rq->end_io = flush_data_end_io;
+
+	blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0);
 }
 
-struct request *blk_do_flush(struct request_queue *q, struct request *rq)
+/**
+ * blk_abort_flushes - @q is being aborted, abort flush requests
+ * @q: request_queue being aborted
+ *
+ * To be called from elv_abort_queue().  @q is being aborted.  Prepare all
+ * FLUSH/FUA requests for abortion.
+ *
+ * CONTEXT:
+ * spin_lock_irq(q->queue_lock)
+ */
+void blk_abort_flushes(struct request_queue *q)
 {
-	unsigned int fflags = q->flush_flags; /* may change, cache it */
-	bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA;
-	bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH);
-	bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA);
-	unsigned skip = 0;
+	struct request *rq, *n;
+	int i;
 
 	/*
-	 * Special case.  If there's data but flush is not necessary,
-	 * the request can be issued directly.
-	 *
-	 * Flush w/o data should be able to be issued directly too but
-	 * currently some drivers assume that rq->bio contains
-	 * non-zero data if it isn't NULL and empty FLUSH requests
-	 * getting here usually have bio's without data.
+	 * Requests in flight for data are already owned by the dispatch
+	 * queue or the device driver.  Just restore for normal completion.
 	 */
-	if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) {
-		rq->cmd_flags &= ~REQ_FLUSH;
-		if (!has_fua)
-			rq->cmd_flags &= ~REQ_FUA;
-		return rq;
+	list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) {
+		list_del_init(&rq->flush.list);
+		blk_flush_restore_request(rq);
 	}
 
 	/*
-	 * Sequenced flushes can't be processed in parallel.  If
-	 * another one is already in progress, queue for later
-	 * processing.
+	 * We need to give away requests on flush queues.  Restore for
+	 * normal completion and put them on the dispatch queue.
 	 */
-	if (q->flush_seq) {
-		list_move_tail(&rq->queuelist, &q->pending_flushes);
-		return NULL;
+	for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) {
+		list_for_each_entry_safe(rq, n, &q->flush_queue[i],
+					 flush.list) {
+			list_del_init(&rq->flush.list);
+			blk_flush_restore_request(rq);
+			list_add_tail(&rq->queuelist, &q->queue_head);
+		}
 	}
-
-	/*
-	 * Start a new flush sequence
-	 */
-	q->flush_err = 0;
-	q->flush_seq |= QUEUE_FSEQ_STARTED;
-
-	/* adjust FLUSH/FUA of the original request and stash it away */
-	rq->cmd_flags &= ~REQ_FLUSH;
-	if (!has_fua)
-		rq->cmd_flags &= ~REQ_FUA;
-	blk_dequeue_request(rq);
-	q->orig_flush_rq = rq;
-
-	/* skip unneded sequences and return the first one */
-	if (!do_preflush)
-		skip |= QUEUE_FSEQ_PREFLUSH;
-	if (!blk_rq_sectors(rq))
-		skip |= QUEUE_FSEQ_DATA;
-	if (!do_postflush)
-		skip |= QUEUE_FSEQ_POSTFLUSH;
-	return blk_flush_complete_seq(q, skip, 0);
 }
 
 static void bio_end_flush(struct bio *bio, int err)
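
The decision table implemented by blk_flush_policy() and blk_insert_flush() above can be exercised on its own. The userspace sketch below reimplements it with stand-in X_REQ_*/X_FSEQ_* values (assumed names, not kernel API) and prints which steps each kind of request would need for a given device capability mask.

#include <stdio.h>

/* Stand-ins for the kernel's REQ_* and REQ_FSEQ_* bits. */
#define X_REQ_FLUSH		(1u << 0)
#define X_REQ_FUA		(1u << 1)

#define X_FSEQ_PREFLUSH		(1u << 0)
#define X_FSEQ_DATA		(1u << 1)
#define X_FSEQ_POSTFLUSH	(1u << 2)

/* Same decision table as blk_flush_policy(): fflags describes the queue's
 * advertised flush capability, cmd_flags and nr_sectors describe the request. */
static unsigned int flush_policy(unsigned int fflags, unsigned int cmd_flags,
				 unsigned int nr_sectors)
{
	unsigned int policy = 0;

	if (fflags & X_REQ_FLUSH) {
		if (cmd_flags & X_REQ_FLUSH)
			policy |= X_FSEQ_PREFLUSH;
		if (nr_sectors)
			policy |= X_FSEQ_DATA;
		if (!(fflags & X_REQ_FUA) && (cmd_flags & X_REQ_FUA))
			policy |= X_FSEQ_POSTFLUSH;
	}
	return policy;
}

static void show(const char *what, unsigned int policy)
{
	printf("%-30s:%s%s%s%s\n", what,
	       policy & X_FSEQ_PREFLUSH ? " PREFLUSH" : "",
	       policy & X_FSEQ_DATA ? " DATA" : "",
	       policy & X_FSEQ_POSTFLUSH ? " POSTFLUSH" : "",
	       policy ? "" : " (no steps needed)");
}

int main(void)
{
	unsigned int wb = X_REQ_FLUSH;			/* writeback cache, no FUA */
	unsigned int wb_fua = X_REQ_FLUSH | X_REQ_FUA;	/* writeback cache + FUA */

	show("FLUSH+FUA write, cache, no FUA", flush_policy(wb, X_REQ_FLUSH | X_REQ_FUA, 8));
	show("FLUSH+FUA write, cache + FUA", flush_policy(wb_fua, X_REQ_FLUSH | X_REQ_FUA, 8));
	show("empty FLUSH, writeback cache", flush_policy(wb, X_REQ_FLUSH, 0));
	show("empty FLUSH, no cache", flush_policy(0, X_REQ_FLUSH, 0));
	return 0;
}

A DATA-only policy corresponds to the short-circuit path in blk_insert_flush() that queues the request for normal execution, and an all-zero policy is completed immediately once blk_flush_complete_seq() sees all steps already marked done.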
diff --git a/block/blk.h b/block/blk.h
index 2db8f32838e7..284b500852bd 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,21 +51,17 @@ static inline void blk_clear_rq_complete(struct request *rq)
  */
 #define ELV_ON_HASH(rq)		(!hlist_unhashed(&(rq)->hash))
 
-struct request *blk_do_flush(struct request_queue *q, struct request *rq);
+void blk_insert_flush(struct request *rq);
+void blk_abort_flushes(struct request_queue *q);
 
 static inline struct request *__elv_next_request(struct request_queue *q)
 {
 	struct request *rq;
 
 	while (1) {
-		while (!list_empty(&q->queue_head)) {
+		if (!list_empty(&q->queue_head)) {
 			rq = list_entry_rq(q->queue_head.next);
-			if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) ||
-			    rq == &q->flush_rq)
-				return rq;
-			rq = blk_do_flush(q, rq);
-			if (rq)
-				return rq;
+			return rq;
 		}
 
 		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 7be4c7959625..f27ff3efe6cd 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -54,9 +54,9 @@ static const int cfq_hist_divisor = 4;
 #define CFQQ_SEEKY(cfqq)	(hweight32(cfqq->seek_history) > 32/8)
 
 #define RQ_CIC(rq)		\
-	((struct cfq_io_context *) (rq)->elevator_private)
-#define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elevator_private2)
-#define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elevator_private3)
+	((struct cfq_io_context *) (rq)->elevator_private[0])
+#define RQ_CFQQ(rq)		(struct cfq_queue *) ((rq)->elevator_private[1])
+#define RQ_CFQG(rq)		(struct cfq_group *) ((rq)->elevator_private[2])
 
 static struct kmem_cache *cfq_pool;
 static struct kmem_cache *cfq_ioc_pool;
@@ -3613,12 +3613,12 @@ static void cfq_put_request(struct request *rq)
 
 		put_io_context(RQ_CIC(rq)->ioc);
 
-		rq->elevator_private = NULL;
-		rq->elevator_private2 = NULL;
+		rq->elevator_private[0] = NULL;
+		rq->elevator_private[1] = NULL;
 
 		/* Put down rq reference on cfqg */
 		cfq_put_cfqg(RQ_CFQG(rq));
-		rq->elevator_private3 = NULL;
+		rq->elevator_private[2] = NULL;
 
 		cfq_put_queue(cfqq);
 	}
@@ -3705,13 +3705,13 @@ new_queue:
 	}
 
 	cfqq->allocated[rw]++;
-	cfqq->ref++;
-	rq->elevator_private = cic;
-	rq->elevator_private2 = cfqq;
-	rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg);
 
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
+	cfqq->ref++;
+	rq->elevator_private[0] = cic;
+	rq->elevator_private[1] = cfqq;
+	rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg);
 	return 0;
 
 queue_fail:
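
The cfq-iosched.c hunks above (and the elevator.c ones below) rely on a struct request change that is not visible here because the diffstat is limited to block/: the three elevator_private pointers become a small array, which lets the request share that space with the flush bookkeeping fields (rq->flush.seq, rq->flush.list) used by blk-flush.c, since FLUSH/FUA requests never carry elevator data. A minimal, hypothetical sketch of that sharing idea — not the kernel's actual struct layout:

#include <stdio.h>

struct list_head {
	struct list_head *next, *prev;
};

/* Toy stand-in for the relevant part of struct request: elevator data and
 * flush sequencing state can overlay each other because a request only
 * ever uses one of the two. */
struct toy_request {
	union {
		void *elevator_private[3];	/* normal, scheduled requests */
		struct {
			unsigned int seq;	/* REQ_FSEQ_* progress mask */
			struct list_head list;	/* on q->flush_queue[] */
		} flush;			/* FLUSH/FUA requests */
	};
};

int main(void)
{
	struct toy_request rq;

	printf("elevator side: %zu bytes, flush side: %zu bytes, shared: %zu bytes\n",
	       sizeof(rq.elevator_private), sizeof(rq.flush), sizeof(rq));
	return 0;
}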
diff --git a/block/elevator.c b/block/elevator.c
index 2569512830d3..f98e92edc937 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -673,6 +673,11 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		q->elevator->ops->elevator_add_req_fn(q, rq);
 		break;
 
+	case ELEVATOR_INSERT_FLUSH:
+		rq->cmd_flags |= REQ_SOFTBARRIER;
+		blk_insert_flush(rq);
+		break;
+
 	default:
 		printk(KERN_ERR "%s: bad insertion point %d\n",
 		       __func__, where);
@@ -759,7 +764,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	if (e->ops->elevator_set_req_fn)
 		return e->ops->elevator_set_req_fn(q, rq, gfp_mask);
 
-	rq->elevator_private = NULL;
+	rq->elevator_private[0] = NULL;
 	return 0;
 }
 
@@ -785,6 +790,8 @@ void elv_abort_queue(struct request_queue *q)
 {
 	struct request *rq;
 
+	blk_abort_flushes(q);
+
 	while (!list_empty(&q->queue_head)) {
 		rq = list_entry_rq(q->queue_head.next);
 		rq->cmd_flags |= REQ_QUIET;
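
Stepping back to blk-flush.c, the C1-C3 conditions described in its header comment reduce to one small predicate, checked in blk_kick_flush(). The sketch below models that predicate with plain integers in place of jiffies/HZ; all names are local to the example and are not kernel API.

#include <stdbool.h>
#include <stdio.h>

#define X_HZ			100
#define X_PENDING_TIMEOUT	(5 * X_HZ)	/* mirrors FLUSH_PENDING_TIMEOUT */

struct toy_flush_state {
	int pending_idx;	/* queue collecting new PRE/POSTFLUSH waiters */
	int running_idx;	/* queue whose flush is currently in flight */
	int nr_pending;		/* waiters on flush_queue[pending_idx] */
	int nr_data_in_flight;	/* requests executing their DATA step */
	long pending_since;	/* "jiffies" when the oldest waiter queued */
};

static bool should_kick_flush(const struct toy_flush_state *s, long now)
{
	/* C1: only one flush in flight at a time (hence the double
	 * buffering), and there must be someone waiting for a flush. */
	if (s->pending_idx != s->running_idx || s->nr_pending == 0)
		return false;

	/* C2: hold off while DATA writes are in flight so one flush can
	 * cover several requests -- C3: unless a waiter has waited too long. */
	if (s->nr_data_in_flight &&
	    now < s->pending_since + X_PENDING_TIMEOUT)
		return false;

	return true;
}

int main(void)
{
	struct toy_flush_state s = {
		.pending_idx = 0, .running_idx = 0,
		.nr_pending = 2, .nr_data_in_flight = 1,
		.pending_since = 1000,
	};

	printf("data in flight, just queued:  kick=%d\n",
	       should_kick_flush(&s, 1001));
	printf("data in flight, timed out:    kick=%d\n",
	       should_kick_flush(&s, 1000 + X_PENDING_TIMEOUT + 1));
	s.nr_data_in_flight = 0;
	printf("no data in flight:            kick=%d\n",
	       should_kick_flush(&s, 1001));
	return 0;
}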