author		Tejun Heo <tj@kernel.org>	2010-09-03 05:56:16 -0400
committer	Jens Axboe <jaxboe@fusionio.com>	2010-09-10 06:35:36 -0400
commit		28e7d1845216538303bb95d679d8fd4de50e2f1a
tree		0ef56dc0d7c894657c4ae71a3e8da6e1164fb933 /block
parent		dd831006d5be7f74c3fe7aef82380c51c3637960
block: drop barrier ordering by queue draining
Filesystems will take all the responsibility for ordering requests around commit writes and will only indicate how the commit writes themselves should be handled by block layers. This patch drops barrier ordering by queue draining from the block layer. The ordering-by-draining implementation was somewhat invasive to request handling. A list of notable changes follows.

* Each queue has a 1-bit color which is flipped on each barrier issue. It is used to track whether a given request was issued before the current barrier or not. The REQ_ORDERED_COLOR flag and the coloring implementation in __elv_add_request() are removed.

* Requests which shouldn't be processed yet for draining were stalled by returning -EAGAIN from blk_do_ordered() based on the comparison between blk_ordered_req_seq() and blk_ordered_cur_seq(). This logic is removed.

* The draining completion logic in elv_completed_request() is removed.

* All barrier sequence requests used to be queued to the request queue and then trickled to the lower layer according to progress, so request order had to be maintained during requeue. This is replaced by queueing the next request in the barrier sequence only after the current one completes, from blk_ordered_complete_seq(), which removes the need for multiple proxy requests in struct request_queue and for the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of elv_insert().

* As barriers no longer have ordering constraints, there is no need to dump the whole elevator onto the dispatch queue on each barrier. Insert barriers at the front instead.

* If other barrier requests reach the front of the dispatch queue while one is already in progress, they are stored on q->pending_barriers and restored to the dispatch queue one by one after each barrier completion, from blk_ordered_complete_seq().

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
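The sequencing described above can be condensed into a small stand-alone model. The sketch below is illustrative only: it is ordinary user-space C, not the kernel code in the patch, and its names (submit_barrier, complete_step, the fixed-size pending array) are inventions of the example. It shows the two behaviours the patch introduces: each step of a barrier sequence is issued only from the completion of the previous step, and barriers arriving while a sequence is in flight are parked and restored one by one, as q->pending_barriers does in the diff below.

/*
 * Toy user-space model of the barrier sequencing described above.
 * NOT kernel code; locking, error handling and accounting are omitted.
 */
#include <stdio.h>
#include <stdbool.h>

enum step { PREFLUSH, BAR, POSTFLUSH, DONE };
static const char *step_name[] = { "PREFLUSH", "BAR", "POSTFLUSH" };

static int pending[8];		/* ids of barriers waiting their turn */
static int npending;
static int cur_id;		/* barrier currently being sequenced */
static enum step cur_step;
static bool in_progress;

/* issue one step; the kernel queues a request at the dispatch-queue front */
static void issue_step(void)
{
	printf("barrier %d: issue %s\n", cur_id, step_name[cur_step]);
}

static void start_sequence(int id)
{
	in_progress = true;
	cur_id = id;
	cur_step = PREFLUSH;
	issue_step();
}

/* a new barrier arrives at the block layer */
static void submit_barrier(int id)
{
	if (in_progress) {
		/* one sequence at a time: park it, like q->pending_barriers */
		pending[npending++] = id;
		return;
	}
	start_sequence(id);
}

/* the step issued last has completed on the device */
static void complete_step(void)
{
	if (++cur_step != DONE) {
		/* only now is the next step of the sequence issued */
		issue_step();
		return;
	}
	printf("barrier %d: done\n", cur_id);
	in_progress = false;

	/* restore pending barriers one by one after each completion */
	if (npending) {
		int id = pending[0];
		for (int i = 1; i < npending; i++)
			pending[i - 1] = pending[i];
		npending--;
		start_sequence(id);
	}
}

int main(void)
{
	submit_barrier(1);		/* starts PREFLUSH of barrier 1 */
	submit_barrier(2);		/* parked: barrier 1 still in progress */
	for (int i = 0; i < 6; i++)
		complete_step();	/* walk both sequences to completion */
	return 0;
}

Running it prints barrier 1 walking PREFLUSH, BAR and POSTFLUSH to completion before barrier 2's sequence is started, which is exactly the behaviour the patch implements with proxy requests and q->pending_barriers.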
Diffstat (limited to 'block')
-rw-r--r--	block/blk-barrier.c	220
-rw-r--r--	block/blk-core.c	 11
-rw-r--r--	block/blk.h		  2
-rw-r--r--	block/elevator.c	 79
4 files changed, 105 insertions(+), 207 deletions(-)
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index f1be85ba2bb5..e8b2e5c091b1 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -9,6 +9,8 @@
 
 #include "blk.h"
 
+static struct request *queue_next_ordseq(struct request_queue *q);
+
 /*
  * Cache flushing for ordered writes handling
  */
@@ -19,38 +21,10 @@ unsigned blk_ordered_cur_seq(struct request_queue *q)
 	return 1 << ffz(q->ordseq);
 }
 
-unsigned blk_ordered_req_seq(struct request *rq)
-{
-	struct request_queue *q = rq->q;
-
-	BUG_ON(q->ordseq == 0);
-
-	if (rq == &q->pre_flush_rq)
-		return QUEUE_ORDSEQ_PREFLUSH;
-	if (rq == &q->bar_rq)
-		return QUEUE_ORDSEQ_BAR;
-	if (rq == &q->post_flush_rq)
-		return QUEUE_ORDSEQ_POSTFLUSH;
-
-	/*
-	 * !fs requests don't need to follow barrier ordering. Always
-	 * put them at the front. This fixes the following deadlock.
-	 *
-	 * http://thread.gmane.org/gmane.linux.kernel/537473
-	 */
-	if (rq->cmd_type != REQ_TYPE_FS)
-		return QUEUE_ORDSEQ_DRAIN;
-
-	if ((rq->cmd_flags & REQ_ORDERED_COLOR) ==
-	    (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR))
-		return QUEUE_ORDSEQ_DRAIN;
-	else
-		return QUEUE_ORDSEQ_DONE;
-}
-
-bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+static struct request *blk_ordered_complete_seq(struct request_queue *q,
+						unsigned seq, int error)
 {
-	struct request *rq;
+	struct request *next_rq = NULL;
 
 	if (error && !q->orderr)
 		q->orderr = error;
@@ -58,16 +32,22 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 	BUG_ON(q->ordseq & seq);
 	q->ordseq |= seq;
 
-	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
-		return false;
-
-	/*
-	 * Okay, sequence complete.
-	 */
-	q->ordseq = 0;
-	rq = q->orig_bar_rq;
-	__blk_end_request_all(rq, q->orderr);
-	return true;
+	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) {
+		/* not complete yet, queue the next ordered sequence */
+		next_rq = queue_next_ordseq(q);
+	} else {
+		/* complete this barrier request */
+		__blk_end_request_all(q->orig_bar_rq, q->orderr);
+		q->orig_bar_rq = NULL;
+		q->ordseq = 0;
+
+		/* dispatch the next barrier if there's one */
+		if (!list_empty(&q->pending_barriers)) {
+			next_rq = list_entry_rq(q->pending_barriers.next);
+			list_move(&next_rq->queuelist, &q->queue_head);
+		}
+	}
+	return next_rq;
 }
 
 static void pre_flush_end_io(struct request *rq, int error)
@@ -88,133 +68,105 @@ static void post_flush_end_io(struct request *rq, int error)
 	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
 }
 
-static void queue_flush(struct request_queue *q, unsigned which)
+static void queue_flush(struct request_queue *q, struct request *rq,
+			rq_end_io_fn *end_io)
 {
-	struct request *rq;
-	rq_end_io_fn *end_io;
-
-	if (which == QUEUE_ORDERED_DO_PREFLUSH) {
-		rq = &q->pre_flush_rq;
-		end_io = pre_flush_end_io;
-	} else {
-		rq = &q->post_flush_rq;
-		end_io = post_flush_end_io;
-	}
-
 	blk_rq_init(q, rq);
 	rq->cmd_type = REQ_TYPE_FS;
-	rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH;
+	rq->cmd_flags = REQ_FLUSH;
 	rq->rq_disk = q->orig_bar_rq->rq_disk;
 	rq->end_io = end_io;
 
 	elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
-static inline struct request *start_ordered(struct request_queue *q,
-					    struct request *rq)
+static struct request *queue_next_ordseq(struct request_queue *q)
 {
-	unsigned skip = 0;
-
-	q->orderr = 0;
-	q->ordered = q->next_ordered;
-	q->ordseq |= QUEUE_ORDSEQ_STARTED;
-
-	/*
-	 * For an empty barrier, there's no actual BAR request, which
-	 * in turn makes POSTFLUSH unnecessary. Mask them off.
-	 */
-	if (!blk_rq_sectors(rq))
-		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
-				QUEUE_ORDERED_DO_POSTFLUSH);
-
-	/* stash away the original request */
-	blk_dequeue_request(rq);
-	q->orig_bar_rq = rq;
-	rq = NULL;
-
-	/*
-	 * Queue ordered sequence. As we stack them at the head, we
-	 * need to queue in reverse order. Note that we rely on that
-	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-	 * request gets inbetween ordered sequence.
-	 */
-	if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
-		queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
-		rq = &q->post_flush_rq;
-	} else
-		skip |= QUEUE_ORDSEQ_POSTFLUSH;
+	struct request *rq = &q->bar_rq;
 
-	if (q->ordered & QUEUE_ORDERED_DO_BAR) {
-		rq = &q->bar_rq;
+	switch (blk_ordered_cur_seq(q)) {
+	case QUEUE_ORDSEQ_PREFLUSH:
+		queue_flush(q, rq, pre_flush_end_io);
+		break;
 
+	case QUEUE_ORDSEQ_BAR:
 		/* initialize proxy request and queue it */
 		blk_rq_init(q, rq);
 		init_request_from_bio(rq, q->orig_bar_rq->bio);
+		rq->cmd_flags &= ~REQ_HARDBARRIER;
 		if (q->ordered & QUEUE_ORDERED_DO_FUA)
 			rq->cmd_flags |= REQ_FUA;
 		rq->end_io = bar_end_io;
 
 		elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
-	} else
-		skip |= QUEUE_ORDSEQ_BAR;
+		break;
 
-	if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
-		queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
-		rq = &q->pre_flush_rq;
-	} else
-		skip |= QUEUE_ORDSEQ_PREFLUSH;
+	case QUEUE_ORDSEQ_POSTFLUSH:
+		queue_flush(q, rq, post_flush_end_io);
+		break;
 
-	if (queue_in_flight(q))
-		rq = NULL;
-	else
-		skip |= QUEUE_ORDSEQ_DRAIN;
-
-	/*
-	 * Complete skipped sequences. If whole sequence is complete,
-	 * return %NULL to tell elevator that this request is gone.
-	 */
-	if (blk_ordered_complete_seq(q, skip, 0))
-		rq = NULL;
+	default:
+		BUG();
+	}
 	return rq;
 }
 
 struct request *blk_do_ordered(struct request_queue *q, struct request *rq)
 {
-	const int is_barrier = rq->cmd_type == REQ_TYPE_FS &&
-		(rq->cmd_flags & REQ_HARDBARRIER);
-
-	if (!q->ordseq) {
-		if (!is_barrier)
-			return rq;
-
-		if (q->next_ordered != QUEUE_ORDERED_NONE)
-			return start_ordered(q, rq);
-		else {
-			/*
-			 * Queue ordering not supported. Terminate
-			 * with prejudice.
-			 */
-			blk_dequeue_request(rq);
-			__blk_end_request_all(rq, -EOPNOTSUPP);
-			return NULL;
-		}
+	unsigned skip = 0;
+
+	if (!(rq->cmd_flags & REQ_HARDBARRIER))
+		return rq;
+
+	if (q->ordseq) {
+		/*
+		 * Barrier is already in progress and they can't be
+		 * processed in parallel. Queue for later processing.
+		 */
+		list_move_tail(&rq->queuelist, &q->pending_barriers);
+		return NULL;
+	}
+
+	if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) {
+		/*
+		 * Queue ordering not supported. Terminate
+		 * with prejudice.
+		 */
+		blk_dequeue_request(rq);
+		__blk_end_request_all(rq, -EOPNOTSUPP);
+		return NULL;
 	}
 
 	/*
-	 * Ordered sequence in progress
+	 * Start a new ordered sequence
 	 */
+	q->orderr = 0;
+	q->ordered = q->next_ordered;
+	q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
-	/* Special requests are not subject to ordering rules. */
-	if (rq->cmd_type != REQ_TYPE_FS &&
-	    rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
-		return rq;
+	/*
+	 * For an empty barrier, there's no actual BAR request, which
+	 * in turn makes POSTFLUSH unnecessary. Mask them off.
+	 */
+	if (!blk_rq_sectors(rq))
+		q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+				QUEUE_ORDERED_DO_POSTFLUSH);
 
-	/* Ordered by draining. Wait for turn. */
-	WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q));
-	if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q))
-		rq = ERR_PTR(-EAGAIN);
+	/* stash away the original request */
+	blk_dequeue_request(rq);
+	q->orig_bar_rq = rq;
 
-	return rq;
+	if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH))
+		skip |= QUEUE_ORDSEQ_PREFLUSH;
+
+	if (!(q->ordered & QUEUE_ORDERED_DO_BAR))
+		skip |= QUEUE_ORDSEQ_BAR;
+
+	if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH))
+		skip |= QUEUE_ORDSEQ_POSTFLUSH;
+
+	/* complete skipped sequences and return the first sequence */
+	return blk_ordered_complete_seq(q, skip, 0);
 }
 
 static void bio_end_empty_barrier(struct bio *bio, int err)
diff --git a/block/blk-core.c b/block/blk-core.c
index f8d37a8e2c55..d316662682c8 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -520,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	init_timer(&q->unplug_timer);
 	setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
 	INIT_LIST_HEAD(&q->timeout_list);
+	INIT_LIST_HEAD(&q->pending_barriers);
 	INIT_WORK(&q->unplug_work, blk_unplug_work);
 
 	kobject_init(&q->kobj, &blk_queue_ktype);
@@ -1185,6 +1186,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	const bool sync = (bio->bi_rw & REQ_SYNC);
 	const bool unplug = (bio->bi_rw & REQ_UNPLUG);
 	const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
+	int where = ELEVATOR_INSERT_SORT;
 	int rw_flags;
 
 	/* REQ_HARDBARRIER is no more */
@@ -1203,7 +1205,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 	spin_lock_irq(q->queue_lock);
 
-	if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q))
+	if (bio->bi_rw & REQ_HARDBARRIER) {
+		where = ELEVATOR_INSERT_FRONT;
+		goto get_rq;
+	}
+
+	if (elv_queue_empty(q))
 		goto get_rq;
 
 	el_ret = elv_merge(q, &req, bio);
@@ -1303,7 +1310,7 @@ get_rq:
 
 	/* insert the request into the elevator */
 	drive_stat_acct(req, 1);
-	__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
+	__elv_add_request(q, req, where, 0);
 out:
 	if (unplug || !queue_should_plug(q))
 		__generic_unplug_device(q);
diff --git a/block/blk.h b/block/blk.h
index 874eb4ea8093..08081e4b294e 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -62,7 +62,7 @@ static inline struct request *__elv_next_request(struct request_queue *q)
 		rq = list_entry_rq(q->queue_head.next);
 		rq = blk_do_ordered(q, rq);
 		if (rq)
-			return !IS_ERR(rq) ? rq : NULL;
+			return rq;
 	}
 
 	if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
diff --git a/block/elevator.c b/block/elevator.c
index ec585c9554d3..241c69c45c5f 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q)
 
 void elv_insert(struct request_queue *q, struct request *rq, int where)
 {
-	struct list_head *pos;
-	unsigned ordseq;
 	int unplug_it = 1;
 
 	trace_block_rq_insert(q, rq);
@@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 	rq->q = q;
 
 	switch (where) {
+	case ELEVATOR_INSERT_REQUEUE:
+		/*
+		 * Most requeues happen because of a busy condition,
+		 * don't force unplug of the queue for that case.
+		 * Clear unplug_it and fall through.
+		 */
+		unplug_it = 0;
+
 	case ELEVATOR_INSERT_FRONT:
 		rq->cmd_flags |= REQ_SOFTBARRIER;
-
 		list_add(&rq->queuelist, &q->queue_head);
 		break;
 
@@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 		q->elevator->ops->elevator_add_req_fn(q, rq);
 		break;
 
-	case ELEVATOR_INSERT_REQUEUE:
-		/*
-		 * If ordered flush isn't in progress, we do front
-		 * insertion; otherwise, requests should be requeued
-		 * in ordseq order.
-		 */
-		rq->cmd_flags |= REQ_SOFTBARRIER;
-
-		/*
-		 * Most requeues happen because of a busy condition,
-		 * don't force unplug of the queue for that case.
-		 */
-		unplug_it = 0;
-
-		if (q->ordseq == 0) {
-			list_add(&rq->queuelist, &q->queue_head);
-			break;
-		}
-
-		ordseq = blk_ordered_req_seq(rq);
-
-		list_for_each(pos, &q->queue_head) {
-			struct request *pos_rq = list_entry_rq(pos);
-			if (ordseq <= blk_ordered_req_seq(pos_rq))
-				break;
-		}
-
-		list_add_tail(&rq->queuelist, pos);
-		break;
-
 	default:
 		printk(KERN_ERR "%s: bad insertion point %d\n",
 		       __func__, where);
@@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where)
 void __elv_add_request(struct request_queue *q, struct request *rq, int where,
 		       int plug)
 {
-	if (q->ordcolor)
-		rq->cmd_flags |= REQ_ORDERED_COLOR;
-
 	if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
-		/*
-		 * toggle ordered color
-		 */
-		if (rq->cmd_flags & REQ_HARDBARRIER)
-			q->ordcolor ^= 1;
-
-		/*
-		 * barriers implicitly indicate back insertion
-		 */
-		if (where == ELEVATOR_INSERT_SORT)
-			where = ELEVATOR_INSERT_BACK;
-
-		/*
-		 * this request is scheduling boundary, update
-		 * end_sector
-		 */
+		/* barriers are scheduling boundary, update end_sector */
 		if (rq->cmd_type == REQ_TYPE_FS ||
 		    (rq->cmd_flags & REQ_DISCARD)) {
 			q->end_sector = rq_end_sector(rq);
@@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq)
 		    e->ops->elevator_completed_req_fn)
 			e->ops->elevator_completed_req_fn(q, rq);
 	}
-
-	/*
-	 * Check if the queue is waiting for fs requests to be
-	 * drained for flush sequence.
-	 */
-	if (unlikely(q->ordseq)) {
-		struct request *next = NULL;
-
-		if (!list_empty(&q->queue_head))
-			next = list_entry_rq(q->queue_head.next);
-
-		if (!queue_in_flight(q) &&
-		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
-		    (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) {
-			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
-			__blk_run_queue(q);
-		}
-	}
 }
 
 #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr)