Diffstat (limited to 'block')
-rw-r--r--   block/blk-cgroup.c        |  20
-rw-r--r--   block/blk-cgroup.h        |  14
-rw-r--r--   block/blk-core.c          | 737
-rw-r--r--   block/blk-exec.c          |   4
-rw-r--r--   block/blk-flush.c         | 441
-rw-r--r--   block/blk-integrity.c     |  12
-rw-r--r--   block/blk-lib.c           |   2
-rw-r--r--   block/blk-merge.c         |   6
-rw-r--r--   block/blk-settings.c      |  15
-rw-r--r--   block/blk-sysfs.c         |  13
-rw-r--r--   block/blk-throttle.c      | 143
-rw-r--r--   block/blk.h               |  18
-rw-r--r--   block/cfq-iosched.c       | 191
-rw-r--r--   block/cfq.h               |   6
-rw-r--r--   block/deadline-iosched.c  |   9
-rw-r--r--   block/elevator.c          | 142
-rw-r--r--   block/genhd.c             |  28
-rw-r--r--   block/noop-iosched.c      |   8
18 files changed, 1082 insertions, 727 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 455768a3eb9e..f0605ab2a761 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -371,12 +371,14 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg, | |||
371 | } | 371 | } |
372 | EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); | 372 | EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); |
373 | 373 | ||
374 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time) | 374 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time, |
375 | unsigned long unaccounted_time) | ||
375 | { | 376 | { |
376 | unsigned long flags; | 377 | unsigned long flags; |
377 | 378 | ||
378 | spin_lock_irqsave(&blkg->stats_lock, flags); | 379 | spin_lock_irqsave(&blkg->stats_lock, flags); |
379 | blkg->stats.time += time; | 380 | blkg->stats.time += time; |
381 | blkg->stats.unaccounted_time += unaccounted_time; | ||
380 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | 382 | spin_unlock_irqrestore(&blkg->stats_lock, flags); |
381 | } | 383 | } |
382 | EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); | 384 | EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); |
@@ -604,6 +606,9 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, | |||
604 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | 606 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, |
605 | blkg->stats.sectors, cb, dev); | 607 | blkg->stats.sectors, cb, dev); |
606 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 608 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
609 | if (type == BLKIO_STAT_UNACCOUNTED_TIME) | ||
610 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
611 | blkg->stats.unaccounted_time, cb, dev); | ||
607 | if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { | 612 | if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { |
608 | uint64_t sum = blkg->stats.avg_queue_size_sum; | 613 | uint64_t sum = blkg->stats.avg_queue_size_sum; |
609 | uint64_t samples = blkg->stats.avg_queue_size_samples; | 614 | uint64_t samples = blkg->stats.avg_queue_size_samples; |
@@ -863,7 +868,7 @@ static void blkio_update_policy_rule(struct blkio_policy_node *oldpn, | |||
863 | } | 868 | } |
864 | 869 | ||
865 | /* | 870 | /* |
866 | * Some rules/values in blkg have changed. Propogate those to respective | 871 | * Some rules/values in blkg have changed. Propagate those to respective |
867 | * policies. | 872 | * policies. |
868 | */ | 873 | */ |
869 | static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg, | 874 | static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg, |
@@ -898,7 +903,7 @@ static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg, | |||
898 | } | 903 | } |
899 | 904 | ||
900 | /* | 905 | /* |
901 | * A policy node rule has been updated. Propogate this update to all the | 906 | * A policy node rule has been updated. Propagate this update to all the |
902 | * block groups which might be affected by this update. | 907 | * block groups which might be affected by this update. |
903 | */ | 908 | */ |
904 | static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg, | 909 | static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg, |
@@ -1125,6 +1130,9 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft, | |||
1125 | return blkio_read_blkg_stats(blkcg, cft, cb, | 1130 | return blkio_read_blkg_stats(blkcg, cft, cb, |
1126 | BLKIO_STAT_QUEUED, 1); | 1131 | BLKIO_STAT_QUEUED, 1); |
1127 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 1132 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
1133 | case BLKIO_PROP_unaccounted_time: | ||
1134 | return blkio_read_blkg_stats(blkcg, cft, cb, | ||
1135 | BLKIO_STAT_UNACCOUNTED_TIME, 0); | ||
1128 | case BLKIO_PROP_dequeue: | 1136 | case BLKIO_PROP_dequeue: |
1129 | return blkio_read_blkg_stats(blkcg, cft, cb, | 1137 | return blkio_read_blkg_stats(blkcg, cft, cb, |
1130 | BLKIO_STAT_DEQUEUE, 0); | 1138 | BLKIO_STAT_DEQUEUE, 0); |
@@ -1382,6 +1390,12 @@ struct cftype blkio_files[] = { | |||
1382 | BLKIO_PROP_dequeue), | 1390 | BLKIO_PROP_dequeue), |
1383 | .read_map = blkiocg_file_read_map, | 1391 | .read_map = blkiocg_file_read_map, |
1384 | }, | 1392 | }, |
1393 | { | ||
1394 | .name = "unaccounted_time", | ||
1395 | .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, | ||
1396 | BLKIO_PROP_unaccounted_time), | ||
1397 | .read_map = blkiocg_file_read_map, | ||
1398 | }, | ||
1385 | #endif | 1399 | #endif |
1386 | }; | 1400 | }; |
1387 | 1401 | ||
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index ea4861bdd549..10919fae2d3a 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -49,6 +49,8 @@ enum stat_type { | |||
49 | /* All the single valued stats go below this */ | 49 | /* All the single valued stats go below this */ |
50 | BLKIO_STAT_TIME, | 50 | BLKIO_STAT_TIME, |
51 | BLKIO_STAT_SECTORS, | 51 | BLKIO_STAT_SECTORS, |
52 | /* Time not charged to this cgroup */ | ||
53 | BLKIO_STAT_UNACCOUNTED_TIME, | ||
52 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 54 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
53 | BLKIO_STAT_AVG_QUEUE_SIZE, | 55 | BLKIO_STAT_AVG_QUEUE_SIZE, |
54 | BLKIO_STAT_IDLE_TIME, | 56 | BLKIO_STAT_IDLE_TIME, |
@@ -81,6 +83,7 @@ enum blkcg_file_name_prop { | |||
81 | BLKIO_PROP_io_serviced, | 83 | BLKIO_PROP_io_serviced, |
82 | BLKIO_PROP_time, | 84 | BLKIO_PROP_time, |
83 | BLKIO_PROP_sectors, | 85 | BLKIO_PROP_sectors, |
86 | BLKIO_PROP_unaccounted_time, | ||
84 | BLKIO_PROP_io_service_time, | 87 | BLKIO_PROP_io_service_time, |
85 | BLKIO_PROP_io_wait_time, | 88 | BLKIO_PROP_io_wait_time, |
86 | BLKIO_PROP_io_merged, | 89 | BLKIO_PROP_io_merged, |
@@ -114,6 +117,8 @@ struct blkio_group_stats { | |||
114 | /* total disk time and nr sectors dispatched by this group */ | 117 | /* total disk time and nr sectors dispatched by this group */ |
115 | uint64_t time; | 118 | uint64_t time; |
116 | uint64_t sectors; | 119 | uint64_t sectors; |
120 | /* Time not charged to this cgroup */ | ||
121 | uint64_t unaccounted_time; | ||
117 | uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL]; | 122 | uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL]; |
118 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 123 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
119 | /* Sum of number of IOs queued across all samples */ | 124 | /* Sum of number of IOs queued across all samples */ |
@@ -240,7 +245,7 @@ static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } | |||
240 | 245 | ||
241 | #endif | 246 | #endif |
242 | 247 | ||
243 | #define BLKIO_WEIGHT_MIN 100 | 248 | #define BLKIO_WEIGHT_MIN 10 |
244 | #define BLKIO_WEIGHT_MAX 1000 | 249 | #define BLKIO_WEIGHT_MAX 1000 |
245 | #define BLKIO_WEIGHT_DEFAULT 500 | 250 | #define BLKIO_WEIGHT_DEFAULT 500 |
246 | 251 | ||
@@ -293,7 +298,8 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg); | |||
293 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, | 298 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, |
294 | void *key); | 299 | void *key); |
295 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, | 300 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
296 | unsigned long time); | 301 | unsigned long time, |
302 | unsigned long unaccounted_time); | ||
297 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes, | 303 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes, |
298 | bool direction, bool sync); | 304 | bool direction, bool sync); |
299 | void blkiocg_update_completion_stats(struct blkio_group *blkg, | 305 | void blkiocg_update_completion_stats(struct blkio_group *blkg, |
@@ -319,7 +325,9 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } | |||
319 | static inline struct blkio_group * | 325 | static inline struct blkio_group * |
320 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } | 326 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } |
321 | static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, | 327 | static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
322 | unsigned long time) {} | 328 | unsigned long time, |
329 | unsigned long unaccounted_time) | ||
330 | {} | ||
323 | static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg, | 331 | static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg, |
324 | uint64_t bytes, bool direction, bool sync) {} | 332 | uint64_t bytes, bool direction, bool sync) {} |
325 | static inline void blkiocg_update_completion_stats(struct blkio_group *blkg, | 333 | static inline void blkiocg_update_completion_stats(struct blkio_group *blkg, |
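[Editor's note: a hedged sketch, not part of the diff.] The widened blkiocg_update_timeslice_used() API above takes the unaccounted portion of a slice alongside the charged time. A minimal caller might look like the following, assuming the scheduler (CFQ later in this series) has already computed used_sl and unaccounted_sl:

	#include "blk-cgroup.h"

	/* Hedged sketch: charging a timeslice with the new third argument.
	 * The unaccounted part is reported through the new
	 * blkio.unaccounted_time file when CONFIG_DEBUG_BLK_CGROUP is set. */
	static void example_charge_slice(struct blkio_group *blkg,
					 unsigned long used_sl,
					 unsigned long unaccounted_sl)
	{
		blkiocg_update_timeslice_used(blkg, used_sl, unaccounted_sl);
	}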
diff --git a/block/blk-core.c b/block/blk-core.c
index a63336d49f30..a2e58eeb3549 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/task_io_accounting_ops.h> | 28 | #include <linux/task_io_accounting_ops.h> |
29 | #include <linux/fault-inject.h> | 29 | #include <linux/fault-inject.h> |
30 | #include <linux/list_sort.h> | ||
30 | 31 | ||
31 | #define CREATE_TRACE_POINTS | 32 | #define CREATE_TRACE_POINTS |
32 | #include <trace/events/block.h> | 33 | #include <trace/events/block.h> |
@@ -149,39 +150,29 @@ EXPORT_SYMBOL(blk_rq_init); | |||
149 | static void req_bio_endio(struct request *rq, struct bio *bio, | 150 | static void req_bio_endio(struct request *rq, struct bio *bio, |
150 | unsigned int nbytes, int error) | 151 | unsigned int nbytes, int error) |
151 | { | 152 | { |
152 | struct request_queue *q = rq->q; | 153 | if (error) |
153 | 154 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | |
154 | if (&q->flush_rq != rq) { | 155 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
155 | if (error) | 156 | error = -EIO; |
156 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
157 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
158 | error = -EIO; | ||
159 | 157 | ||
160 | if (unlikely(nbytes > bio->bi_size)) { | 158 | if (unlikely(nbytes > bio->bi_size)) { |
161 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", | 159 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", |
162 | __func__, nbytes, bio->bi_size); | 160 | __func__, nbytes, bio->bi_size); |
163 | nbytes = bio->bi_size; | 161 | nbytes = bio->bi_size; |
164 | } | 162 | } |
165 | 163 | ||
166 | if (unlikely(rq->cmd_flags & REQ_QUIET)) | 164 | if (unlikely(rq->cmd_flags & REQ_QUIET)) |
167 | set_bit(BIO_QUIET, &bio->bi_flags); | 165 | set_bit(BIO_QUIET, &bio->bi_flags); |
168 | 166 | ||
169 | bio->bi_size -= nbytes; | 167 | bio->bi_size -= nbytes; |
170 | bio->bi_sector += (nbytes >> 9); | 168 | bio->bi_sector += (nbytes >> 9); |
171 | 169 | ||
172 | if (bio_integrity(bio)) | 170 | if (bio_integrity(bio)) |
173 | bio_integrity_advance(bio, nbytes); | 171 | bio_integrity_advance(bio, nbytes); |
174 | 172 | ||
175 | if (bio->bi_size == 0) | 173 | /* don't actually finish bio if it's part of flush sequence */ |
176 | bio_endio(bio, error); | 174 | if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) |
177 | } else { | 175 | bio_endio(bio, error); |
178 | /* | ||
179 | * Okay, this is the sequenced flush request in | ||
180 | * progress, just record the error; | ||
181 | */ | ||
182 | if (error && !q->flush_err) | ||
183 | q->flush_err = error; | ||
184 | } | ||
185 | } | 176 | } |
186 | 177 | ||
187 | void blk_dump_rq_flags(struct request *rq, char *msg) | 178 | void blk_dump_rq_flags(struct request *rq, char *msg) |
@@ -207,136 +198,32 @@ void blk_dump_rq_flags(struct request *rq, char *msg) | |||
207 | } | 198 | } |
208 | EXPORT_SYMBOL(blk_dump_rq_flags); | 199 | EXPORT_SYMBOL(blk_dump_rq_flags); |
209 | 200 | ||
210 | /* | 201 | static void blk_delay_work(struct work_struct *work) |
211 | * "plug" the device if there are no outstanding requests: this will | ||
212 | * force the transfer to start only after we have put all the requests | ||
213 | * on the list. | ||
214 | * | ||
215 | * This is called with interrupts off and no requests on the queue and | ||
216 | * with the queue lock held. | ||
217 | */ | ||
218 | void blk_plug_device(struct request_queue *q) | ||
219 | { | 202 | { |
220 | WARN_ON(!irqs_disabled()); | 203 | struct request_queue *q; |
221 | |||
222 | /* | ||
223 | * don't plug a stopped queue, it must be paired with blk_start_queue() | ||
224 | * which will restart the queueing | ||
225 | */ | ||
226 | if (blk_queue_stopped(q)) | ||
227 | return; | ||
228 | 204 | ||
229 | if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { | 205 | q = container_of(work, struct request_queue, delay_work.work); |
230 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); | 206 | spin_lock_irq(q->queue_lock); |
231 | trace_block_plug(q); | 207 | __blk_run_queue(q); |
232 | } | 208 | spin_unlock_irq(q->queue_lock); |
233 | } | 209 | } |
234 | EXPORT_SYMBOL(blk_plug_device); | ||
235 | 210 | ||
236 | /** | 211 | /** |
237 | * blk_plug_device_unlocked - plug a device without queue lock held | 212 | * blk_delay_queue - restart queueing after defined interval |
238 | * @q: The &struct request_queue to plug | 213 | * @q: The &struct request_queue in question |
214 | * @msecs: Delay in msecs | ||
239 | * | 215 | * |
240 | * Description: | 216 | * Description: |
241 | * Like @blk_plug_device(), but grabs the queue lock and disables | 217 | * Sometimes queueing needs to be postponed for a little while, to allow |
242 | * interrupts. | 218 | * resources to come back. This function will make sure that queueing is |
243 | **/ | 219 | * restarted around the specified time. |
244 | void blk_plug_device_unlocked(struct request_queue *q) | ||
245 | { | ||
246 | unsigned long flags; | ||
247 | |||
248 | spin_lock_irqsave(q->queue_lock, flags); | ||
249 | blk_plug_device(q); | ||
250 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
251 | } | ||
252 | EXPORT_SYMBOL(blk_plug_device_unlocked); | ||
253 | |||
254 | /* | ||
255 | * remove the queue from the plugged list, if present. called with | ||
256 | * queue lock held and interrupts disabled. | ||
257 | */ | ||
258 | int blk_remove_plug(struct request_queue *q) | ||
259 | { | ||
260 | WARN_ON(!irqs_disabled()); | ||
261 | |||
262 | if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) | ||
263 | return 0; | ||
264 | |||
265 | del_timer(&q->unplug_timer); | ||
266 | return 1; | ||
267 | } | ||
268 | EXPORT_SYMBOL(blk_remove_plug); | ||
269 | |||
270 | /* | ||
271 | * remove the plug and let it rip.. | ||
272 | */ | 220 | */ |
273 | void __generic_unplug_device(struct request_queue *q) | 221 | void blk_delay_queue(struct request_queue *q, unsigned long msecs) |
274 | { | 222 | { |
275 | if (unlikely(blk_queue_stopped(q))) | 223 | queue_delayed_work(kblockd_workqueue, &q->delay_work, |
276 | return; | 224 | msecs_to_jiffies(msecs)); |
277 | if (!blk_remove_plug(q) && !blk_queue_nonrot(q)) | ||
278 | return; | ||
279 | |||
280 | q->request_fn(q); | ||
281 | } | 225 | } |
282 | 226 | EXPORT_SYMBOL(blk_delay_queue); | |
283 | /** | ||
284 | * generic_unplug_device - fire a request queue | ||
285 | * @q: The &struct request_queue in question | ||
286 | * | ||
287 | * Description: | ||
288 | * Linux uses plugging to build bigger requests queues before letting | ||
289 | * the device have at them. If a queue is plugged, the I/O scheduler | ||
290 | * is still adding and merging requests on the queue. Once the queue | ||
291 | * gets unplugged, the request_fn defined for the queue is invoked and | ||
292 | * transfers started. | ||
293 | **/ | ||
294 | void generic_unplug_device(struct request_queue *q) | ||
295 | { | ||
296 | if (blk_queue_plugged(q)) { | ||
297 | spin_lock_irq(q->queue_lock); | ||
298 | __generic_unplug_device(q); | ||
299 | spin_unlock_irq(q->queue_lock); | ||
300 | } | ||
301 | } | ||
302 | EXPORT_SYMBOL(generic_unplug_device); | ||
303 | |||
304 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, | ||
305 | struct page *page) | ||
306 | { | ||
307 | struct request_queue *q = bdi->unplug_io_data; | ||
308 | |||
309 | blk_unplug(q); | ||
310 | } | ||
311 | |||
312 | void blk_unplug_work(struct work_struct *work) | ||
313 | { | ||
314 | struct request_queue *q = | ||
315 | container_of(work, struct request_queue, unplug_work); | ||
316 | |||
317 | trace_block_unplug_io(q); | ||
318 | q->unplug_fn(q); | ||
319 | } | ||
320 | |||
321 | void blk_unplug_timeout(unsigned long data) | ||
322 | { | ||
323 | struct request_queue *q = (struct request_queue *)data; | ||
324 | |||
325 | trace_block_unplug_timer(q); | ||
326 | kblockd_schedule_work(q, &q->unplug_work); | ||
327 | } | ||
328 | |||
329 | void blk_unplug(struct request_queue *q) | ||
330 | { | ||
331 | /* | ||
332 | * devices don't necessarily have an ->unplug_fn defined | ||
333 | */ | ||
334 | if (q->unplug_fn) { | ||
335 | trace_block_unplug_io(q); | ||
336 | q->unplug_fn(q); | ||
337 | } | ||
338 | } | ||
339 | EXPORT_SYMBOL(blk_unplug); | ||
340 | 227 | ||
341 | /** | 228 | /** |
342 | * blk_start_queue - restart a previously stopped queue | 229 | * blk_start_queue - restart a previously stopped queue |
@@ -352,7 +239,7 @@ void blk_start_queue(struct request_queue *q) | |||
352 | WARN_ON(!irqs_disabled()); | 239 | WARN_ON(!irqs_disabled()); |
353 | 240 | ||
354 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); | 241 | queue_flag_clear(QUEUE_FLAG_STOPPED, q); |
355 | __blk_run_queue(q, false); | 242 | __blk_run_queue(q); |
356 | } | 243 | } |
357 | EXPORT_SYMBOL(blk_start_queue); | 244 | EXPORT_SYMBOL(blk_start_queue); |
358 | 245 | ||
@@ -372,7 +259,7 @@ EXPORT_SYMBOL(blk_start_queue); | |||
372 | **/ | 259 | **/ |
373 | void blk_stop_queue(struct request_queue *q) | 260 | void blk_stop_queue(struct request_queue *q) |
374 | { | 261 | { |
375 | blk_remove_plug(q); | 262 | __cancel_delayed_work(&q->delay_work); |
376 | queue_flag_set(QUEUE_FLAG_STOPPED, q); | 263 | queue_flag_set(QUEUE_FLAG_STOPPED, q); |
377 | } | 264 | } |
378 | EXPORT_SYMBOL(blk_stop_queue); | 265 | EXPORT_SYMBOL(blk_stop_queue); |
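[Editor's note: a hedged sketch, not part of the diff.] With the per-queue plug and unplug timer removed above, a driver that runs out of resources backs off with the new blk_delay_queue() instead. example_hw_busy() below is a hypothetical hardware check standing in for driver-specific logic:

	static bool example_hw_busy(void);	/* hypothetical resource check */

	static void example_request_fn(struct request_queue *q)
	{
		struct request *rq;

		while ((rq = blk_fetch_request(q)) != NULL) {
			if (example_hw_busy()) {
				blk_requeue_request(q, rq);
				blk_delay_queue(q, 3);	/* kblockd re-runs the queue in ~3 ms */
				break;
			}
			/* ... hand rq to the hardware ... */
		}
	}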
@@ -390,51 +277,51 @@ EXPORT_SYMBOL(blk_stop_queue); | |||
390 | * that its ->make_request_fn will not re-add plugging prior to calling | 277 | * that its ->make_request_fn will not re-add plugging prior to calling |
391 | * this function. | 278 | * this function. |
392 | * | 279 | * |
280 | * This function does not cancel any asynchronous activity arising | ||
281 | * out of elevator or throttling code. That would require elevator_exit() | ||
282 | * and blk_throtl_exit() to be called with queue lock initialized. | ||
283 | * | ||
393 | */ | 284 | */ |
394 | void blk_sync_queue(struct request_queue *q) | 285 | void blk_sync_queue(struct request_queue *q) |
395 | { | 286 | { |
396 | del_timer_sync(&q->unplug_timer); | ||
397 | del_timer_sync(&q->timeout); | 287 | del_timer_sync(&q->timeout); |
398 | cancel_work_sync(&q->unplug_work); | 288 | cancel_delayed_work_sync(&q->delay_work); |
399 | throtl_shutdown_timer_wq(q); | ||
400 | } | 289 | } |
401 | EXPORT_SYMBOL(blk_sync_queue); | 290 | EXPORT_SYMBOL(blk_sync_queue); |
402 | 291 | ||
403 | /** | 292 | /** |
404 | * __blk_run_queue - run a single device queue | 293 | * __blk_run_queue - run a single device queue |
405 | * @q: The queue to run | 294 | * @q: The queue to run |
406 | * @force_kblockd: Don't run @q->request_fn directly. Use kblockd. | ||
407 | * | 295 | * |
408 | * Description: | 296 | * Description: |
409 | * See @blk_run_queue. This variant must be called with the queue lock | 297 | * See @blk_run_queue. This variant must be called with the queue lock |
410 | * held and interrupts disabled. | 298 | * held and interrupts disabled. |
411 | * | ||
412 | */ | 299 | */ |
413 | void __blk_run_queue(struct request_queue *q, bool force_kblockd) | 300 | void __blk_run_queue(struct request_queue *q) |
414 | { | 301 | { |
415 | blk_remove_plug(q); | ||
416 | |||
417 | if (unlikely(blk_queue_stopped(q))) | 302 | if (unlikely(blk_queue_stopped(q))) |
418 | return; | 303 | return; |
419 | 304 | ||
420 | if (elv_queue_empty(q)) | 305 | q->request_fn(q); |
421 | return; | ||
422 | |||
423 | /* | ||
424 | * Only recurse once to avoid overrunning the stack, let the unplug | ||
425 | * handling reinvoke the handler shortly if we already got there. | ||
426 | */ | ||
427 | if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { | ||
428 | q->request_fn(q); | ||
429 | queue_flag_clear(QUEUE_FLAG_REENTER, q); | ||
430 | } else { | ||
431 | queue_flag_set(QUEUE_FLAG_PLUGGED, q); | ||
432 | kblockd_schedule_work(q, &q->unplug_work); | ||
433 | } | ||
434 | } | 306 | } |
435 | EXPORT_SYMBOL(__blk_run_queue); | 307 | EXPORT_SYMBOL(__blk_run_queue); |
436 | 308 | ||
437 | /** | 309 | /** |
310 | * blk_run_queue_async - run a single device queue in workqueue context | ||
311 | * @q: The queue to run | ||
312 | * | ||
313 | * Description: | ||
314 | * Tells kblockd to perform the equivalent of @blk_run_queue on behalf | ||
315 | * of us. | ||
316 | */ | ||
317 | void blk_run_queue_async(struct request_queue *q) | ||
318 | { | ||
319 | if (likely(!blk_queue_stopped(q))) | ||
320 | queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); | ||
321 | } | ||
322 | EXPORT_SYMBOL(blk_run_queue_async); | ||
323 | |||
324 | /** | ||
438 | * blk_run_queue - run a single device queue | 325 | * blk_run_queue - run a single device queue |
439 | * @q: The queue to run | 326 | * @q: The queue to run |
440 | * | 327 | * |
@@ -447,7 +334,7 @@ void blk_run_queue(struct request_queue *q) | |||
447 | unsigned long flags; | 334 | unsigned long flags; |
448 | 335 | ||
449 | spin_lock_irqsave(q->queue_lock, flags); | 336 | spin_lock_irqsave(q->queue_lock, flags); |
450 | __blk_run_queue(q, false); | 337 | __blk_run_queue(q); |
451 | spin_unlock_irqrestore(q->queue_lock, flags); | 338 | spin_unlock_irqrestore(q->queue_lock, flags); |
452 | } | 339 | } |
453 | EXPORT_SYMBOL(blk_run_queue); | 340 | EXPORT_SYMBOL(blk_run_queue); |
@@ -457,6 +344,11 @@ void blk_put_queue(struct request_queue *q) | |||
457 | kobject_put(&q->kobj); | 344 | kobject_put(&q->kobj); |
458 | } | 345 | } |
459 | 346 | ||
347 | /* | ||
348 | * Note: If a driver supplied the queue lock, it should not zap that lock | ||
349 | * unexpectedly as some queue cleanup components like elevator_exit() and | ||
350 | * blk_throtl_exit() need queue lock. | ||
351 | */ | ||
460 | void blk_cleanup_queue(struct request_queue *q) | 352 | void blk_cleanup_queue(struct request_queue *q) |
461 | { | 353 | { |
462 | /* | 354 | /* |
@@ -475,6 +367,8 @@ void blk_cleanup_queue(struct request_queue *q) | |||
475 | if (q->elevator) | 367 | if (q->elevator) |
476 | elevator_exit(q->elevator); | 368 | elevator_exit(q->elevator); |
477 | 369 | ||
370 | blk_throtl_exit(q); | ||
371 | |||
478 | blk_put_queue(q); | 372 | blk_put_queue(q); |
479 | } | 373 | } |
480 | EXPORT_SYMBOL(blk_cleanup_queue); | 374 | EXPORT_SYMBOL(blk_cleanup_queue); |
@@ -517,8 +411,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
517 | if (!q) | 411 | if (!q) |
518 | return NULL; | 412 | return NULL; |
519 | 413 | ||
520 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; | ||
521 | q->backing_dev_info.unplug_io_data = q; | ||
522 | q->backing_dev_info.ra_pages = | 414 | q->backing_dev_info.ra_pages = |
523 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 415 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
524 | q->backing_dev_info.state = 0; | 416 | q->backing_dev_info.state = 0; |
@@ -538,17 +430,24 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
538 | 430 | ||
539 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, | 431 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, |
540 | laptop_mode_timer_fn, (unsigned long) q); | 432 | laptop_mode_timer_fn, (unsigned long) q); |
541 | init_timer(&q->unplug_timer); | ||
542 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | 433 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
543 | INIT_LIST_HEAD(&q->timeout_list); | 434 | INIT_LIST_HEAD(&q->timeout_list); |
544 | INIT_LIST_HEAD(&q->pending_flushes); | 435 | INIT_LIST_HEAD(&q->flush_queue[0]); |
545 | INIT_WORK(&q->unplug_work, blk_unplug_work); | 436 | INIT_LIST_HEAD(&q->flush_queue[1]); |
437 | INIT_LIST_HEAD(&q->flush_data_in_flight); | ||
438 | INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); | ||
546 | 439 | ||
547 | kobject_init(&q->kobj, &blk_queue_ktype); | 440 | kobject_init(&q->kobj, &blk_queue_ktype); |
548 | 441 | ||
549 | mutex_init(&q->sysfs_lock); | 442 | mutex_init(&q->sysfs_lock); |
550 | spin_lock_init(&q->__queue_lock); | 443 | spin_lock_init(&q->__queue_lock); |
551 | 444 | ||
445 | /* | ||
446 | * By default initialize queue_lock to internal lock and driver can | ||
447 | * override it later if need be. | ||
448 | */ | ||
449 | q->queue_lock = &q->__queue_lock; | ||
450 | |||
552 | return q; | 451 | return q; |
553 | } | 452 | } |
554 | EXPORT_SYMBOL(blk_alloc_queue_node); | 453 | EXPORT_SYMBOL(blk_alloc_queue_node); |
@@ -631,9 +530,11 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, | |||
631 | q->request_fn = rfn; | 530 | q->request_fn = rfn; |
632 | q->prep_rq_fn = NULL; | 531 | q->prep_rq_fn = NULL; |
633 | q->unprep_rq_fn = NULL; | 532 | q->unprep_rq_fn = NULL; |
634 | q->unplug_fn = generic_unplug_device; | ||
635 | q->queue_flags = QUEUE_FLAG_DEFAULT; | 533 | q->queue_flags = QUEUE_FLAG_DEFAULT; |
636 | q->queue_lock = lock; | 534 | |
535 | /* Override internal queue lock with supplied lock pointer */ | ||
536 | if (lock) | ||
537 | q->queue_lock = lock; | ||
637 | 538 | ||
638 | /* | 539 | /* |
639 | * This also sets hw/phys segments, boundary and size | 540 | * This also sets hw/phys segments, boundary and size |
@@ -666,6 +567,8 @@ int blk_get_queue(struct request_queue *q) | |||
666 | 567 | ||
667 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 568 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
668 | { | 569 | { |
570 | BUG_ON(rq->cmd_flags & REQ_ON_PLUG); | ||
571 | |||
669 | if (rq->cmd_flags & REQ_ELVPRIV) | 572 | if (rq->cmd_flags & REQ_ELVPRIV) |
670 | elv_put_request(q, rq); | 573 | elv_put_request(q, rq); |
671 | mempool_free(rq, q->rq.rq_pool); | 574 | mempool_free(rq, q->rq.rq_pool); |
@@ -762,6 +665,25 @@ static void freed_request(struct request_queue *q, int sync, int priv) | |||
762 | } | 665 | } |
763 | 666 | ||
764 | /* | 667 | /* |
668 | * Determine if elevator data should be initialized when allocating the | ||
669 | * request associated with @bio. | ||
670 | */ | ||
671 | static bool blk_rq_should_init_elevator(struct bio *bio) | ||
672 | { | ||
673 | if (!bio) | ||
674 | return true; | ||
675 | |||
676 | /* | ||
677 | * Flush requests do not use the elevator so skip initialization. | ||
678 | * This allows a request to share the flush and elevator data. | ||
679 | */ | ||
680 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) | ||
681 | return false; | ||
682 | |||
683 | return true; | ||
684 | } | ||
685 | |||
686 | /* | ||
765 | * Get a free request, queue_lock must be held. | 687 | * Get a free request, queue_lock must be held. |
766 | * Returns NULL on failure, with queue_lock held. | 688 | * Returns NULL on failure, with queue_lock held. |
767 | * Returns !NULL on success, with queue_lock *not held*. | 689 | * Returns !NULL on success, with queue_lock *not held*. |
@@ -773,7 +695,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, | |||
773 | struct request_list *rl = &q->rq; | 695 | struct request_list *rl = &q->rq; |
774 | struct io_context *ioc = NULL; | 696 | struct io_context *ioc = NULL; |
775 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 697 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
776 | int may_queue, priv; | 698 | int may_queue, priv = 0; |
777 | 699 | ||
778 | may_queue = elv_may_queue(q, rw_flags); | 700 | may_queue = elv_may_queue(q, rw_flags); |
779 | if (may_queue == ELV_MQUEUE_NO) | 701 | if (may_queue == ELV_MQUEUE_NO) |
@@ -817,9 +739,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags, | |||
817 | rl->count[is_sync]++; | 739 | rl->count[is_sync]++; |
818 | rl->starved[is_sync] = 0; | 740 | rl->starved[is_sync] = 0; |
819 | 741 | ||
820 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 742 | if (blk_rq_should_init_elevator(bio)) { |
821 | if (priv) | 743 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
822 | rl->elvpriv++; | 744 | if (priv) |
745 | rl->elvpriv++; | ||
746 | } | ||
823 | 747 | ||
824 | if (blk_queue_io_stat(q)) | 748 | if (blk_queue_io_stat(q)) |
825 | rw_flags |= REQ_IO_STAT; | 749 | rw_flags |= REQ_IO_STAT; |
@@ -866,8 +790,8 @@ out: | |||
866 | } | 790 | } |
867 | 791 | ||
868 | /* | 792 | /* |
869 | * No available requests for this queue, unplug the device and wait for some | 793 | * No available requests for this queue, wait for some requests to become |
870 | * requests to become available. | 794 | * available. |
871 | * | 795 | * |
872 | * Called with q->queue_lock held, and returns with it unlocked. | 796 | * Called with q->queue_lock held, and returns with it unlocked. |
873 | */ | 797 | */ |
@@ -888,7 +812,6 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, | |||
888 | 812 | ||
889 | trace_block_sleeprq(q, bio, rw_flags & 1); | 813 | trace_block_sleeprq(q, bio, rw_flags & 1); |
890 | 814 | ||
891 | __generic_unplug_device(q); | ||
892 | spin_unlock_irq(q->queue_lock); | 815 | spin_unlock_irq(q->queue_lock); |
893 | io_schedule(); | 816 | io_schedule(); |
894 | 817 | ||
@@ -1010,6 +933,13 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) | |||
1010 | } | 933 | } |
1011 | EXPORT_SYMBOL(blk_requeue_request); | 934 | EXPORT_SYMBOL(blk_requeue_request); |
1012 | 935 | ||
936 | static void add_acct_request(struct request_queue *q, struct request *rq, | ||
937 | int where) | ||
938 | { | ||
939 | drive_stat_acct(rq, 1); | ||
940 | __elv_add_request(q, rq, where); | ||
941 | } | ||
942 | |||
1013 | /** | 943 | /** |
1014 | * blk_insert_request - insert a special request into a request queue | 944 | * blk_insert_request - insert a special request into a request queue |
1015 | * @q: request queue where request should be inserted | 945 | * @q: request queue where request should be inserted |
@@ -1052,9 +982,8 @@ void blk_insert_request(struct request_queue *q, struct request *rq, | |||
1052 | if (blk_rq_tagged(rq)) | 982 | if (blk_rq_tagged(rq)) |
1053 | blk_queue_end_tag(q, rq); | 983 | blk_queue_end_tag(q, rq); |
1054 | 984 | ||
1055 | drive_stat_acct(rq, 1); | 985 | add_acct_request(q, rq, where); |
1056 | __elv_add_request(q, rq, where, 0); | 986 | __blk_run_queue(q); |
1057 | __blk_run_queue(q, false); | ||
1058 | spin_unlock_irqrestore(q->queue_lock, flags); | 987 | spin_unlock_irqrestore(q->queue_lock, flags); |
1059 | } | 988 | } |
1060 | EXPORT_SYMBOL(blk_insert_request); | 989 | EXPORT_SYMBOL(blk_insert_request); |
@@ -1174,6 +1103,113 @@ void blk_add_request_payload(struct request *rq, struct page *page, | |||
1174 | } | 1103 | } |
1175 | EXPORT_SYMBOL_GPL(blk_add_request_payload); | 1104 | EXPORT_SYMBOL_GPL(blk_add_request_payload); |
1176 | 1105 | ||
1106 | static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, | ||
1107 | struct bio *bio) | ||
1108 | { | ||
1109 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; | ||
1110 | |||
1111 | /* | ||
1112 | * Debug stuff, kill later | ||
1113 | */ | ||
1114 | if (!rq_mergeable(req)) { | ||
1115 | blk_dump_rq_flags(req, "back"); | ||
1116 | return false; | ||
1117 | } | ||
1118 | |||
1119 | if (!ll_back_merge_fn(q, req, bio)) | ||
1120 | return false; | ||
1121 | |||
1122 | trace_block_bio_backmerge(q, bio); | ||
1123 | |||
1124 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | ||
1125 | blk_rq_set_mixed_merge(req); | ||
1126 | |||
1127 | req->biotail->bi_next = bio; | ||
1128 | req->biotail = bio; | ||
1129 | req->__data_len += bio->bi_size; | ||
1130 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | ||
1131 | |||
1132 | drive_stat_acct(req, 0); | ||
1133 | return true; | ||
1134 | } | ||
1135 | |||
1136 | static bool bio_attempt_front_merge(struct request_queue *q, | ||
1137 | struct request *req, struct bio *bio) | ||
1138 | { | ||
1139 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; | ||
1140 | sector_t sector; | ||
1141 | |||
1142 | /* | ||
1143 | * Debug stuff, kill later | ||
1144 | */ | ||
1145 | if (!rq_mergeable(req)) { | ||
1146 | blk_dump_rq_flags(req, "front"); | ||
1147 | return false; | ||
1148 | } | ||
1149 | |||
1150 | if (!ll_front_merge_fn(q, req, bio)) | ||
1151 | return false; | ||
1152 | |||
1153 | trace_block_bio_frontmerge(q, bio); | ||
1154 | |||
1155 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | ||
1156 | blk_rq_set_mixed_merge(req); | ||
1157 | |||
1158 | sector = bio->bi_sector; | ||
1159 | |||
1160 | bio->bi_next = req->bio; | ||
1161 | req->bio = bio; | ||
1162 | |||
1163 | /* | ||
1164 | * may not be valid. if the low level driver said | ||
1165 | * it didn't need a bounce buffer then it better | ||
1166 | * not touch req->buffer either... | ||
1167 | */ | ||
1168 | req->buffer = bio_data(bio); | ||
1169 | req->__sector = bio->bi_sector; | ||
1170 | req->__data_len += bio->bi_size; | ||
1171 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | ||
1172 | |||
1173 | drive_stat_acct(req, 0); | ||
1174 | return true; | ||
1175 | } | ||
1176 | |||
1177 | /* | ||
1178 | * Attempts to merge with the plugged list in the current process. Returns | ||
1179 | * true if merge was successful, otherwise false. | ||
1180 | */ | ||
1181 | static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, | ||
1182 | struct bio *bio) | ||
1183 | { | ||
1184 | struct blk_plug *plug; | ||
1185 | struct request *rq; | ||
1186 | bool ret = false; | ||
1187 | |||
1188 | plug = tsk->plug; | ||
1189 | if (!plug) | ||
1190 | goto out; | ||
1191 | |||
1192 | list_for_each_entry_reverse(rq, &plug->list, queuelist) { | ||
1193 | int el_ret; | ||
1194 | |||
1195 | if (rq->q != q) | ||
1196 | continue; | ||
1197 | |||
1198 | el_ret = elv_try_merge(rq, bio); | ||
1199 | if (el_ret == ELEVATOR_BACK_MERGE) { | ||
1200 | ret = bio_attempt_back_merge(q, rq, bio); | ||
1201 | if (ret) | ||
1202 | break; | ||
1203 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { | ||
1204 | ret = bio_attempt_front_merge(q, rq, bio); | ||
1205 | if (ret) | ||
1206 | break; | ||
1207 | } | ||
1208 | } | ||
1209 | out: | ||
1210 | return ret; | ||
1211 | } | ||
1212 | |||
1177 | void init_request_from_bio(struct request *req, struct bio *bio) | 1213 | void init_request_from_bio(struct request *req, struct bio *bio) |
1178 | { | 1214 | { |
1179 | req->cpu = bio->bi_comp_cpu; | 1215 | req->cpu = bio->bi_comp_cpu; |
@@ -1189,26 +1225,12 @@ void init_request_from_bio(struct request *req, struct bio *bio) | |||
1189 | blk_rq_bio_prep(req->q, req, bio); | 1225 | blk_rq_bio_prep(req->q, req, bio); |
1190 | } | 1226 | } |
1191 | 1227 | ||
1192 | /* | ||
1193 | * Only disabling plugging for non-rotational devices if it does tagging | ||
1194 | * as well, otherwise we do need the proper merging | ||
1195 | */ | ||
1196 | static inline bool queue_should_plug(struct request_queue *q) | ||
1197 | { | ||
1198 | return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); | ||
1199 | } | ||
1200 | |||
1201 | static int __make_request(struct request_queue *q, struct bio *bio) | 1228 | static int __make_request(struct request_queue *q, struct bio *bio) |
1202 | { | 1229 | { |
1203 | struct request *req; | ||
1204 | int el_ret; | ||
1205 | unsigned int bytes = bio->bi_size; | ||
1206 | const unsigned short prio = bio_prio(bio); | ||
1207 | const bool sync = !!(bio->bi_rw & REQ_SYNC); | 1230 | const bool sync = !!(bio->bi_rw & REQ_SYNC); |
1208 | const bool unplug = !!(bio->bi_rw & REQ_UNPLUG); | 1231 | struct blk_plug *plug; |
1209 | const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK; | 1232 | int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; |
1210 | int where = ELEVATOR_INSERT_SORT; | 1233 | struct request *req; |
1211 | int rw_flags; | ||
1212 | 1234 | ||
1213 | /* | 1235 | /* |
1214 | * low level driver can indicate that it wants pages above a | 1236 | * low level driver can indicate that it wants pages above a |
@@ -1217,78 +1239,36 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1217 | */ | 1239 | */ |
1218 | blk_queue_bounce(q, &bio); | 1240 | blk_queue_bounce(q, &bio); |
1219 | 1241 | ||
1220 | spin_lock_irq(q->queue_lock); | ||
1221 | |||
1222 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { | 1242 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { |
1223 | where = ELEVATOR_INSERT_FRONT; | 1243 | spin_lock_irq(q->queue_lock); |
1244 | where = ELEVATOR_INSERT_FLUSH; | ||
1224 | goto get_rq; | 1245 | goto get_rq; |
1225 | } | 1246 | } |
1226 | 1247 | ||
1227 | if (elv_queue_empty(q)) | 1248 | /* |
1228 | goto get_rq; | 1249 | * Check if we can merge with the plugged list before grabbing |
1229 | 1250 | * any locks. | |
1230 | el_ret = elv_merge(q, &req, bio); | 1251 | */ |
1231 | switch (el_ret) { | 1252 | if (attempt_plug_merge(current, q, bio)) |
1232 | case ELEVATOR_BACK_MERGE: | ||
1233 | BUG_ON(!rq_mergeable(req)); | ||
1234 | |||
1235 | if (!ll_back_merge_fn(q, req, bio)) | ||
1236 | break; | ||
1237 | |||
1238 | trace_block_bio_backmerge(q, bio); | ||
1239 | |||
1240 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | ||
1241 | blk_rq_set_mixed_merge(req); | ||
1242 | |||
1243 | req->biotail->bi_next = bio; | ||
1244 | req->biotail = bio; | ||
1245 | req->__data_len += bytes; | ||
1246 | req->ioprio = ioprio_best(req->ioprio, prio); | ||
1247 | if (!blk_rq_cpu_valid(req)) | ||
1248 | req->cpu = bio->bi_comp_cpu; | ||
1249 | drive_stat_acct(req, 0); | ||
1250 | elv_bio_merged(q, req, bio); | ||
1251 | if (!attempt_back_merge(q, req)) | ||
1252 | elv_merged_request(q, req, el_ret); | ||
1253 | goto out; | 1253 | goto out; |
1254 | 1254 | ||
1255 | case ELEVATOR_FRONT_MERGE: | 1255 | spin_lock_irq(q->queue_lock); |
1256 | BUG_ON(!rq_mergeable(req)); | ||
1257 | |||
1258 | if (!ll_front_merge_fn(q, req, bio)) | ||
1259 | break; | ||
1260 | |||
1261 | trace_block_bio_frontmerge(q, bio); | ||
1262 | 1256 | ||
1263 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { | 1257 | el_ret = elv_merge(q, &req, bio); |
1264 | blk_rq_set_mixed_merge(req); | 1258 | if (el_ret == ELEVATOR_BACK_MERGE) { |
1265 | req->cmd_flags &= ~REQ_FAILFAST_MASK; | 1259 | BUG_ON(req->cmd_flags & REQ_ON_PLUG); |
1266 | req->cmd_flags |= ff; | 1260 | if (bio_attempt_back_merge(q, req, bio)) { |
1261 | if (!attempt_back_merge(q, req)) | ||
1262 | elv_merged_request(q, req, el_ret); | ||
1263 | goto out_unlock; | ||
1264 | } | ||
1265 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { | ||
1266 | BUG_ON(req->cmd_flags & REQ_ON_PLUG); | ||
1267 | if (bio_attempt_front_merge(q, req, bio)) { | ||
1268 | if (!attempt_front_merge(q, req)) | ||
1269 | elv_merged_request(q, req, el_ret); | ||
1270 | goto out_unlock; | ||
1267 | } | 1271 | } |
1268 | |||
1269 | bio->bi_next = req->bio; | ||
1270 | req->bio = bio; | ||
1271 | |||
1272 | /* | ||
1273 | * may not be valid. if the low level driver said | ||
1274 | * it didn't need a bounce buffer then it better | ||
1275 | * not touch req->buffer either... | ||
1276 | */ | ||
1277 | req->buffer = bio_data(bio); | ||
1278 | req->__sector = bio->bi_sector; | ||
1279 | req->__data_len += bytes; | ||
1280 | req->ioprio = ioprio_best(req->ioprio, prio); | ||
1281 | if (!blk_rq_cpu_valid(req)) | ||
1282 | req->cpu = bio->bi_comp_cpu; | ||
1283 | drive_stat_acct(req, 0); | ||
1284 | elv_bio_merged(q, req, bio); | ||
1285 | if (!attempt_front_merge(q, req)) | ||
1286 | elv_merged_request(q, req, el_ret); | ||
1287 | goto out; | ||
1288 | |||
1289 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ | ||
1290 | default: | ||
1291 | ; | ||
1292 | } | 1272 | } |
1293 | 1273 | ||
1294 | get_rq: | 1274 | get_rq: |
@@ -1315,20 +1295,43 @@ get_rq: | |||
1315 | */ | 1295 | */ |
1316 | init_request_from_bio(req, bio); | 1296 | init_request_from_bio(req, bio); |
1317 | 1297 | ||
1318 | spin_lock_irq(q->queue_lock); | ||
1319 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || | 1298 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || |
1320 | bio_flagged(bio, BIO_CPU_AFFINE)) | 1299 | bio_flagged(bio, BIO_CPU_AFFINE)) { |
1321 | req->cpu = blk_cpu_to_group(smp_processor_id()); | 1300 | req->cpu = blk_cpu_to_group(get_cpu()); |
1322 | if (queue_should_plug(q) && elv_queue_empty(q)) | 1301 | put_cpu(); |
1323 | blk_plug_device(q); | 1302 | } |
1324 | 1303 | ||
1325 | /* insert the request into the elevator */ | 1304 | plug = current->plug; |
1326 | drive_stat_acct(req, 1); | 1305 | if (plug) { |
1327 | __elv_add_request(q, req, where, 0); | 1306 | /* |
1307 | * If this is the first request added after a plug, fire | ||
1308 | * of a plug trace. If others have been added before, check | ||
1309 | * if we have multiple devices in this plug. If so, make a | ||
1310 | * note to sort the list before dispatch. | ||
1311 | */ | ||
1312 | if (list_empty(&plug->list)) | ||
1313 | trace_block_plug(q); | ||
1314 | else if (!plug->should_sort) { | ||
1315 | struct request *__rq; | ||
1316 | |||
1317 | __rq = list_entry_rq(plug->list.prev); | ||
1318 | if (__rq->q != q) | ||
1319 | plug->should_sort = 1; | ||
1320 | } | ||
1321 | /* | ||
1322 | * Debug flag, kill later | ||
1323 | */ | ||
1324 | req->cmd_flags |= REQ_ON_PLUG; | ||
1325 | list_add_tail(&req->queuelist, &plug->list); | ||
1326 | drive_stat_acct(req, 1); | ||
1327 | } else { | ||
1328 | spin_lock_irq(q->queue_lock); | ||
1329 | add_acct_request(q, req, where); | ||
1330 | __blk_run_queue(q); | ||
1331 | out_unlock: | ||
1332 | spin_unlock_irq(q->queue_lock); | ||
1333 | } | ||
1328 | out: | 1334 | out: |
1329 | if (unplug || !queue_should_plug(q)) | ||
1330 | __generic_unplug_device(q); | ||
1331 | spin_unlock_irq(q->queue_lock); | ||
1332 | return 0; | 1335 | return 0; |
1333 | } | 1336 | } |
1334 | 1337 | ||
@@ -1731,9 +1734,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) | |||
1731 | */ | 1734 | */ |
1732 | BUG_ON(blk_queued_rq(rq)); | 1735 | BUG_ON(blk_queued_rq(rq)); |
1733 | 1736 | ||
1734 | drive_stat_acct(rq, 1); | 1737 | add_acct_request(q, rq, ELEVATOR_INSERT_BACK); |
1735 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); | ||
1736 | |||
1737 | spin_unlock_irqrestore(q->queue_lock, flags); | 1738 | spin_unlock_irqrestore(q->queue_lock, flags); |
1738 | 1739 | ||
1739 | return 0; | 1740 | return 0; |
@@ -1805,7 +1806,7 @@ static void blk_account_io_done(struct request *req) | |||
1805 | * normal IO on queueing nor completion. Accounting the | 1806 | * normal IO on queueing nor completion. Accounting the |
1806 | * containing request is enough. | 1807 | * containing request is enough. |
1807 | */ | 1808 | */ |
1808 | if (blk_do_io_stat(req) && req != &req->q->flush_rq) { | 1809 | if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) { |
1809 | unsigned long duration = jiffies - req->start_time; | 1810 | unsigned long duration = jiffies - req->start_time; |
1810 | const int rw = rq_data_dir(req); | 1811 | const int rw = rq_data_dir(req); |
1811 | struct hd_struct *part; | 1812 | struct hd_struct *part; |
@@ -2162,7 +2163,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) | |||
2162 | * size, something has gone terribly wrong. | 2163 | * size, something has gone terribly wrong. |
2163 | */ | 2164 | */ |
2164 | if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { | 2165 | if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { |
2165 | printk(KERN_ERR "blk: request botched\n"); | 2166 | blk_dump_rq_flags(req, "request botched"); |
2166 | req->__data_len = blk_rq_cur_bytes(req); | 2167 | req->__data_len = blk_rq_cur_bytes(req); |
2167 | } | 2168 | } |
2168 | 2169 | ||
@@ -2628,6 +2629,166 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) | |||
2628 | } | 2629 | } |
2629 | EXPORT_SYMBOL(kblockd_schedule_work); | 2630 | EXPORT_SYMBOL(kblockd_schedule_work); |
2630 | 2631 | ||
2632 | int kblockd_schedule_delayed_work(struct request_queue *q, | ||
2633 | struct delayed_work *dwork, unsigned long delay) | ||
2634 | { | ||
2635 | return queue_delayed_work(kblockd_workqueue, dwork, delay); | ||
2636 | } | ||
2637 | EXPORT_SYMBOL(kblockd_schedule_delayed_work); | ||
2638 | |||
2639 | #define PLUG_MAGIC 0x91827364 | ||
2640 | |||
2641 | void blk_start_plug(struct blk_plug *plug) | ||
2642 | { | ||
2643 | struct task_struct *tsk = current; | ||
2644 | |||
2645 | plug->magic = PLUG_MAGIC; | ||
2646 | INIT_LIST_HEAD(&plug->list); | ||
2647 | INIT_LIST_HEAD(&plug->cb_list); | ||
2648 | plug->should_sort = 0; | ||
2649 | |||
2650 | /* | ||
2651 | * If this is a nested plug, don't actually assign it. It will be | ||
2652 | * flushed on its own. | ||
2653 | */ | ||
2654 | if (!tsk->plug) { | ||
2655 | /* | ||
2656 | * Store ordering should not be needed here, since a potential | ||
2657 | * preempt will imply a full memory barrier | ||
2658 | */ | ||
2659 | tsk->plug = plug; | ||
2660 | } | ||
2661 | } | ||
2662 | EXPORT_SYMBOL(blk_start_plug); | ||
2663 | |||
2664 | static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
2665 | { | ||
2666 | struct request *rqa = container_of(a, struct request, queuelist); | ||
2667 | struct request *rqb = container_of(b, struct request, queuelist); | ||
2668 | |||
2669 | return !(rqa->q <= rqb->q); | ||
2670 | } | ||
2671 | |||
2672 | /* | ||
2673 | * If 'from_schedule' is true, then postpone the dispatch of requests | ||
2674 | * until a safe kblockd context. We do this to avoid accidental big | ||
2675 | * additional stack usage in driver dispatch, in places where the original | ||
2676 | * plugger did not intend it. | ||
2677 | */ | ||
2678 | static void queue_unplugged(struct request_queue *q, unsigned int depth, | ||
2679 | bool from_schedule) | ||
2680 | __releases(q->queue_lock) | ||
2681 | { | ||
2682 | trace_block_unplug(q, depth, !from_schedule); | ||
2683 | |||
2684 | /* | ||
2685 | * If we are punting this to kblockd, then we can safely drop | ||
2686 | * the queue_lock before waking kblockd (which needs to take | ||
2687 | * this lock). | ||
2688 | */ | ||
2689 | if (from_schedule) { | ||
2690 | spin_unlock(q->queue_lock); | ||
2691 | blk_run_queue_async(q); | ||
2692 | } else { | ||
2693 | __blk_run_queue(q); | ||
2694 | spin_unlock(q->queue_lock); | ||
2695 | } | ||
2696 | |||
2697 | } | ||
2698 | |||
2699 | static void flush_plug_callbacks(struct blk_plug *plug) | ||
2700 | { | ||
2701 | LIST_HEAD(callbacks); | ||
2702 | |||
2703 | if (list_empty(&plug->cb_list)) | ||
2704 | return; | ||
2705 | |||
2706 | list_splice_init(&plug->cb_list, &callbacks); | ||
2707 | |||
2708 | while (!list_empty(&callbacks)) { | ||
2709 | struct blk_plug_cb *cb = list_first_entry(&callbacks, | ||
2710 | struct blk_plug_cb, | ||
2711 | list); | ||
2712 | list_del(&cb->list); | ||
2713 | cb->callback(cb); | ||
2714 | } | ||
2715 | } | ||
2716 | |||
2717 | void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) | ||
2718 | { | ||
2719 | struct request_queue *q; | ||
2720 | unsigned long flags; | ||
2721 | struct request *rq; | ||
2722 | LIST_HEAD(list); | ||
2723 | unsigned int depth; | ||
2724 | |||
2725 | BUG_ON(plug->magic != PLUG_MAGIC); | ||
2726 | |||
2727 | flush_plug_callbacks(plug); | ||
2728 | if (list_empty(&plug->list)) | ||
2729 | return; | ||
2730 | |||
2731 | list_splice_init(&plug->list, &list); | ||
2732 | |||
2733 | if (plug->should_sort) { | ||
2734 | list_sort(NULL, &list, plug_rq_cmp); | ||
2735 | plug->should_sort = 0; | ||
2736 | } | ||
2737 | |||
2738 | q = NULL; | ||
2739 | depth = 0; | ||
2740 | |||
2741 | /* | ||
2742 | * Save and disable interrupts here, to avoid doing it for every | ||
2743 | * queue lock we have to take. | ||
2744 | */ | ||
2745 | local_irq_save(flags); | ||
2746 | while (!list_empty(&list)) { | ||
2747 | rq = list_entry_rq(list.next); | ||
2748 | list_del_init(&rq->queuelist); | ||
2749 | BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); | ||
2750 | BUG_ON(!rq->q); | ||
2751 | if (rq->q != q) { | ||
2752 | /* | ||
2753 | * This drops the queue lock | ||
2754 | */ | ||
2755 | if (q) | ||
2756 | queue_unplugged(q, depth, from_schedule); | ||
2757 | q = rq->q; | ||
2758 | depth = 0; | ||
2759 | spin_lock(q->queue_lock); | ||
2760 | } | ||
2761 | rq->cmd_flags &= ~REQ_ON_PLUG; | ||
2762 | |||
2763 | /* | ||
2764 | * rq is already accounted, so use raw insert | ||
2765 | */ | ||
2766 | if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) | ||
2767 | __elv_add_request(q, rq, ELEVATOR_INSERT_FLUSH); | ||
2768 | else | ||
2769 | __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); | ||
2770 | |||
2771 | depth++; | ||
2772 | } | ||
2773 | |||
2774 | /* | ||
2775 | * This drops the queue lock | ||
2776 | */ | ||
2777 | if (q) | ||
2778 | queue_unplugged(q, depth, from_schedule); | ||
2779 | |||
2780 | local_irq_restore(flags); | ||
2781 | } | ||
2782 | |||
2783 | void blk_finish_plug(struct blk_plug *plug) | ||
2784 | { | ||
2785 | blk_flush_plug_list(plug, false); | ||
2786 | |||
2787 | if (plug == current->plug) | ||
2788 | current->plug = NULL; | ||
2789 | } | ||
2790 | EXPORT_SYMBOL(blk_finish_plug); | ||
2791 | |||
2631 | int __init blk_dev_init(void) | 2792 | int __init blk_dev_init(void) |
2632 | { | 2793 | { |
2633 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * | 2794 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * |
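[Editor's note: a hedged sketch, not part of the diff.] The explicit device plugging removed above is replaced by an on-stack plug owned by the submitting task, using blk_start_plug()/blk_finish_plug() added in this file. A minimal usage sketch; the function and variable names are illustrative:

	#include <linux/bio.h>
	#include <linux/blkdev.h>

	static void example_submit_batch(struct bio **bios, int nr)
	{
		struct blk_plug plug;
		int i;

		blk_start_plug(&plug);		/* bios below queue up on current->plug */
		for (i = 0; i < nr; i++)
			submit_bio(READ, bios[i]);
		blk_finish_plug(&plug);		/* sorts by queue, then dispatches */
	}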
diff --git a/block/blk-exec.c b/block/blk-exec.c
index cf1456a02acd..81e31819a597 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -54,8 +54,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | |||
54 | rq->end_io = done; | 54 | rq->end_io = done; |
55 | WARN_ON(irqs_disabled()); | 55 | WARN_ON(irqs_disabled()); |
56 | spin_lock_irq(q->queue_lock); | 56 | spin_lock_irq(q->queue_lock); |
57 | __elv_add_request(q, rq, where, 1); | 57 | __elv_add_request(q, rq, where); |
58 | __generic_unplug_device(q); | 58 | __blk_run_queue(q); |
59 | /* the queue is stopped so it won't be plugged+unplugged */ | 59 | /* the queue is stopped so it won't be plugged+unplugged */ |
60 | if (rq->cmd_type == REQ_TYPE_PM_RESUME) | 60 | if (rq->cmd_type == REQ_TYPE_PM_RESUME) |
61 | q->request_fn(q); | 61 | q->request_fn(q); |
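[Editor's note: a hedged sketch, not part of the diff.] For callers of blk_execute_rq_nowait() nothing changes; the request is now kicked off through __blk_run_queue() rather than an unplug. A sketch of such a caller, where the prepared 'rq' and the 'my_end_io' completion are assumed to be set up elsewhere:

	static void example_issue_nowait(struct request_queue *q, struct request *rq,
					 rq_end_io_fn *my_end_io)
	{
		rq->cmd_type = REQ_TYPE_SPECIAL;	/* driver-private command */
		blk_execute_rq_nowait(q, NULL, rq, 0, my_end_io);
	}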
diff --git a/block/blk-flush.c b/block/blk-flush.c
index b27d0208611b..6c9b5e189e62 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -1,6 +1,69 @@ | |||
1 | /* | 1 | /* |
2 | * Functions to sequence FLUSH and FUA writes. | 2 | * Functions to sequence FLUSH and FUA writes. |
3 | * | ||
4 | * Copyright (C) 2011 Max Planck Institute for Gravitational Physics | ||
5 | * Copyright (C) 2011 Tejun Heo <tj@kernel.org> | ||
6 | * | ||
7 | * This file is released under the GPLv2. | ||
8 | * | ||
9 | * REQ_{FLUSH|FUA} requests are decomposed to sequences consisted of three | ||
10 | * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request | ||
11 | * properties and hardware capability. | ||
12 | * | ||
13 | * If a request doesn't have data, only REQ_FLUSH makes sense, which | ||
14 | * indicates a simple flush request. If there is data, REQ_FLUSH indicates | ||
15 | * that the device cache should be flushed before the data is executed, and | ||
16 | * REQ_FUA means that the data must be on non-volatile media on request | ||
17 | * completion. | ||
18 | * | ||
19 | * If the device doesn't have writeback cache, FLUSH and FUA don't make any | ||
20 | * difference. The requests are either completed immediately if there's no | ||
21 | * data or executed as normal requests otherwise. | ||
22 | * | ||
23 | * If the device has writeback cache and supports FUA, REQ_FLUSH is | ||
24 | * translated to PREFLUSH but REQ_FUA is passed down directly with DATA. | ||
25 | * | ||
26 | * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is | ||
27 | * translated to PREFLUSH and REQ_FUA to POSTFLUSH. | ||
28 | * | ||
29 | * The actual execution of flush is double buffered. Whenever a request | ||
30 | * needs to execute PRE or POSTFLUSH, it queues at | ||
31 | * q->flush_queue[q->flush_pending_idx]. Once certain criteria are met, a | ||
32 | * flush is issued and the pending_idx is toggled. When the flush | ||
33 | * completes, all the requests which were pending are proceeded to the next | ||
34 | * step. This allows arbitrary merging of different types of FLUSH/FUA | ||
35 | * requests. | ||
36 | * | ||
37 | * Currently, the following conditions are used to determine when to issue | ||
38 | * flush. | ||
39 | * | ||
40 | * C1. At any given time, only one flush shall be in progress. This makes | ||
41 | * double buffering sufficient. | ||
42 | * | ||
43 | * C2. Flush is deferred if any request is executing DATA of its sequence. | ||
44 | * This avoids issuing separate POSTFLUSHes for requests which shared | ||
45 | * PREFLUSH. | ||
46 | * | ||
47 | * C3. The second condition is ignored if there is a request which has | ||
48 | * waited longer than FLUSH_PENDING_TIMEOUT. This is to avoid | ||
49 | * starvation in the unlikely case where there are continuous stream of | ||
50 | * FUA (without FLUSH) requests. | ||
51 | * | ||
52 | * For devices which support FUA, it isn't clear whether C2 (and thus C3) | ||
53 | * is beneficial. | ||
54 | * | ||
55 | * Note that a sequenced FLUSH/FUA request with DATA is completed twice. | ||
56 | * Once while executing DATA and again after the whole sequence is | ||
57 | * complete. The first completion updates the contained bio but doesn't | ||
58 | * finish it so that the bio submitter is notified only after the whole | ||
59 | * sequence is complete. This is implemented by testing REQ_FLUSH_SEQ in | ||
60 | * req_bio_endio(). | ||
61 | * | ||
62 | * The above peculiarity requires that each FLUSH/FUA request has only one | ||
63 | * bio attached to it, which is guaranteed as they aren't allowed to be | ||
64 | * merged in the usual way. | ||
3 | */ | 65 | */ |
66 | |||
4 | #include <linux/kernel.h> | 67 | #include <linux/kernel.h> |
5 | #include <linux/module.h> | 68 | #include <linux/module.h> |
6 | #include <linux/bio.h> | 69 | #include <linux/bio.h> |
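[Editor's note: a hedged illustration, not part of the diff.] As a concrete instance of the sequencing described in the new header comment above, consider a journal commit block submitted as a flush+FUA write; 'bio' is assumed to be fully initialized by the caller:

	/* On a queue with a writeback cache and no FUA support, this single bio
	 * is expanded by the code in this file to PREFLUSH -> DATA -> POSTFLUSH;
	 * on a FUA-capable queue the POSTFLUSH step is dropped and REQ_FUA rides
	 * along with the data. */
	static void example_commit_write(struct bio *bio)
	{
		submit_bio(WRITE_FLUSH_FUA, bio);	/* REQ_WRITE|REQ_FLUSH|REQ_FUA (+ sync hints) */
	}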
@@ -11,58 +74,142 @@ | |||
11 | 74 | ||
12 | /* FLUSH/FUA sequences */ | 75 | /* FLUSH/FUA sequences */ |
13 | enum { | 76 | enum { |
14 | QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ | 77 | REQ_FSEQ_PREFLUSH = (1 << 0), /* pre-flushing in progress */ |
15 | QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ | 78 | REQ_FSEQ_DATA = (1 << 1), /* data write in progress */ |
16 | QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ | 79 | REQ_FSEQ_POSTFLUSH = (1 << 2), /* post-flushing in progress */ |
17 | QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ | 80 | REQ_FSEQ_DONE = (1 << 3), |
18 | QUEUE_FSEQ_DONE = (1 << 4), | 81 | |
82 | REQ_FSEQ_ACTIONS = REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA | | ||
83 | REQ_FSEQ_POSTFLUSH, | ||
84 | |||
85 | /* | ||
86 | * If flush has been pending longer than the following timeout, | ||
87 | * it's issued even if flush_data requests are still in flight. | ||
88 | */ | ||
89 | FLUSH_PENDING_TIMEOUT = 5 * HZ, | ||
19 | }; | 90 | }; |
20 | 91 | ||
21 | static struct request *queue_next_fseq(struct request_queue *q); | 92 | static bool blk_kick_flush(struct request_queue *q); |
22 | 93 | ||
23 | unsigned blk_flush_cur_seq(struct request_queue *q) | 94 | static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) |
24 | { | 95 | { |
25 | if (!q->flush_seq) | 96 | unsigned int policy = 0; |
26 | return 0; | 97 | |
27 | return 1 << ffz(q->flush_seq); | 98 | if (fflags & REQ_FLUSH) { |
99 | if (rq->cmd_flags & REQ_FLUSH) | ||
100 | policy |= REQ_FSEQ_PREFLUSH; | ||
101 | if (blk_rq_sectors(rq)) | ||
102 | policy |= REQ_FSEQ_DATA; | ||
103 | if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) | ||
104 | policy |= REQ_FSEQ_POSTFLUSH; | ||
105 | } | ||
106 | return policy; | ||
28 | } | 107 | } |
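blk_flush_policy() above boils each request down to the steps it actually needs, given what the queue advertises. The standalone sketch below mirrors that logic with simplified flag names of my own; the printed cases show FUA being emulated with a postflush only when the device lacks native FUA, and everything collapsing to nothing on a write-through queue.

```c
/* Standalone model of the flush-policy decision; flag names are simplified. */
#include <stdio.h>

#define Q_FLUSH   (1 << 0)  /* device needs explicit cache flushes */
#define Q_FUA     (1 << 1)  /* device supports FUA writes natively */
#define RQ_FLUSH  (1 << 0)  /* request asked for a preceding flush */
#define RQ_FUA    (1 << 1)  /* request asked for FUA semantics */

#define P_PREFLUSH   (1 << 0)
#define P_DATA       (1 << 1)
#define P_POSTFLUSH  (1 << 2)

static unsigned int flush_policy(unsigned int qflags, unsigned int rqflags,
                                 unsigned int sectors)
{
    unsigned int policy = 0;

    if (qflags & Q_FLUSH) {
        if (rqflags & RQ_FLUSH)
            policy |= P_PREFLUSH;
        if (sectors)
            policy |= P_DATA;
        /* FUA must be emulated with a postflush if the device lacks it */
        if (!(qflags & Q_FUA) && (rqflags & RQ_FUA))
            policy |= P_POSTFLUSH;
    }
    return policy;
}

int main(void)
{
    /* FUA write, device without FUA: data plus trailing flush */
    printf("%x\n", flush_policy(Q_FLUSH, RQ_FUA, 8));           /* 0x6 */
    /* FUA write, device with FUA: plain data write, no emulation */
    printf("%x\n", flush_policy(Q_FLUSH | Q_FUA, RQ_FUA, 8));    /* 0x2 */
    /* empty flush: preflush only */
    printf("%x\n", flush_policy(Q_FLUSH, RQ_FLUSH, 0));          /* 0x1 */
    /* write-through device: nothing to sequence */
    printf("%x\n", flush_policy(0, RQ_FLUSH | RQ_FUA, 8));       /* 0x0 */
    return 0;
}
```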
29 | 108 | ||
30 | static struct request *blk_flush_complete_seq(struct request_queue *q, | 109 | static unsigned int blk_flush_cur_seq(struct request *rq) |
31 | unsigned seq, int error) | ||
32 | { | 110 | { |
33 | struct request *next_rq = NULL; | 111 | return 1 << ffz(rq->flush.seq); |
34 | 112 | } | |
35 | if (error && !q->flush_err) | 113 | |
36 | q->flush_err = error; | 114 | static void blk_flush_restore_request(struct request *rq) |
37 | 115 | { | |
38 | BUG_ON(q->flush_seq & seq); | 116 | /* |
39 | q->flush_seq |= seq; | 117 | * After flush data completion, @rq->bio is %NULL but we need to |
40 | 118 | * complete the bio again. @rq->biotail is guaranteed to equal the | |
41 | if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { | 119 | * original @rq->bio. Restore it. |
42 | /* not complete yet, queue the next flush sequence */ | 120 | */ |
43 | next_rq = queue_next_fseq(q); | 121 | rq->bio = rq->biotail; |
44 | } else { | 122 | |
45 | /* complete this flush request */ | 123 | /* make @rq a normal request */ |
46 | __blk_end_request_all(q->orig_flush_rq, q->flush_err); | 124 | rq->cmd_flags &= ~REQ_FLUSH_SEQ; |
47 | q->orig_flush_rq = NULL; | 125 | rq->end_io = NULL; |
48 | q->flush_seq = 0; | 126 | } |
49 | 127 | ||
50 | /* dispatch the next flush if there's one */ | 128 | /** |
51 | if (!list_empty(&q->pending_flushes)) { | 129 | * blk_flush_complete_seq - complete flush sequence |
52 | next_rq = list_entry_rq(q->pending_flushes.next); | 130 | * @rq: FLUSH/FUA request being sequenced |
53 | list_move(&next_rq->queuelist, &q->queue_head); | 131 | * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero) |
54 | } | 132 | * @error: whether an error occurred |
133 | * | ||
134 | * @rq just completed @seq part of its flush sequence, record the | ||
135 | * completion and trigger the next step. | ||
136 | * | ||
137 | * CONTEXT: | ||
138 | * spin_lock_irq(q->queue_lock) | ||
139 | * | ||
140 | * RETURNS: | ||
141 | * %true if requests were added to the dispatch queue, %false otherwise. | ||
142 | */ | ||
143 | static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, | ||
144 | int error) | ||
145 | { | ||
146 | struct request_queue *q = rq->q; | ||
147 | struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; | ||
148 | bool queued = false; | ||
149 | |||
150 | BUG_ON(rq->flush.seq & seq); | ||
151 | rq->flush.seq |= seq; | ||
152 | |||
153 | if (likely(!error)) | ||
154 | seq = blk_flush_cur_seq(rq); | ||
155 | else | ||
156 | seq = REQ_FSEQ_DONE; | ||
157 | |||
158 | switch (seq) { | ||
159 | case REQ_FSEQ_PREFLUSH: | ||
160 | case REQ_FSEQ_POSTFLUSH: | ||
161 | /* queue for flush */ | ||
162 | if (list_empty(pending)) | ||
163 | q->flush_pending_since = jiffies; | ||
164 | list_move_tail(&rq->flush.list, pending); | ||
165 | break; | ||
166 | |||
167 | case REQ_FSEQ_DATA: | ||
168 | list_move_tail(&rq->flush.list, &q->flush_data_in_flight); | ||
169 | list_add(&rq->queuelist, &q->queue_head); | ||
170 | queued = true; | ||
171 | break; | ||
172 | |||
173 | case REQ_FSEQ_DONE: | ||
174 | /* | ||
175 | * @rq was previously adjusted by blk_flush_issue() for | ||
176 | * flush sequencing and may already have gone through the | ||
177 | * flush data request completion path. Restore @rq for | ||
178 | * normal completion and end it. | ||
179 | */ | ||
180 | BUG_ON(!list_empty(&rq->queuelist)); | ||
181 | list_del_init(&rq->flush.list); | ||
182 | blk_flush_restore_request(rq); | ||
183 | __blk_end_request_all(rq, error); | ||
184 | break; | ||
185 | |||
186 | default: | ||
187 | BUG(); | ||
55 | } | 188 | } |
56 | return next_rq; | 189 | |
190 | return blk_kick_flush(q) | queued; | ||
57 | } | 191 | } |
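Seen end to end, a request enters the machinery with the steps it does not need already marked complete, and every completion advances it to the next outstanding step until REQ_FSEQ_DONE ends it. A compressed standalone walk of that sequencing (lists, error handling and the actual dispatching are omitted):

```c
/* Standalone walk of the flush sequence; queues and error paths omitted. */
#include <stdio.h>

enum {
    FSEQ_PREFLUSH  = 1 << 0,
    FSEQ_DATA      = 1 << 1,
    FSEQ_POSTFLUSH = 1 << 2,
    FSEQ_DONE      = 1 << 3,
    FSEQ_ACTIONS   = FSEQ_PREFLUSH | FSEQ_DATA | FSEQ_POSTFLUSH,
};

static const char *step_name(unsigned int s)
{
    switch (s) {
    case FSEQ_PREFLUSH:  return "PREFLUSH";
    case FSEQ_DATA:      return "DATA";
    case FSEQ_POSTFLUSH: return "POSTFLUSH";
    default:             return "DONE";
    }
}

int main(void)
{
    /* e.g. a FUA write on a device without FUA: data + postflush */
    unsigned int policy = FSEQ_DATA | FSEQ_POSTFLUSH;
    /* steps not in the policy are seeded as already complete */
    unsigned int seq = FSEQ_ACTIONS & ~policy;

    while (1) {
        unsigned int next = 1u << __builtin_ctz(~seq);  /* ~ ffz() */

        if (next == FSEQ_DONE) {
            printf("sequence complete, end the original request\n");
            break;
        }
        printf("execute %s\n", step_name(next));
        seq |= next;    /* the step's completion records it and re-walks */
    }
    return 0;
}
```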
58 | 192 | ||
59 | static void blk_flush_complete_seq_end_io(struct request_queue *q, | 193 | static void flush_end_io(struct request *flush_rq, int error) |
60 | unsigned seq, int error) | ||
61 | { | 194 | { |
62 | bool was_empty = elv_queue_empty(q); | 195 | struct request_queue *q = flush_rq->q; |
63 | struct request *next_rq; | 196 | struct list_head *running = &q->flush_queue[q->flush_running_idx]; |
197 | bool queued = false; | ||
198 | struct request *rq, *n; | ||
199 | |||
200 | BUG_ON(q->flush_pending_idx == q->flush_running_idx); | ||
201 | |||
202 | /* account completion of the flush request */ | ||
203 | q->flush_running_idx ^= 1; | ||
204 | elv_completed_request(q, flush_rq); | ||
64 | 205 | ||
65 | next_rq = blk_flush_complete_seq(q, seq, error); | 206 | /* and push the waiting requests to the next stage */ |
207 | list_for_each_entry_safe(rq, n, running, flush.list) { | ||
208 | unsigned int seq = blk_flush_cur_seq(rq); | ||
209 | |||
210 | BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); | ||
211 | queued |= blk_flush_complete_seq(rq, seq, error); | ||
212 | } | ||
66 | 213 | ||
67 | /* | 214 | /* |
68 | * Moving a request silently to empty queue_head may stall the | 215 | * Moving a request silently to empty queue_head may stall the |
@@ -70,127 +217,153 @@ static void blk_flush_complete_seq_end_io(struct request_queue *q, | |||
70 | * from request completion path and calling directly into | 217 | * from request completion path and calling directly into |
71 | * request_fn may confuse the driver. Always use kblockd. | 218 | * request_fn may confuse the driver. Always use kblockd. |
72 | */ | 219 | */ |
73 | if (was_empty && next_rq) | 220 | if (queued) |
74 | __blk_run_queue(q, true); | 221 | blk_run_queue_async(q); |
75 | } | 222 | } |
76 | 223 | ||
77 | static void pre_flush_end_io(struct request *rq, int error) | 224 | /** |
225 | * blk_kick_flush - consider issuing flush request | ||
226 | * @q: request_queue being kicked | ||
227 | * | ||
228 | * Flush related states of @q have changed, consider issuing flush request. | ||
229 | * Please read the comment at the top of this file for more info. | ||
230 | * | ||
231 | * CONTEXT: | ||
232 | * spin_lock_irq(q->queue_lock) | ||
233 | * | ||
234 | * RETURNS: | ||
235 | * %true if flush was issued, %false otherwise. | ||
236 | */ | ||
237 | static bool blk_kick_flush(struct request_queue *q) | ||
78 | { | 238 | { |
79 | elv_completed_request(rq->q, rq); | 239 | struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; |
80 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error); | 240 | struct request *first_rq = |
241 | list_first_entry(pending, struct request, flush.list); | ||
242 | |||
243 | /* C1 described at the top of this file */ | ||
244 | if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending)) | ||
245 | return false; | ||
246 | |||
247 | /* C2 and C3 */ | ||
248 | if (!list_empty(&q->flush_data_in_flight) && | ||
249 | time_before(jiffies, | ||
250 | q->flush_pending_since + FLUSH_PENDING_TIMEOUT)) | ||
251 | return false; | ||
252 | |||
253 | /* | ||
254 | * Issue flush and toggle pending_idx. This makes pending_idx | ||
255 | * different from running_idx, which means flush is in flight. | ||
256 | */ | ||
257 | blk_rq_init(q, &q->flush_rq); | ||
258 | q->flush_rq.cmd_type = REQ_TYPE_FS; | ||
259 | q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; | ||
260 | q->flush_rq.rq_disk = first_rq->rq_disk; | ||
261 | q->flush_rq.end_io = flush_end_io; | ||
262 | |||
263 | q->flush_pending_idx ^= 1; | ||
264 | list_add_tail(&q->flush_rq.queuelist, &q->queue_head); | ||
265 | return true; | ||
81 | } | 266 | } |
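The issue-or-defer decision in blk_kick_flush() is a pure predicate over four pieces of queue state. Isolating it as below (standalone C, names are mine, plain subtraction instead of time_before()) makes the mapping to C1-C3 explicit.

```c
/* The C1-C3 gate from blk_kick_flush(), isolated; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define PENDING_TIMEOUT 5   /* stand-in for FLUSH_PENDING_TIMEOUT (5 * HZ) */

static bool should_issue_flush(int pending_idx, int running_idx,
                               int nr_pending, int nr_data_in_flight,
                               long now, long pending_since)
{
    /* C1: a flush is already in flight, or nothing is waiting */
    if (pending_idx != running_idx || !nr_pending)
        return false;

    /* C2: defer while flush_data writes are in flight ... */
    /* C3: ... unless the oldest waiter has been pending too long */
    if (nr_data_in_flight && now - pending_since < PENDING_TIMEOUT)
        return false;

    return true;
}

int main(void)
{
    printf("%d\n", should_issue_flush(0, 0, 2, 0, 10, 9)); /* 1: idle, waiters */
    printf("%d\n", should_issue_flush(1, 0, 2, 0, 10, 9)); /* 0: C1, in flight */
    printf("%d\n", should_issue_flush(0, 0, 2, 3, 10, 9)); /* 0: C2, data busy */
    printf("%d\n", should_issue_flush(0, 0, 2, 3, 10, 2)); /* 1: C3, waited 8  */
    return 0;
}
```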
82 | 267 | ||
83 | static void flush_data_end_io(struct request *rq, int error) | 268 | static void flush_data_end_io(struct request *rq, int error) |
84 | { | 269 | { |
85 | elv_completed_request(rq->q, rq); | 270 | struct request_queue *q = rq->q; |
86 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error); | ||
87 | } | ||
88 | 271 | ||
89 | static void post_flush_end_io(struct request *rq, int error) | 272 | /* |
90 | { | 273 | * After populating an empty queue, kick it to avoid stall. Read |
91 | elv_completed_request(rq->q, rq); | 274 | * the comment in flush_end_io(). |
92 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error); | 275 | */ |
276 | if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) | ||
277 | blk_run_queue_async(q); | ||
93 | } | 278 | } |
94 | 279 | ||
95 | static void init_flush_request(struct request *rq, struct gendisk *disk) | 280 | /** |
281 | * blk_insert_flush - insert a new FLUSH/FUA request | ||
282 | * @rq: request to insert | ||
283 | * | ||
284 | * To be called from __elv_add_request() for %ELEVATOR_INSERT_FLUSH insertions. | ||
285 | * @rq is being submitted. Analyze what needs to be done and put it on the | ||
286 | * right queue. | ||
287 | * | ||
288 | * CONTEXT: | ||
289 | * spin_lock_irq(q->queue_lock) | ||
290 | */ | ||
291 | void blk_insert_flush(struct request *rq) | ||
96 | { | 292 | { |
97 | rq->cmd_type = REQ_TYPE_FS; | 293 | struct request_queue *q = rq->q; |
98 | rq->cmd_flags = WRITE_FLUSH; | 294 | unsigned int fflags = q->flush_flags; /* may change, cache */ |
99 | rq->rq_disk = disk; | 295 | unsigned int policy = blk_flush_policy(fflags, rq); |
100 | } | ||
101 | 296 | ||
102 | static struct request *queue_next_fseq(struct request_queue *q) | 297 | BUG_ON(rq->end_io); |
103 | { | 298 | BUG_ON(!rq->bio || rq->bio != rq->biotail); |
104 | struct request *orig_rq = q->orig_flush_rq; | ||
105 | struct request *rq = &q->flush_rq; | ||
106 | 299 | ||
107 | blk_rq_init(q, rq); | 300 | /* |
301 | * @policy now records what operations need to be done. Adjust | ||
302 | * REQ_FLUSH and FUA for the driver. | ||
303 | */ | ||
304 | rq->cmd_flags &= ~REQ_FLUSH; | ||
305 | if (!(fflags & REQ_FUA)) | ||
306 | rq->cmd_flags &= ~REQ_FUA; | ||
108 | 307 | ||
109 | switch (blk_flush_cur_seq(q)) { | 308 | /* |
110 | case QUEUE_FSEQ_PREFLUSH: | 309 | * If there's data but flush is not necessary, the request can be |
111 | init_flush_request(rq, orig_rq->rq_disk); | 310 | * processed directly without going through flush machinery. Queue |
112 | rq->end_io = pre_flush_end_io; | 311 | * for normal execution. |
113 | break; | 312 | */ |
114 | case QUEUE_FSEQ_DATA: | 313 | if ((policy & REQ_FSEQ_DATA) && |
115 | init_request_from_bio(rq, orig_rq->bio); | 314 | !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { |
116 | /* | 315 | list_add_tail(&rq->queuelist, &q->queue_head); |
117 | * orig_rq->rq_disk may be different from | 316 | return; |
118 | * bio->bi_bdev->bd_disk if orig_rq got here through | ||
119 | * remapping drivers. Make sure rq->rq_disk points | ||
120 | * to the same one as orig_rq. | ||
121 | */ | ||
122 | rq->rq_disk = orig_rq->rq_disk; | ||
123 | rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); | ||
124 | rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); | ||
125 | rq->end_io = flush_data_end_io; | ||
126 | break; | ||
127 | case QUEUE_FSEQ_POSTFLUSH: | ||
128 | init_flush_request(rq, orig_rq->rq_disk); | ||
129 | rq->end_io = post_flush_end_io; | ||
130 | break; | ||
131 | default: | ||
132 | BUG(); | ||
133 | } | 317 | } |
134 | 318 | ||
135 | elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); | 319 | /* |
136 | return rq; | 320 | * @rq should go through flush machinery. Mark it part of flush |
321 | * sequence and submit for further processing. | ||
322 | */ | ||
323 | memset(&rq->flush, 0, sizeof(rq->flush)); | ||
324 | INIT_LIST_HEAD(&rq->flush.list); | ||
325 | rq->cmd_flags |= REQ_FLUSH_SEQ; | ||
326 | rq->end_io = flush_data_end_io; | ||
327 | |||
328 | blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); | ||
137 | } | 329 | } |
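So blk_insert_flush() ends up with two destinations: straight onto the dispatch queue when only data is needed, or into the flush machinery with REQ_FLUSH_SEQ set and the skipped steps pre-marked complete. A condensed standalone sketch of that routing decision (simplified names of my own; the stripping of REQ_FLUSH/REQ_FUA for the driver is not modelled):

```c
/* Standalone sketch of the routing in blk_insert_flush(); not kernel code. */
#include <stdio.h>

#define P_PREFLUSH   (1 << 0)
#define P_DATA       (1 << 1)
#define P_POSTFLUSH  (1 << 2)

static const char *route(unsigned int policy)
{
    /* data with no pre/post flush bypasses the machinery entirely */
    if ((policy & P_DATA) && !(policy & (P_PREFLUSH | P_POSTFLUSH)))
        return "dispatch queue directly";
    return "flush machinery: set REQ_FLUSH_SEQ, seed skipped steps as done";
}

int main(void)
{
    printf("data only        -> %s\n", route(P_DATA));
    printf("data + postflush -> %s\n", route(P_DATA | P_POSTFLUSH));
    printf("empty flush      -> %s\n", route(P_PREFLUSH));
    return 0;
}
```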
138 | 330 | ||
139 | struct request *blk_do_flush(struct request_queue *q, struct request *rq) | 331 | /** |
332 | * blk_abort_flushes - @q is being aborted, abort flush requests | ||
333 | * @q: request_queue being aborted | ||
334 | * | ||
335 | * To be called from elv_abort_queue(). @q is being aborted. Prepare all | ||
336 | * FLUSH/FUA requests for abortion. | ||
337 | * | ||
338 | * CONTEXT: | ||
339 | * spin_lock_irq(q->queue_lock) | ||
340 | */ | ||
341 | void blk_abort_flushes(struct request_queue *q) | ||
140 | { | 342 | { |
141 | unsigned int fflags = q->flush_flags; /* may change, cache it */ | 343 | struct request *rq, *n; |
142 | bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; | 344 | int i; |
143 | bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); | ||
144 | bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); | ||
145 | unsigned skip = 0; | ||
146 | 345 | ||
147 | /* | 346 | /* |
148 | * Special case. If there's data but flush is not necessary, | 347 | * Requests in flight for data are already owned by the dispatch |
149 | * the request can be issued directly. | 348 | * queue or the device driver. Just restore for normal completion. |
150 | * | ||
151 | * Flush w/o data should be able to be issued directly too but | ||
152 | * currently some drivers assume that rq->bio contains | ||
153 | * non-zero data if it isn't NULL and empty FLUSH requests | ||
154 | * getting here usually have bio's without data. | ||
155 | */ | 349 | */ |
156 | if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { | 350 | list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) { |
157 | rq->cmd_flags &= ~REQ_FLUSH; | 351 | list_del_init(&rq->flush.list); |
158 | if (!has_fua) | 352 | blk_flush_restore_request(rq); |
159 | rq->cmd_flags &= ~REQ_FUA; | ||
160 | return rq; | ||
161 | } | 353 | } |
162 | 354 | ||
163 | /* | 355 | /* |
164 | * Sequenced flushes can't be processed in parallel. If | 356 | * We need to give away requests on flush queues. Restore for |
165 | * another one is already in progress, queue for later | 357 | * normal completion and put them on the dispatch queue. |
166 | * processing. | ||
167 | */ | 358 | */ |
168 | if (q->flush_seq) { | 359 | for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) { |
169 | list_move_tail(&rq->queuelist, &q->pending_flushes); | 360 | list_for_each_entry_safe(rq, n, &q->flush_queue[i], |
170 | return NULL; | 361 | flush.list) { |
362 | list_del_init(&rq->flush.list); | ||
363 | blk_flush_restore_request(rq); | ||
364 | list_add_tail(&rq->queuelist, &q->queue_head); | ||
365 | } | ||
171 | } | 366 | } |
172 | |||
173 | /* | ||
174 | * Start a new flush sequence | ||
175 | */ | ||
176 | q->flush_err = 0; | ||
177 | q->flush_seq |= QUEUE_FSEQ_STARTED; | ||
178 | |||
179 | /* adjust FLUSH/FUA of the original request and stash it away */ | ||
180 | rq->cmd_flags &= ~REQ_FLUSH; | ||
181 | if (!has_fua) | ||
182 | rq->cmd_flags &= ~REQ_FUA; | ||
183 | blk_dequeue_request(rq); | ||
184 | q->orig_flush_rq = rq; | ||
185 | |||
186 | /* skip unneded sequences and return the first one */ | ||
187 | if (!do_preflush) | ||
188 | skip |= QUEUE_FSEQ_PREFLUSH; | ||
189 | if (!blk_rq_sectors(rq)) | ||
190 | skip |= QUEUE_FSEQ_DATA; | ||
191 | if (!do_postflush) | ||
192 | skip |= QUEUE_FSEQ_POSTFLUSH; | ||
193 | return blk_flush_complete_seq(q, skip, 0); | ||
194 | } | 367 | } |
195 | 368 | ||
196 | static void bio_end_flush(struct bio *bio, int err) | 369 | static void bio_end_flush(struct bio *bio, int err) |
diff --git a/block/blk-integrity.c b/block/blk-integrity.c index 54bcba6c02a7..129b9e209a3b 100644 --- a/block/blk-integrity.c +++ b/block/blk-integrity.c | |||
@@ -30,6 +30,8 @@ | |||
30 | 30 | ||
31 | static struct kmem_cache *integrity_cachep; | 31 | static struct kmem_cache *integrity_cachep; |
32 | 32 | ||
33 | static const char *bi_unsupported_name = "unsupported"; | ||
34 | |||
33 | /** | 35 | /** |
34 | * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements | 36 | * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements |
35 | * @q: request queue | 37 | * @q: request queue |
@@ -358,6 +360,14 @@ static struct kobj_type integrity_ktype = { | |||
358 | .release = blk_integrity_release, | 360 | .release = blk_integrity_release, |
359 | }; | 361 | }; |
360 | 362 | ||
363 | bool blk_integrity_is_initialized(struct gendisk *disk) | ||
364 | { | ||
365 | struct blk_integrity *bi = blk_get_integrity(disk); | ||
366 | |||
367 | return (bi && bi->name && strcmp(bi->name, bi_unsupported_name) != 0); | ||
368 | } | ||
369 | EXPORT_SYMBOL(blk_integrity_is_initialized); | ||
370 | |||
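The new helper relies purely on a sentinel: blk_integrity_register() without a template now points bi->name at the shared bi_unsupported_name string, and blk_integrity_is_initialized() reports false exactly in that case. A tiny standalone model of the sentinel check (hypothetical mini structures, not the real blk_integrity; the template name used below is only illustrative):

```c
/* Standalone model of the "unsupported" sentinel check; not kernel code. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static const char *bi_unsupported_name = "unsupported";

struct mini_integrity {
    const char *name;   /* NULL until registration */
};

/* register with a real template name, or fall back to the sentinel */
static void mini_register(struct mini_integrity *bi, const char *tmpl_name)
{
    bi->name = tmpl_name ? tmpl_name : bi_unsupported_name;
}

static bool mini_is_initialized(const struct mini_integrity *bi)
{
    return bi && bi->name && strcmp(bi->name, bi_unsupported_name) != 0;
}

int main(void)
{
    struct mini_integrity a = { 0 }, b = { 0 };

    mini_register(&a, "T10-DIF-TYPE1-CRC");   /* template-backed */
    mini_register(&b, NULL);                  /* placeholder only */

    printf("a initialized: %d\n", mini_is_initialized(&a));   /* 1 */
    printf("b initialized: %d\n", mini_is_initialized(&b));   /* 0 */
    return 0;
}
```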
361 | /** | 371 | /** |
362 | * blk_integrity_register - Register a gendisk as being integrity-capable | 372 | * blk_integrity_register - Register a gendisk as being integrity-capable |
363 | * @disk: struct gendisk pointer to make integrity-aware | 373 | * @disk: struct gendisk pointer to make integrity-aware |
@@ -407,7 +417,7 @@ int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) | |||
407 | bi->get_tag_fn = template->get_tag_fn; | 417 | bi->get_tag_fn = template->get_tag_fn; |
408 | bi->tag_size = template->tag_size; | 418 | bi->tag_size = template->tag_size; |
409 | } else | 419 | } else |
410 | bi->name = "unsupported"; | 420 | bi->name = bi_unsupported_name; |
411 | 421 | ||
412 | return 0; | 422 | return 0; |
413 | } | 423 | } |
diff --git a/block/blk-lib.c b/block/blk-lib.c index bd3e8df4d5e2..25de73e4759b 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c | |||
@@ -136,8 +136,6 @@ static void bio_batch_end_io(struct bio *bio, int err) | |||
136 | * | 136 | * |
137 | * Description: | 137 | * Description: |
138 | * Generate and issue number of bios with zerofiled pages. | 138 | * Generate and issue number of bios with zerofiled pages. |
139 | * Send barrier at the beginning and at the end if requested. This guarantie | ||
140 | * correct request ordering. Empty barrier allow us to avoid post queue flush. | ||
141 | */ | 139 | */ |
142 | 140 | ||
143 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 141 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
diff --git a/block/blk-merge.c b/block/blk-merge.c index ea85e20d5e94..cfcc37cb222b 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
@@ -465,3 +465,9 @@ int attempt_front_merge(struct request_queue *q, struct request *rq) | |||
465 | 465 | ||
466 | return 0; | 466 | return 0; |
467 | } | 467 | } |
468 | |||
469 | int blk_attempt_req_merge(struct request_queue *q, struct request *rq, | ||
470 | struct request *next) | ||
471 | { | ||
472 | return attempt_merge(q, rq, next); | ||
473 | } | ||
diff --git a/block/blk-settings.c b/block/blk-settings.c index 36c8c1f2af18..1fa769293597 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c | |||
@@ -164,25 +164,10 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) | |||
164 | blk_queue_congestion_threshold(q); | 164 | blk_queue_congestion_threshold(q); |
165 | q->nr_batching = BLK_BATCH_REQ; | 165 | q->nr_batching = BLK_BATCH_REQ; |
166 | 166 | ||
167 | q->unplug_thresh = 4; /* hmm */ | ||
168 | q->unplug_delay = msecs_to_jiffies(3); /* 3 milliseconds */ | ||
169 | if (q->unplug_delay == 0) | ||
170 | q->unplug_delay = 1; | ||
171 | |||
172 | q->unplug_timer.function = blk_unplug_timeout; | ||
173 | q->unplug_timer.data = (unsigned long)q; | ||
174 | |||
175 | blk_set_default_limits(&q->limits); | 167 | blk_set_default_limits(&q->limits); |
176 | blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); | 168 | blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); |
177 | 169 | ||
178 | /* | 170 | /* |
179 | * If the caller didn't supply a lock, fall back to our embedded | ||
180 | * per-queue locks | ||
181 | */ | ||
182 | if (!q->queue_lock) | ||
183 | q->queue_lock = &q->__queue_lock; | ||
184 | |||
185 | /* | ||
186 | * by default assume old behaviour and bounce for any highmem page | 171 | * by default assume old behaviour and bounce for any highmem page |
187 | */ | 172 | */ |
188 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); | 173 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 41fb69150b4d..bd236313f35d 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -66,14 +66,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) | |||
66 | 66 | ||
67 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { | 67 | if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { |
68 | blk_set_queue_full(q, BLK_RW_SYNC); | 68 | blk_set_queue_full(q, BLK_RW_SYNC); |
69 | } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) { | 69 | } else { |
70 | blk_clear_queue_full(q, BLK_RW_SYNC); | 70 | blk_clear_queue_full(q, BLK_RW_SYNC); |
71 | wake_up(&rl->wait[BLK_RW_SYNC]); | 71 | wake_up(&rl->wait[BLK_RW_SYNC]); |
72 | } | 72 | } |
73 | 73 | ||
74 | if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { | 74 | if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { |
75 | blk_set_queue_full(q, BLK_RW_ASYNC); | 75 | blk_set_queue_full(q, BLK_RW_ASYNC); |
76 | } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) { | 76 | } else { |
77 | blk_clear_queue_full(q, BLK_RW_ASYNC); | 77 | blk_clear_queue_full(q, BLK_RW_ASYNC); |
78 | wake_up(&rl->wait[BLK_RW_ASYNC]); | 78 | wake_up(&rl->wait[BLK_RW_ASYNC]); |
79 | } | 79 | } |
@@ -471,8 +471,6 @@ static void blk_release_queue(struct kobject *kobj) | |||
471 | 471 | ||
472 | blk_sync_queue(q); | 472 | blk_sync_queue(q); |
473 | 473 | ||
474 | blk_throtl_exit(q); | ||
475 | |||
476 | if (rl->rq_pool) | 474 | if (rl->rq_pool) |
477 | mempool_destroy(rl->rq_pool); | 475 | mempool_destroy(rl->rq_pool); |
478 | 476 | ||
@@ -500,7 +498,6 @@ int blk_register_queue(struct gendisk *disk) | |||
500 | { | 498 | { |
501 | int ret; | 499 | int ret; |
502 | struct device *dev = disk_to_dev(disk); | 500 | struct device *dev = disk_to_dev(disk); |
503 | |||
504 | struct request_queue *q = disk->queue; | 501 | struct request_queue *q = disk->queue; |
505 | 502 | ||
506 | if (WARN_ON(!q)) | 503 | if (WARN_ON(!q)) |
@@ -511,8 +508,10 @@ int blk_register_queue(struct gendisk *disk) | |||
511 | return ret; | 508 | return ret; |
512 | 509 | ||
513 | ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); | 510 | ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); |
514 | if (ret < 0) | 511 | if (ret < 0) { |
512 | blk_trace_remove_sysfs(dev); | ||
515 | return ret; | 513 | return ret; |
514 | } | ||
516 | 515 | ||
517 | kobject_uevent(&q->kobj, KOBJ_ADD); | 516 | kobject_uevent(&q->kobj, KOBJ_ADD); |
518 | 517 | ||
@@ -523,7 +522,7 @@ int blk_register_queue(struct gendisk *disk) | |||
523 | if (ret) { | 522 | if (ret) { |
524 | kobject_uevent(&q->kobj, KOBJ_REMOVE); | 523 | kobject_uevent(&q->kobj, KOBJ_REMOVE); |
525 | kobject_del(&q->kobj); | 524 | kobject_del(&q->kobj); |
526 | blk_trace_remove_sysfs(disk_to_dev(disk)); | 525 | blk_trace_remove_sysfs(dev); |
527 | kobject_put(&dev->kobj); | 526 | kobject_put(&dev->kobj); |
528 | return ret; | 527 | return ret; |
529 | } | 528 | } |
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e36cc10a346c..0475a22a420d 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
@@ -77,7 +77,7 @@ struct throtl_grp { | |||
77 | unsigned long slice_end[2]; | 77 | unsigned long slice_end[2]; |
78 | 78 | ||
79 | /* Some throttle limits got updated for the group */ | 79 | /* Some throttle limits got updated for the group */ |
80 | bool limits_changed; | 80 | int limits_changed; |
81 | }; | 81 | }; |
82 | 82 | ||
83 | struct throtl_data | 83 | struct throtl_data |
@@ -102,7 +102,7 @@ struct throtl_data | |||
102 | /* Work for dispatching throttled bios */ | 102 | /* Work for dispatching throttled bios */ |
103 | struct delayed_work throtl_work; | 103 | struct delayed_work throtl_work; |
104 | 104 | ||
105 | atomic_t limits_changed; | 105 | int limits_changed; |
106 | }; | 106 | }; |
107 | 107 | ||
108 | enum tg_state_flags { | 108 | enum tg_state_flags { |
@@ -201,6 +201,7 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td, | |||
201 | RB_CLEAR_NODE(&tg->rb_node); | 201 | RB_CLEAR_NODE(&tg->rb_node); |
202 | bio_list_init(&tg->bio_lists[0]); | 202 | bio_list_init(&tg->bio_lists[0]); |
203 | bio_list_init(&tg->bio_lists[1]); | 203 | bio_list_init(&tg->bio_lists[1]); |
204 | td->limits_changed = false; | ||
204 | 205 | ||
205 | /* | 206 | /* |
206 | * Take the initial reference that will be released on destroy | 207 | * Take the initial reference that will be released on destroy |
@@ -737,34 +738,36 @@ static void throtl_process_limit_change(struct throtl_data *td) | |||
737 | struct throtl_grp *tg; | 738 | struct throtl_grp *tg; |
738 | struct hlist_node *pos, *n; | 739 | struct hlist_node *pos, *n; |
739 | 740 | ||
740 | if (!atomic_read(&td->limits_changed)) | 741 | if (!td->limits_changed) |
741 | return; | 742 | return; |
742 | 743 | ||
743 | throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed)); | 744 | xchg(&td->limits_changed, false); |
744 | 745 | ||
745 | /* | 746 | throtl_log(td, "limits changed"); |
746 | * Make sure updates from throtl_update_blkio_group_read_bps() group | ||
747 | * of functions to tg->limits_changed are visible. We do not | ||
748 | * want update td->limits_changed to be visible but update to | ||
749 | * tg->limits_changed not being visible yet on this cpu. Hence | ||
750 | * the read barrier. | ||
751 | */ | ||
752 | smp_rmb(); | ||
753 | 747 | ||
754 | hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { | 748 | hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { |
755 | if (throtl_tg_on_rr(tg) && tg->limits_changed) { | 749 | if (!tg->limits_changed) |
756 | throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" | 750 | continue; |
757 | " riops=%u wiops=%u", tg->bps[READ], | 751 | |
758 | tg->bps[WRITE], tg->iops[READ], | 752 | if (!xchg(&tg->limits_changed, false)) |
759 | tg->iops[WRITE]); | 753 | continue; |
754 | |||
755 | throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" | ||
756 | " riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE], | ||
757 | tg->iops[READ], tg->iops[WRITE]); | ||
758 | |||
759 | /* | ||
760 | * Restart the slices for both READ and WRITES. It | ||
761 | * might happen that a group's limits are dropped | ||
762 | * suddenly and we don't want to account recently | ||
763 | * dispatched IO with new low rate | ||
764 | */ | ||
765 | throtl_start_new_slice(td, tg, 0); | ||
766 | throtl_start_new_slice(td, tg, 1); | ||
767 | |||
768 | if (throtl_tg_on_rr(tg)) | ||
760 | tg_update_disptime(td, tg); | 769 | tg_update_disptime(td, tg); |
761 | tg->limits_changed = false; | ||
762 | } | ||
763 | } | 770 | } |
764 | |||
765 | smp_mb__before_atomic_dec(); | ||
766 | atomic_dec(&td->limits_changed); | ||
767 | smp_mb__after_atomic_dec(); | ||
768 | } | 771 | } |
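Dropping the atomic counter works because the flags are only ever set to true by updaters and consumed with xchg(..., false): an update racing with the consumer at worst leaves a flag set for an extra pass rather than being lost. The same consume-with-exchange pattern in standalone C (the per-td and per-tg flags collapsed into one, GCC/Clang atomic builtins standing in for the kernel's xchg()):

```c
/* Standalone model of the xchg()-cleared "limits_changed" flag; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

static bool limits_changed;     /* set by updaters, cleared by the worker */

/* updater side: publish the new limits first, then raise the flag */
static void update_limits(void)
{
    __atomic_exchange_n(&limits_changed, true, __ATOMIC_SEQ_CST);
    /* ...and schedule the worker, as throtl_update_blkio_group_common() does */
}

/* worker side: consume the flag, so a change is never processed twice */
static void process_limit_change(void)
{
    if (!__atomic_exchange_n(&limits_changed, false, __ATOMIC_SEQ_CST))
        return;                 /* nothing changed since the last pass */
    printf("re-reading per-group limits\n");
}

int main(void)
{
    process_limit_change();     /* no-op */
    update_limits();
    process_limit_change();     /* picks up the change */
    process_limit_change();     /* flag already consumed: no-op */
    return 0;
}
```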
769 | 772 | ||
770 | /* Dispatch throttled bios. Should be called without queue lock held. */ | 773 | /* Dispatch throttled bios. Should be called without queue lock held. */ |
@@ -774,6 +777,7 @@ static int throtl_dispatch(struct request_queue *q) | |||
774 | unsigned int nr_disp = 0; | 777 | unsigned int nr_disp = 0; |
775 | struct bio_list bio_list_on_stack; | 778 | struct bio_list bio_list_on_stack; |
776 | struct bio *bio; | 779 | struct bio *bio; |
780 | struct blk_plug plug; | ||
777 | 781 | ||
778 | spin_lock_irq(q->queue_lock); | 782 | spin_lock_irq(q->queue_lock); |
779 | 783 | ||
@@ -802,9 +806,10 @@ out: | |||
802 | * immediate dispatch | 806 | * immediate dispatch |
803 | */ | 807 | */ |
804 | if (nr_disp) { | 808 | if (nr_disp) { |
809 | blk_start_plug(&plug); | ||
805 | while((bio = bio_list_pop(&bio_list_on_stack))) | 810 | while((bio = bio_list_pop(&bio_list_on_stack))) |
806 | generic_make_request(bio); | 811 | generic_make_request(bio); |
807 | blk_unplug(q); | 812 | blk_finish_plug(&plug); |
808 | } | 813 | } |
809 | return nr_disp; | 814 | return nr_disp; |
810 | } | 815 | } |
@@ -825,7 +830,8 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) | |||
825 | 830 | ||
826 | struct delayed_work *dwork = &td->throtl_work; | 831 | struct delayed_work *dwork = &td->throtl_work; |
827 | 832 | ||
828 | if (total_nr_queued(td) > 0) { | 833 | /* schedule work if limits changed even if no bio is queued */ |
834 | if (total_nr_queued(td) > 0 || td->limits_changed) { | ||
829 | /* | 835 | /* |
830 | * We might have a work scheduled to be executed in future. | 836 | * We might have a work scheduled to be executed in future. |
831 | * Cancel that and schedule a new one. | 837 | * Cancel that and schedule a new one. |
@@ -898,10 +904,19 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg) | |||
898 | spin_unlock_irqrestore(td->queue->queue_lock, flags); | 904 | spin_unlock_irqrestore(td->queue->queue_lock, flags); |
899 | } | 905 | } |
900 | 906 | ||
907 | static void throtl_update_blkio_group_common(struct throtl_data *td, | ||
908 | struct throtl_grp *tg) | ||
909 | { | ||
910 | xchg(&tg->limits_changed, true); | ||
911 | xchg(&td->limits_changed, true); | ||
912 | /* Schedule a work now to process the limit change */ | ||
913 | throtl_schedule_delayed_work(td, 0); | ||
914 | } | ||
915 | |||
901 | /* | 916 | /* |
902 | * For all update functions, key should be a valid pointer because these | 917 | * For all update functions, key should be a valid pointer because these |
903 | * update functions are called under blkcg_lock, that means, blkg is | 918 | * update functions are called under blkcg_lock, that means, blkg is |
904 | * valid and in turn key is valid. queue exit path can not race becuase | 919 | * valid and in turn key is valid. queue exit path can not race because |
905 | * of blkcg_lock | 920 | * of blkcg_lock |
906 | * | 921 | * |
907 | * Can not take queue lock in update functions as queue lock under blkcg_lock | 922 | * Can not take queue lock in update functions as queue lock under blkcg_lock |
@@ -911,64 +926,43 @@ static void throtl_update_blkio_group_read_bps(void *key, | |||
911 | struct blkio_group *blkg, u64 read_bps) | 926 | struct blkio_group *blkg, u64 read_bps) |
912 | { | 927 | { |
913 | struct throtl_data *td = key; | 928 | struct throtl_data *td = key; |
929 | struct throtl_grp *tg = tg_of_blkg(blkg); | ||
914 | 930 | ||
915 | tg_of_blkg(blkg)->bps[READ] = read_bps; | 931 | tg->bps[READ] = read_bps; |
916 | /* Make sure read_bps is updated before setting limits_changed */ | 932 | throtl_update_blkio_group_common(td, tg); |
917 | smp_wmb(); | ||
918 | tg_of_blkg(blkg)->limits_changed = true; | ||
919 | |||
920 | /* Make sure tg->limits_changed is updated before td->limits_changed */ | ||
921 | smp_mb__before_atomic_inc(); | ||
922 | atomic_inc(&td->limits_changed); | ||
923 | smp_mb__after_atomic_inc(); | ||
924 | |||
925 | /* Schedule a work now to process the limit change */ | ||
926 | throtl_schedule_delayed_work(td, 0); | ||
927 | } | 933 | } |
928 | 934 | ||
929 | static void throtl_update_blkio_group_write_bps(void *key, | 935 | static void throtl_update_blkio_group_write_bps(void *key, |
930 | struct blkio_group *blkg, u64 write_bps) | 936 | struct blkio_group *blkg, u64 write_bps) |
931 | { | 937 | { |
932 | struct throtl_data *td = key; | 938 | struct throtl_data *td = key; |
939 | struct throtl_grp *tg = tg_of_blkg(blkg); | ||
933 | 940 | ||
934 | tg_of_blkg(blkg)->bps[WRITE] = write_bps; | 941 | tg->bps[WRITE] = write_bps; |
935 | smp_wmb(); | 942 | throtl_update_blkio_group_common(td, tg); |
936 | tg_of_blkg(blkg)->limits_changed = true; | ||
937 | smp_mb__before_atomic_inc(); | ||
938 | atomic_inc(&td->limits_changed); | ||
939 | smp_mb__after_atomic_inc(); | ||
940 | throtl_schedule_delayed_work(td, 0); | ||
941 | } | 943 | } |
942 | 944 | ||
943 | static void throtl_update_blkio_group_read_iops(void *key, | 945 | static void throtl_update_blkio_group_read_iops(void *key, |
944 | struct blkio_group *blkg, unsigned int read_iops) | 946 | struct blkio_group *blkg, unsigned int read_iops) |
945 | { | 947 | { |
946 | struct throtl_data *td = key; | 948 | struct throtl_data *td = key; |
949 | struct throtl_grp *tg = tg_of_blkg(blkg); | ||
947 | 950 | ||
948 | tg_of_blkg(blkg)->iops[READ] = read_iops; | 951 | tg->iops[READ] = read_iops; |
949 | smp_wmb(); | 952 | throtl_update_blkio_group_common(td, tg); |
950 | tg_of_blkg(blkg)->limits_changed = true; | ||
951 | smp_mb__before_atomic_inc(); | ||
952 | atomic_inc(&td->limits_changed); | ||
953 | smp_mb__after_atomic_inc(); | ||
954 | throtl_schedule_delayed_work(td, 0); | ||
955 | } | 953 | } |
956 | 954 | ||
957 | static void throtl_update_blkio_group_write_iops(void *key, | 955 | static void throtl_update_blkio_group_write_iops(void *key, |
958 | struct blkio_group *blkg, unsigned int write_iops) | 956 | struct blkio_group *blkg, unsigned int write_iops) |
959 | { | 957 | { |
960 | struct throtl_data *td = key; | 958 | struct throtl_data *td = key; |
959 | struct throtl_grp *tg = tg_of_blkg(blkg); | ||
961 | 960 | ||
962 | tg_of_blkg(blkg)->iops[WRITE] = write_iops; | 961 | tg->iops[WRITE] = write_iops; |
963 | smp_wmb(); | 962 | throtl_update_blkio_group_common(td, tg); |
964 | tg_of_blkg(blkg)->limits_changed = true; | ||
965 | smp_mb__before_atomic_inc(); | ||
966 | atomic_inc(&td->limits_changed); | ||
967 | smp_mb__after_atomic_inc(); | ||
968 | throtl_schedule_delayed_work(td, 0); | ||
969 | } | 963 | } |
970 | 964 | ||
971 | void throtl_shutdown_timer_wq(struct request_queue *q) | 965 | static void throtl_shutdown_wq(struct request_queue *q) |
972 | { | 966 | { |
973 | struct throtl_data *td = q->td; | 967 | struct throtl_data *td = q->td; |
974 | 968 | ||
@@ -1009,20 +1003,28 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) | |||
1009 | /* | 1003 | /* |
1010 | * There is already another bio queued in same dir. No | 1004 | * There is already another bio queued in same dir. No |
1011 | * need to update dispatch time. | 1005 | * need to update dispatch time. |
1012 | * Still update the disptime if rate limits on this group | ||
1013 | * were changed. | ||
1014 | */ | 1006 | */ |
1015 | if (!tg->limits_changed) | 1007 | update_disptime = false; |
1016 | update_disptime = false; | ||
1017 | else | ||
1018 | tg->limits_changed = false; | ||
1019 | |||
1020 | goto queue_bio; | 1008 | goto queue_bio; |
1009 | |||
1021 | } | 1010 | } |
1022 | 1011 | ||
1023 | /* Bio is with-in rate limit of group */ | 1012 | /* Bio is with-in rate limit of group */ |
1024 | if (tg_may_dispatch(td, tg, bio, NULL)) { | 1013 | if (tg_may_dispatch(td, tg, bio, NULL)) { |
1025 | throtl_charge_bio(tg, bio); | 1014 | throtl_charge_bio(tg, bio); |
1015 | |||
1016 | /* | ||
1017 | * We need to trim slice even when bios are not being queued | ||
1018 | * otherwise it might happen that a bio is not queued for | ||
1019 | * a long time and slice keeps on extending and trim is not | ||
1020 | * called for a long time. Now if limits are reduced suddenly | ||
1021 | * we take into account all the IO dispatched so far at new | ||
1022 | * low rate and newly queued IO gets a really long dispatch | ||
1023 | * time. | ||
1024 | * | ||
1025 | * So keep on trimming slice even if bio is not queued. | ||
1026 | */ | ||
1027 | throtl_trim_slice(td, tg, rw); | ||
1026 | goto out; | 1028 | goto out; |
1027 | } | 1029 | } |
1028 | 1030 | ||
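A rough back-of-the-envelope version of the argument in that comment: if the slice is never trimmed, all IO dispatched under the old generous limit stays charged against the slice, so the moment the limit drops the next bio inherits an enormous dispatch delay. Toy numbers only, assuming a simple bytes/rate wait calculation rather than the real tg_may_dispatch() math:

```c
/* Toy arithmetic behind the "keep trimming the slice" comment above. */
#include <stdio.h>

int main(void)
{
    long dispatched = 100L << 20;   /* bytes already sent in this slice */
    long new_bps    = 1L << 20;     /* limit suddenly dropped to 1 MB/s */
    long slice_len  = 10;           /* seconds of slice actually accounted */

    /* untrimmed: all past IO is charged against the new low rate */
    long wait_untrimmed = dispatched / new_bps - slice_len;

    /* trimmed as we went: only the current, short window is charged */
    long recent = 2L << 20, short_slice = 2;
    long wait_trimmed = recent / new_bps - short_slice;

    printf("untrimmed slice: next bio waits ~%lds\n", wait_untrimmed); /* ~90s */
    printf("trimmed slice:   next bio waits ~%lds\n", wait_trimmed);   /* ~0s  */
    return 0;
}
```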
@@ -1058,7 +1060,7 @@ int blk_throtl_init(struct request_queue *q) | |||
1058 | 1060 | ||
1059 | INIT_HLIST_HEAD(&td->tg_list); | 1061 | INIT_HLIST_HEAD(&td->tg_list); |
1060 | td->tg_service_tree = THROTL_RB_ROOT; | 1062 | td->tg_service_tree = THROTL_RB_ROOT; |
1061 | atomic_set(&td->limits_changed, 0); | 1063 | td->limits_changed = false; |
1062 | 1064 | ||
1063 | /* Init root group */ | 1065 | /* Init root group */ |
1064 | tg = &td->root_tg; | 1066 | tg = &td->root_tg; |
@@ -1070,6 +1072,7 @@ int blk_throtl_init(struct request_queue *q) | |||
1070 | /* Practically unlimited BW */ | 1072 | /* Practically unlimited BW */ |
1071 | tg->bps[0] = tg->bps[1] = -1; | 1073 | tg->bps[0] = tg->bps[1] = -1; |
1072 | tg->iops[0] = tg->iops[1] = -1; | 1074 | tg->iops[0] = tg->iops[1] = -1; |
1075 | td->limits_changed = false; | ||
1073 | 1076 | ||
1074 | /* | 1077 | /* |
1075 | * Set root group reference to 2. One reference will be dropped when | 1078 | * Set root group reference to 2. One reference will be dropped when |
@@ -1102,7 +1105,7 @@ void blk_throtl_exit(struct request_queue *q) | |||
1102 | 1105 | ||
1103 | BUG_ON(!td); | 1106 | BUG_ON(!td); |
1104 | 1107 | ||
1105 | throtl_shutdown_timer_wq(q); | 1108 | throtl_shutdown_wq(q); |
1106 | 1109 | ||
1107 | spin_lock_irq(q->queue_lock); | 1110 | spin_lock_irq(q->queue_lock); |
1108 | throtl_release_tgs(td); | 1111 | throtl_release_tgs(td); |
@@ -1132,7 +1135,7 @@ void blk_throtl_exit(struct request_queue *q) | |||
1132 | * update limits through cgroup and another work got queued, cancel | 1135 | * update limits through cgroup and another work got queued, cancel |
1133 | * it. | 1136 | * it. |
1134 | */ | 1137 | */ |
1135 | throtl_shutdown_timer_wq(q); | 1138 | throtl_shutdown_wq(q); |
1136 | throtl_td_free(td); | 1139 | throtl_td_free(td); |
1137 | } | 1140 | } |
1138 | 1141 | ||
diff --git a/block/blk.h b/block/blk.h index 2db8f32838e7..61263463e38e 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -18,8 +18,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, | |||
18 | void blk_dequeue_request(struct request *rq); | 18 | void blk_dequeue_request(struct request *rq); |
19 | void __blk_queue_free_tags(struct request_queue *q); | 19 | void __blk_queue_free_tags(struct request_queue *q); |
20 | 20 | ||
21 | void blk_unplug_work(struct work_struct *work); | ||
22 | void blk_unplug_timeout(unsigned long data); | ||
23 | void blk_rq_timed_out_timer(unsigned long data); | 21 | void blk_rq_timed_out_timer(unsigned long data); |
24 | void blk_delete_timer(struct request *); | 22 | void blk_delete_timer(struct request *); |
25 | void blk_add_timer(struct request *); | 23 | void blk_add_timer(struct request *); |
@@ -34,7 +32,7 @@ enum rq_atomic_flags { | |||
34 | 32 | ||
35 | /* | 33 | /* |
36 | * EH timer and IO completion will both attempt to 'grab' the request, make | 34 | * EH timer and IO completion will both attempt to 'grab' the request, make |
37 | * sure that only one of them suceeds | 35 | * sure that only one of them succeeds |
38 | */ | 36 | */ |
39 | static inline int blk_mark_rq_complete(struct request *rq) | 37 | static inline int blk_mark_rq_complete(struct request *rq) |
40 | { | 38 | { |
@@ -51,21 +49,17 @@ static inline void blk_clear_rq_complete(struct request *rq) | |||
51 | */ | 49 | */ |
52 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) | 50 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) |
53 | 51 | ||
54 | struct request *blk_do_flush(struct request_queue *q, struct request *rq); | 52 | void blk_insert_flush(struct request *rq); |
53 | void blk_abort_flushes(struct request_queue *q); | ||
55 | 54 | ||
56 | static inline struct request *__elv_next_request(struct request_queue *q) | 55 | static inline struct request *__elv_next_request(struct request_queue *q) |
57 | { | 56 | { |
58 | struct request *rq; | 57 | struct request *rq; |
59 | 58 | ||
60 | while (1) { | 59 | while (1) { |
61 | while (!list_empty(&q->queue_head)) { | 60 | if (!list_empty(&q->queue_head)) { |
62 | rq = list_entry_rq(q->queue_head.next); | 61 | rq = list_entry_rq(q->queue_head.next); |
63 | if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || | 62 | return rq; |
64 | rq == &q->flush_rq) | ||
65 | return rq; | ||
66 | rq = blk_do_flush(q, rq); | ||
67 | if (rq) | ||
68 | return rq; | ||
69 | } | 63 | } |
70 | 64 | ||
71 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) | 65 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) |
@@ -109,6 +103,8 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, | |||
109 | struct bio *bio); | 103 | struct bio *bio); |
110 | int attempt_back_merge(struct request_queue *q, struct request *rq); | 104 | int attempt_back_merge(struct request_queue *q, struct request *rq); |
111 | int attempt_front_merge(struct request_queue *q, struct request *rq); | 105 | int attempt_front_merge(struct request_queue *q, struct request *rq); |
106 | int blk_attempt_req_merge(struct request_queue *q, struct request *rq, | ||
107 | struct request *next); | ||
112 | void blk_recalc_rq_segments(struct request *rq); | 108 | void blk_recalc_rq_segments(struct request *rq); |
113 | void blk_rq_set_mixed_merge(struct request *rq); | 109 | void blk_rq_set_mixed_merge(struct request *rq); |
114 | 110 | ||
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index ea83a4f0c27d..5b52011e3a40 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -54,9 +54,9 @@ static const int cfq_hist_divisor = 4; | |||
54 | #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) | 54 | #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) |
55 | 55 | ||
56 | #define RQ_CIC(rq) \ | 56 | #define RQ_CIC(rq) \ |
57 | ((struct cfq_io_context *) (rq)->elevator_private) | 57 | ((struct cfq_io_context *) (rq)->elevator_private[0]) |
58 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) | 58 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private[1]) |
59 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private3) | 59 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private[2]) |
60 | 60 | ||
61 | static struct kmem_cache *cfq_pool; | 61 | static struct kmem_cache *cfq_pool; |
62 | static struct kmem_cache *cfq_ioc_pool; | 62 | static struct kmem_cache *cfq_ioc_pool; |
@@ -146,7 +146,6 @@ struct cfq_queue { | |||
146 | struct cfq_rb_root *service_tree; | 146 | struct cfq_rb_root *service_tree; |
147 | struct cfq_queue *new_cfqq; | 147 | struct cfq_queue *new_cfqq; |
148 | struct cfq_group *cfqg; | 148 | struct cfq_group *cfqg; |
149 | struct cfq_group *orig_cfqg; | ||
150 | /* Number of sectors dispatched from queue in single dispatch round */ | 149 | /* Number of sectors dispatched from queue in single dispatch round */ |
151 | unsigned long nr_sectors; | 150 | unsigned long nr_sectors; |
152 | }; | 151 | }; |
@@ -179,6 +178,8 @@ struct cfq_group { | |||
179 | /* group service_tree key */ | 178 | /* group service_tree key */ |
180 | u64 vdisktime; | 179 | u64 vdisktime; |
181 | unsigned int weight; | 180 | unsigned int weight; |
181 | unsigned int new_weight; | ||
182 | bool needs_update; | ||
182 | 183 | ||
183 | /* number of cfqq currently on this group */ | 184 | /* number of cfqq currently on this group */ |
184 | int nr_cfqq; | 185 | int nr_cfqq; |
@@ -238,6 +239,7 @@ struct cfq_data { | |||
238 | struct rb_root prio_trees[CFQ_PRIO_LISTS]; | 239 | struct rb_root prio_trees[CFQ_PRIO_LISTS]; |
239 | 240 | ||
240 | unsigned int busy_queues; | 241 | unsigned int busy_queues; |
242 | unsigned int busy_sync_queues; | ||
241 | 243 | ||
242 | int rq_in_driver; | 244 | int rq_in_driver; |
243 | int rq_in_flight[2]; | 245 | int rq_in_flight[2]; |
@@ -285,7 +287,6 @@ struct cfq_data { | |||
285 | unsigned int cfq_slice_idle; | 287 | unsigned int cfq_slice_idle; |
286 | unsigned int cfq_group_idle; | 288 | unsigned int cfq_group_idle; |
287 | unsigned int cfq_latency; | 289 | unsigned int cfq_latency; |
288 | unsigned int cfq_group_isolation; | ||
289 | 290 | ||
290 | unsigned int cic_index; | 291 | unsigned int cic_index; |
291 | struct list_head cic_list; | 292 | struct list_head cic_list; |
@@ -501,13 +502,6 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) | |||
501 | } | 502 | } |
502 | } | 503 | } |
503 | 504 | ||
504 | static int cfq_queue_empty(struct request_queue *q) | ||
505 | { | ||
506 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
507 | |||
508 | return !cfqd->rq_queued; | ||
509 | } | ||
510 | |||
511 | /* | 505 | /* |
512 | * Scale schedule slice based on io priority. Use the sync time slice only | 506 | * Scale schedule slice based on io priority. Use the sync time slice only |
513 | * if a queue is marked sync and has sync io queued. A sync queue with async | 507 | * if a queue is marked sync and has sync io queued. A sync queue with async |
@@ -558,15 +552,13 @@ static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime) | |||
558 | 552 | ||
559 | static void update_min_vdisktime(struct cfq_rb_root *st) | 553 | static void update_min_vdisktime(struct cfq_rb_root *st) |
560 | { | 554 | { |
561 | u64 vdisktime = st->min_vdisktime; | ||
562 | struct cfq_group *cfqg; | 555 | struct cfq_group *cfqg; |
563 | 556 | ||
564 | if (st->left) { | 557 | if (st->left) { |
565 | cfqg = rb_entry_cfqg(st->left); | 558 | cfqg = rb_entry_cfqg(st->left); |
566 | vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); | 559 | st->min_vdisktime = max_vdisktime(st->min_vdisktime, |
560 | cfqg->vdisktime); | ||
567 | } | 561 | } |
568 | |||
569 | st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime); | ||
570 | } | 562 | } |
571 | 563 | ||
572 | /* | 564 | /* |
@@ -863,7 +855,27 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) | |||
863 | } | 855 | } |
864 | 856 | ||
865 | static void | 857 | static void |
866 | cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | 858 | cfq_update_group_weight(struct cfq_group *cfqg) |
859 | { | ||
860 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); | ||
861 | if (cfqg->needs_update) { | ||
862 | cfqg->weight = cfqg->new_weight; | ||
863 | cfqg->needs_update = false; | ||
864 | } | ||
865 | } | ||
866 | |||
867 | static void | ||
868 | cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) | ||
869 | { | ||
870 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); | ||
871 | |||
872 | cfq_update_group_weight(cfqg); | ||
873 | __cfq_group_service_tree_add(st, cfqg); | ||
874 | st->total_weight += cfqg->weight; | ||
875 | } | ||
876 | |||
877 | static void | ||
878 | cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | ||
867 | { | 879 | { |
868 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 880 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
869 | struct cfq_group *__cfqg; | 881 | struct cfq_group *__cfqg; |
@@ -876,7 +888,7 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
876 | /* | 888 | /* |
877 | * Currently put the group at the end. Later implement something | 889 | * Currently put the group at the end. Later implement something |
878 | * so that groups get lesser vtime based on their weights, so that | 890 | * so that groups get lesser vtime based on their weights, so that |
879 | * if group does not loose all if it was not continously backlogged. | 891 | * if group does not loose all if it was not continuously backlogged. |
880 | */ | 892 | */ |
881 | n = rb_last(&st->rb); | 893 | n = rb_last(&st->rb); |
882 | if (n) { | 894 | if (n) { |
@@ -884,13 +896,19 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
884 | cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; | 896 | cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; |
885 | } else | 897 | } else |
886 | cfqg->vdisktime = st->min_vdisktime; | 898 | cfqg->vdisktime = st->min_vdisktime; |
899 | cfq_group_service_tree_add(st, cfqg); | ||
900 | } | ||
887 | 901 | ||
888 | __cfq_group_service_tree_add(st, cfqg); | 902 | static void |
889 | st->total_weight += cfqg->weight; | 903 | cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg) |
904 | { | ||
905 | st->total_weight -= cfqg->weight; | ||
906 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) | ||
907 | cfq_rb_erase(&cfqg->rb_node, st); | ||
890 | } | 908 | } |
891 | 909 | ||
892 | static void | 910 | static void |
893 | cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | 911 | cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) |
894 | { | 912 | { |
895 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 913 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
896 | 914 | ||
@@ -902,14 +920,13 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
902 | return; | 920 | return; |
903 | 921 | ||
904 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); | 922 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); |
905 | st->total_weight -= cfqg->weight; | 923 | cfq_group_service_tree_del(st, cfqg); |
906 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) | ||
907 | cfq_rb_erase(&cfqg->rb_node, st); | ||
908 | cfqg->saved_workload_slice = 0; | 924 | cfqg->saved_workload_slice = 0; |
909 | cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1); | 925 | cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1); |
910 | } | 926 | } |
911 | 927 | ||
912 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) | 928 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, |
929 | unsigned int *unaccounted_time) | ||
913 | { | 930 | { |
914 | unsigned int slice_used; | 931 | unsigned int slice_used; |
915 | 932 | ||
@@ -928,8 +945,13 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) | |||
928 | 1); | 945 | 1); |
929 | } else { | 946 | } else { |
930 | slice_used = jiffies - cfqq->slice_start; | 947 | slice_used = jiffies - cfqq->slice_start; |
931 | if (slice_used > cfqq->allocated_slice) | 948 | if (slice_used > cfqq->allocated_slice) { |
949 | *unaccounted_time = slice_used - cfqq->allocated_slice; | ||
932 | slice_used = cfqq->allocated_slice; | 950 | slice_used = cfqq->allocated_slice; |
951 | } | ||
952 | if (time_after(cfqq->slice_start, cfqq->dispatch_start)) | ||
953 | *unaccounted_time += cfqq->slice_start - | ||
954 | cfqq->dispatch_start; | ||
933 | } | 955 | } |
934 | 956 | ||
935 | return slice_used; | 957 | return slice_used; |
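The new *unaccounted_time out-parameter collects the two spans the group is deliberately not charged for: usage beyond the allocated slice, and the gap between dispatch_start and the moment the slice actually started. A standalone version of that bookkeeping with plain integers standing in for jiffies (the kernel function's first branch, for queues that never got to use their slice, is omitted):

```c
/* Standalone model of the used/unaccounted slice split; integers as jiffies. */
#include <stdio.h>

static unsigned int slice_usage(unsigned int dispatch_start,
                                unsigned int slice_start,
                                unsigned int allocated_slice,
                                unsigned int now,
                                unsigned int *unaccounted)
{
    unsigned int used = now - slice_start;

    *unaccounted = 0;
    if (used > allocated_slice) {
        *unaccounted = used - allocated_slice;          /* overshoot */
        used = allocated_slice;
    }
    if (slice_start > dispatch_start)
        *unaccounted += slice_start - dispatch_start;   /* setup delay */
    return used;
}

int main(void)
{
    unsigned int unaccounted;
    unsigned int used = slice_usage(100, 104, 20, 130, &unaccounted);

    /* ran 26 ticks on a 20-tick slice, after a 4-tick wait to get started */
    printf("charged %u, unaccounted %u\n", used, unaccounted);  /* 20, 10 */
    return 0;
}
```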
@@ -939,12 +961,12 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
939 | struct cfq_queue *cfqq) | 961 | struct cfq_queue *cfqq) |
940 | { | 962 | { |
941 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 963 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
942 | unsigned int used_sl, charge; | 964 | unsigned int used_sl, charge, unaccounted_sl = 0; |
943 | int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) | 965 | int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) |
944 | - cfqg->service_tree_idle.count; | 966 | - cfqg->service_tree_idle.count; |
945 | 967 | ||
946 | BUG_ON(nr_sync < 0); | 968 | BUG_ON(nr_sync < 0); |
947 | used_sl = charge = cfq_cfqq_slice_usage(cfqq); | 969 | used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); |
948 | 970 | ||
949 | if (iops_mode(cfqd)) | 971 | if (iops_mode(cfqd)) |
950 | charge = cfqq->slice_dispatch; | 972 | charge = cfqq->slice_dispatch; |
@@ -952,9 +974,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
952 | charge = cfqq->allocated_slice; | 974 | charge = cfqq->allocated_slice; |
953 | 975 | ||
954 | /* Can't update vdisktime while group is on service tree */ | 976 | /* Can't update vdisktime while group is on service tree */ |
955 | cfq_rb_erase(&cfqg->rb_node, st); | 977 | cfq_group_service_tree_del(st, cfqg); |
956 | cfqg->vdisktime += cfq_scale_slice(charge, cfqg); | 978 | cfqg->vdisktime += cfq_scale_slice(charge, cfqg); |
957 | __cfq_group_service_tree_add(st, cfqg); | 979 | /* If a new weight was requested, update now, off tree */ |
980 | cfq_group_service_tree_add(st, cfqg); | ||
958 | 981 | ||
959 | /* This group is being expired. Save the context */ | 982 | /* This group is being expired. Save the context */ |
960 | if (time_after(cfqd->workload_expires, jiffies)) { | 983 | if (time_after(cfqd->workload_expires, jiffies)) { |
@@ -970,7 +993,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
970 | cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u" | 993 | cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u" |
971 | " sect=%u", used_sl, cfqq->slice_dispatch, charge, | 994 | " sect=%u", used_sl, cfqq->slice_dispatch, charge, |
972 | iops_mode(cfqd), cfqq->nr_sectors); | 995 | iops_mode(cfqd), cfqq->nr_sectors); |
973 | cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); | 996 | cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl, |
997 | unaccounted_sl); | ||
974 | cfq_blkiocg_set_start_empty_time(&cfqg->blkg); | 998 | cfq_blkiocg_set_start_empty_time(&cfqg->blkg); |
975 | } | 999 | } |
976 | 1000 | ||
@@ -985,7 +1009,9 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg) | |||
985 | void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, | 1009 | void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, |
986 | unsigned int weight) | 1010 | unsigned int weight) |
987 | { | 1011 | { |
988 | cfqg_of_blkg(blkg)->weight = weight; | 1012 | struct cfq_group *cfqg = cfqg_of_blkg(blkg); |
1013 | cfqg->new_weight = weight; | ||
1014 | cfqg->needs_update = true; | ||
989 | } | 1015 | } |
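The reason the weight is only staged here is that cfqg->weight feeds st->total_weight while the group sits on the service tree; changing it in place would make the later subtraction in cfq_group_service_tree_del() remove a different value than was added. A standalone model of the stage-then-apply handshake (hypothetical names, not the real structures):

```c
/* Standalone model of the deferred group-weight update; not kernel code. */
#include <stdbool.h>
#include <stdio.h>

struct group {
    unsigned int weight;        /* value the service tree accounts */
    unsigned int new_weight;    /* staged by the cgroup write */
    bool needs_update;
};

static unsigned int total_weight;

/* cgroup side: never touch ->weight directly, only stage it */
static void set_weight(struct group *g, unsigned int w)
{
    g->new_weight = w;
    g->needs_update = true;
}

static void tree_add(struct group *g)
{
    if (g->needs_update) {      /* safe: the group is off-tree here */
        g->weight = g->new_weight;
        g->needs_update = false;
    }
    total_weight += g->weight;
}

static void tree_del(struct group *g)
{
    total_weight -= g->weight;  /* must subtract exactly what was added */
}

int main(void)
{
    struct group g = { .weight = 500 };

    tree_add(&g);
    set_weight(&g, 1000);       /* takes effect only once the group is off-tree */
    printf("on tree, total=%u (still the old weight)\n", total_weight);
    /* had ->weight changed in place, tree_del() would now subtract 1000
     * even though only 500 was ever added to total_weight */
    tree_del(&g);
    tree_add(&g);               /* re-add applies the staged weight */
    printf("re-added, total=%u\n", total_weight);
    return 0;
}
```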
990 | 1016 | ||
991 | static struct cfq_group * | 1017 | static struct cfq_group * |
@@ -1187,32 +1213,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1187 | int new_cfqq = 1; | 1213 | int new_cfqq = 1; |
1188 | int group_changed = 0; | 1214 | int group_changed = 0; |
1189 | 1215 | ||
1190 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | ||
1191 | if (!cfqd->cfq_group_isolation | ||
1192 | && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD | ||
1193 | && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) { | ||
1194 | /* Move this cfq to root group */ | ||
1195 | cfq_log_cfqq(cfqd, cfqq, "moving to root group"); | ||
1196 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) | ||
1197 | cfq_group_service_tree_del(cfqd, cfqq->cfqg); | ||
1198 | cfqq->orig_cfqg = cfqq->cfqg; | ||
1199 | cfqq->cfqg = &cfqd->root_group; | ||
1200 | cfqd->root_group.ref++; | ||
1201 | group_changed = 1; | ||
1202 | } else if (!cfqd->cfq_group_isolation | ||
1203 | && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) { | ||
1204 | /* cfqq is sequential now needs to go to its original group */ | ||
1205 | BUG_ON(cfqq->cfqg != &cfqd->root_group); | ||
1206 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) | ||
1207 | cfq_group_service_tree_del(cfqd, cfqq->cfqg); | ||
1208 | cfq_put_cfqg(cfqq->cfqg); | ||
1209 | cfqq->cfqg = cfqq->orig_cfqg; | ||
1210 | cfqq->orig_cfqg = NULL; | ||
1211 | group_changed = 1; | ||
1212 | cfq_log_cfqq(cfqd, cfqq, "moved to origin group"); | ||
1213 | } | ||
1214 | #endif | ||
1215 | |||
1216 | service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), | 1216 | service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), |
1217 | cfqq_type(cfqq)); | 1217 | cfqq_type(cfqq)); |
1218 | if (cfq_class_idle(cfqq)) { | 1218 | if (cfq_class_idle(cfqq)) { |
@@ -1284,7 +1284,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1284 | service_tree->count++; | 1284 | service_tree->count++; |
1285 | if ((add_front || !new_cfqq) && !group_changed) | 1285 | if ((add_front || !new_cfqq) && !group_changed) |
1286 | return; | 1286 | return; |
1287 | cfq_group_service_tree_add(cfqd, cfqq->cfqg); | 1287 | cfq_group_notify_queue_add(cfqd, cfqq->cfqg); |
1288 | } | 1288 | } |
1289 | 1289 | ||
1290 | static struct cfq_queue * | 1290 | static struct cfq_queue * |
@@ -1372,6 +1372,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1372 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 1372 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
1373 | cfq_mark_cfqq_on_rr(cfqq); | 1373 | cfq_mark_cfqq_on_rr(cfqq); |
1374 | cfqd->busy_queues++; | 1374 | cfqd->busy_queues++; |
1375 | if (cfq_cfqq_sync(cfqq)) | ||
1376 | cfqd->busy_sync_queues++; | ||
1375 | 1377 | ||
1376 | cfq_resort_rr_list(cfqd, cfqq); | 1378 | cfq_resort_rr_list(cfqd, cfqq); |
1377 | } | 1379 | } |
@@ -1395,9 +1397,11 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1395 | cfqq->p_root = NULL; | 1397 | cfqq->p_root = NULL; |
1396 | } | 1398 | } |
1397 | 1399 | ||
1398 | cfq_group_service_tree_del(cfqd, cfqq->cfqg); | 1400 | cfq_group_notify_queue_del(cfqd, cfqq->cfqg); |
1399 | BUG_ON(!cfqd->busy_queues); | 1401 | BUG_ON(!cfqd->busy_queues); |
1400 | cfqd->busy_queues--; | 1402 | cfqd->busy_queues--; |
1403 | if (cfq_cfqq_sync(cfqq)) | ||
1404 | cfqd->busy_sync_queues--; | ||
1401 | } | 1405 | } |
1402 | 1406 | ||
1403 | /* | 1407 | /* |
@@ -2405,6 +2409,7 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
2405 | * Does this cfqq already have too much IO in flight? | 2409 | * Does this cfqq already have too much IO in flight? |
2406 | */ | 2410 | */ |
2407 | if (cfqq->dispatched >= max_dispatch) { | 2411 | if (cfqq->dispatched >= max_dispatch) { |
2412 | bool promote_sync = false; | ||
2408 | /* | 2413 | /* |
2409 | * idle queue must always only have a single IO in flight | 2414 | * idle queue must always only have a single IO in flight |
2410 | */ | 2415 | */ |
@@ -2412,15 +2417,26 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
2412 | return false; | 2417 | return false; |
2413 | 2418 | ||
2414 | /* | 2419 | /* |
2420 | * If there is only one busy sync queue, we can ignore the | ||
2421 | * async queues and give the sync queue no dispatch limit: | ||
2422 | * a sync queue can preempt an async queue, so limiting the | ||
2423 | * sync queue doesn't make sense. This is useful for the | ||
2424 | * aiostress test. | ||
2425 | */ | ||
2426 | if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1) | ||
2427 | promote_sync = true; | ||
2428 | |||
2429 | /* | ||
2415 | * We have other queues, don't allow more IO from this one | 2430 | * We have other queues, don't allow more IO from this one |
2416 | */ | 2431 | */ |
2417 | if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq)) | 2432 | if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) && |
2433 | !promote_sync) | ||
2418 | return false; | 2434 | return false; |
2419 | 2435 | ||
2420 | /* | 2436 | /* |
2421 | * Sole queue user, no limit | 2437 | * Sole queue user, no limit |
2422 | */ | 2438 | */ |
2423 | if (cfqd->busy_queues == 1) | 2439 | if (cfqd->busy_queues == 1 || promote_sync) |
2424 | max_dispatch = -1; | 2440 | max_dispatch = -1; |
2425 | else | 2441 | else |
2426 | /* | 2442 | /* |
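
The cfq_may_dispatch() change above lifts the per-queue dispatch cap when exactly one sync queue is busy ("promote_sync"): a sync queue can preempt async queues anyway, so capping it only slows it down. Below is a simplified userspace model of that decision, with the deeper depth-based throttling elided; all names are illustrative.

#include <stdbool.h>
#include <stdio.h>

/*
 * Simplified model of the dispatch-cap decision once a queue has hit its
 * limit: a lone busy sync queue is still allowed to dispatch, everything
 * else falls through to the (elided) depth-based throttling, modelled
 * here as a plain refusal.
 */
static bool may_dispatch_over_limit(bool queue_is_sync, bool queue_is_idle_class,
                                    int busy_queues, int busy_sync_queues,
                                    bool slice_used_soon)
{
        bool promote_sync = false;

        /* idle-class queues only ever get a single request in flight */
        if (queue_is_idle_class)
                return false;

        /* only one sync queue busy: sync preempts async anyway, skip the cap */
        if (queue_is_sync && busy_sync_queues == 1)
                promote_sync = true;

        /* other queues exist and our slice is nearly gone: back off */
        if (busy_queues > 1 && slice_used_soon && !promote_sync)
                return false;

        /* sole queue user or promoted sync queue: effectively unlimited */
        return busy_queues == 1 || promote_sync;
}

int main(void)
{
        printf("%d\n", may_dispatch_over_limit(true, false, 3, 1, true));   /* 1 */
        printf("%d\n", may_dispatch_over_limit(false, false, 3, 1, true));  /* 0 */
        return 0;
}
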
@@ -2542,7 +2558,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) | |||
2542 | static void cfq_put_queue(struct cfq_queue *cfqq) | 2558 | static void cfq_put_queue(struct cfq_queue *cfqq) |
2543 | { | 2559 | { |
2544 | struct cfq_data *cfqd = cfqq->cfqd; | 2560 | struct cfq_data *cfqd = cfqq->cfqd; |
2545 | struct cfq_group *cfqg, *orig_cfqg; | 2561 | struct cfq_group *cfqg; |
2546 | 2562 | ||
2547 | BUG_ON(cfqq->ref <= 0); | 2563 | BUG_ON(cfqq->ref <= 0); |
2548 | 2564 | ||
@@ -2554,7 +2570,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq) | |||
2554 | BUG_ON(rb_first(&cfqq->sort_list)); | 2570 | BUG_ON(rb_first(&cfqq->sort_list)); |
2555 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); | 2571 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); |
2556 | cfqg = cfqq->cfqg; | 2572 | cfqg = cfqq->cfqg; |
2557 | orig_cfqg = cfqq->orig_cfqg; | ||
2558 | 2573 | ||
2559 | if (unlikely(cfqd->active_queue == cfqq)) { | 2574 | if (unlikely(cfqd->active_queue == cfqq)) { |
2560 | __cfq_slice_expired(cfqd, cfqq, 0); | 2575 | __cfq_slice_expired(cfqd, cfqq, 0); |
@@ -2564,33 +2579,23 @@ static void cfq_put_queue(struct cfq_queue *cfqq) | |||
2564 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 2579 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
2565 | kmem_cache_free(cfq_pool, cfqq); | 2580 | kmem_cache_free(cfq_pool, cfqq); |
2566 | cfq_put_cfqg(cfqg); | 2581 | cfq_put_cfqg(cfqg); |
2567 | if (orig_cfqg) | ||
2568 | cfq_put_cfqg(orig_cfqg); | ||
2569 | } | 2582 | } |
2570 | 2583 | ||
2571 | /* | 2584 | /* |
2572 | * Must always be called with the rcu_read_lock() held | 2585 | * Call func for each cic attached to this ioc. |
2573 | */ | 2586 | */ |
2574 | static void | 2587 | static void |
2575 | __call_for_each_cic(struct io_context *ioc, | 2588 | call_for_each_cic(struct io_context *ioc, |
2576 | void (*func)(struct io_context *, struct cfq_io_context *)) | 2589 | void (*func)(struct io_context *, struct cfq_io_context *)) |
2577 | { | 2590 | { |
2578 | struct cfq_io_context *cic; | 2591 | struct cfq_io_context *cic; |
2579 | struct hlist_node *n; | 2592 | struct hlist_node *n; |
2580 | 2593 | ||
2594 | rcu_read_lock(); | ||
2595 | |||
2581 | hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) | 2596 | hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list) |
2582 | func(ioc, cic); | 2597 | func(ioc, cic); |
2583 | } | ||
2584 | 2598 | ||
2585 | /* | ||
2586 | * Call func for each cic attached to this ioc. | ||
2587 | */ | ||
2588 | static void | ||
2589 | call_for_each_cic(struct io_context *ioc, | ||
2590 | void (*func)(struct io_context *, struct cfq_io_context *)) | ||
2591 | { | ||
2592 | rcu_read_lock(); | ||
2593 | __call_for_each_cic(ioc, func); | ||
2594 | rcu_read_unlock(); | 2599 | rcu_read_unlock(); |
2595 | } | 2600 | } |
2596 | 2601 | ||
@@ -2651,7 +2656,7 @@ static void cfq_free_io_context(struct io_context *ioc) | |||
2651 | * should be ok to iterate over the known list, we will see all cic's | 2656 | * should be ok to iterate over the known list, we will see all cic's |
2652 | * since no new ones are added. | 2657 | * since no new ones are added. |
2653 | */ | 2658 | */ |
2654 | __call_for_each_cic(ioc, cic_free_func); | 2659 | call_for_each_cic(ioc, cic_free_func); |
2655 | } | 2660 | } |
2656 | 2661 | ||
2657 | static void cfq_put_cooperator(struct cfq_queue *cfqq) | 2662 | static void cfq_put_cooperator(struct cfq_queue *cfqq) |
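
The hunk above folds __call_for_each_cic() into call_for_each_cic(), so the read-side lock is always taken around the cic-list walk rather than by an outer caller. The sketch below is a userspace analogue of the pattern, with a pthread rwlock and a plain array standing in for RCU and the hlist (these are not the kernel APIs); build with -pthread.

#include <pthread.h>
#include <stdio.h>

/* Userspace stand-ins: a fixed array plays the role of the RCU hlist. */
struct io_ctx { int id; };
struct cic    { int idx; };

static pthread_rwlock_t cic_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct cic cic_list[3] = { {0}, {1}, {2} };

/*
 * Single entry point: take the read-side lock, walk the list and invoke
 * the callback for each element -- mirroring the consolidated helper.
 */
static void call_for_each_cic(struct io_ctx *ioc,
                              void (*func)(struct io_ctx *, struct cic *))
{
        pthread_rwlock_rdlock(&cic_lock);
        for (size_t i = 0; i < sizeof(cic_list) / sizeof(cic_list[0]); i++)
                func(ioc, &cic_list[i]);
        pthread_rwlock_unlock(&cic_lock);
}

static void print_cic(struct io_ctx *ioc, struct cic *c)
{
        printf("ioc %d -> cic %d\n", ioc->id, c->idx);
}

int main(void)
{
        struct io_ctx ioc = { 7 };

        call_for_each_cic(&ioc, print_cic);
        return 0;
}
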
@@ -3355,7 +3360,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
3355 | cfqd->busy_queues > 1) { | 3360 | cfqd->busy_queues > 1) { |
3356 | cfq_del_timer(cfqd, cfqq); | 3361 | cfq_del_timer(cfqd, cfqq); |
3357 | cfq_clear_cfqq_wait_request(cfqq); | 3362 | cfq_clear_cfqq_wait_request(cfqq); |
3358 | __blk_run_queue(cfqd->queue, false); | 3363 | __blk_run_queue(cfqd->queue); |
3359 | } else { | 3364 | } else { |
3360 | cfq_blkiocg_update_idle_time_stats( | 3365 | cfq_blkiocg_update_idle_time_stats( |
3361 | &cfqq->cfqg->blkg); | 3366 | &cfqq->cfqg->blkg); |
@@ -3370,7 +3375,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
3370 | * this new queue is RT and the current one is BE | 3375 | * this new queue is RT and the current one is BE |
3371 | */ | 3376 | */ |
3372 | cfq_preempt_queue(cfqd, cfqq); | 3377 | cfq_preempt_queue(cfqd, cfqq); |
3373 | __blk_run_queue(cfqd->queue, false); | 3378 | __blk_run_queue(cfqd->queue); |
3374 | } | 3379 | } |
3375 | } | 3380 | } |
3376 | 3381 | ||
@@ -3613,12 +3618,12 @@ static void cfq_put_request(struct request *rq) | |||
3613 | 3618 | ||
3614 | put_io_context(RQ_CIC(rq)->ioc); | 3619 | put_io_context(RQ_CIC(rq)->ioc); |
3615 | 3620 | ||
3616 | rq->elevator_private = NULL; | 3621 | rq->elevator_private[0] = NULL; |
3617 | rq->elevator_private2 = NULL; | 3622 | rq->elevator_private[1] = NULL; |
3618 | 3623 | ||
3619 | /* Put down rq reference on cfqg */ | 3624 | /* Put down rq reference on cfqg */ |
3620 | cfq_put_cfqg(RQ_CFQG(rq)); | 3625 | cfq_put_cfqg(RQ_CFQG(rq)); |
3621 | rq->elevator_private3 = NULL; | 3626 | rq->elevator_private[2] = NULL; |
3622 | 3627 | ||
3623 | cfq_put_queue(cfqq); | 3628 | cfq_put_queue(cfqq); |
3624 | } | 3629 | } |
@@ -3705,13 +3710,12 @@ new_queue: | |||
3705 | } | 3710 | } |
3706 | 3711 | ||
3707 | cfqq->allocated[rw]++; | 3712 | cfqq->allocated[rw]++; |
3708 | cfqq->ref++; | ||
3709 | rq->elevator_private = cic; | ||
3710 | rq->elevator_private2 = cfqq; | ||
3711 | rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); | ||
3712 | 3713 | ||
3714 | cfqq->ref++; | ||
3715 | rq->elevator_private[0] = cic; | ||
3716 | rq->elevator_private[1] = cfqq; | ||
3717 | rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg); | ||
3713 | spin_unlock_irqrestore(q->queue_lock, flags); | 3718 | spin_unlock_irqrestore(q->queue_lock, flags); |
3714 | |||
3715 | return 0; | 3719 | return 0; |
3716 | 3720 | ||
3717 | queue_fail: | 3721 | queue_fail: |
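
The two hunks above replace the elevator_private, elevator_private2 and elevator_private3 fields with an elevator_private[3] array, letting cfq store the cic, cfqq and cfqg under fixed indices and clear them uniformly. A sketch of the idea with an illustrative request type, not the real struct request:

#include <stddef.h>
#include <stdio.h>

/* Illustrative request with per-elevator slots instead of three scalar fields. */
struct fake_request {
        void *elevator_private[3];      /* [0]=io context, [1]=queue, [2]=group */
};

static void set_private(struct fake_request *rq, void *cic, void *cfqq, void *cfqg)
{
        rq->elevator_private[0] = cic;
        rq->elevator_private[1] = cfqq;
        rq->elevator_private[2] = cfqg;
}

static void clear_private(struct fake_request *rq)
{
        for (size_t i = 0; i < 3; i++)
                rq->elevator_private[i] = NULL;
}

int main(void)
{
        struct fake_request rq;
        int cic = 1, cfqq = 2, cfqg = 3;

        set_private(&rq, &cic, &cfqq, &cfqg);
        printf("slot 1 holds %d\n", *(int *)rq.elevator_private[1]);    /* 2 */
        clear_private(&rq);
        return 0;
}
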
@@ -3731,7 +3735,7 @@ static void cfq_kick_queue(struct work_struct *work) | |||
3731 | struct request_queue *q = cfqd->queue; | 3735 | struct request_queue *q = cfqd->queue; |
3732 | 3736 | ||
3733 | spin_lock_irq(q->queue_lock); | 3737 | spin_lock_irq(q->queue_lock); |
3734 | __blk_run_queue(cfqd->queue, false); | 3738 | __blk_run_queue(cfqd->queue); |
3735 | spin_unlock_irq(q->queue_lock); | 3739 | spin_unlock_irq(q->queue_lock); |
3736 | } | 3740 | } |
3737 | 3741 | ||
@@ -3953,7 +3957,6 @@ static void *cfq_init_queue(struct request_queue *q) | |||
3953 | cfqd->cfq_slice_idle = cfq_slice_idle; | 3957 | cfqd->cfq_slice_idle = cfq_slice_idle; |
3954 | cfqd->cfq_group_idle = cfq_group_idle; | 3958 | cfqd->cfq_group_idle = cfq_group_idle; |
3955 | cfqd->cfq_latency = 1; | 3959 | cfqd->cfq_latency = 1; |
3956 | cfqd->cfq_group_isolation = 0; | ||
3957 | cfqd->hw_tag = -1; | 3960 | cfqd->hw_tag = -1; |
3958 | /* | 3961 | /* |
3959 | * we optimistically start assuming sync ops weren't delayed in last | 3962 | * we optimistically start assuming sync ops weren't delayed in last |
@@ -4029,7 +4032,6 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); | |||
4029 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); | 4032 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); |
4030 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); | 4033 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); |
4031 | SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); | 4034 | SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); |
4032 | SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0); | ||
4033 | #undef SHOW_FUNCTION | 4035 | #undef SHOW_FUNCTION |
4034 | 4036 | ||
4035 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ | 4037 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ |
@@ -4063,7 +4065,6 @@ STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); | |||
4063 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, | 4065 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, |
4064 | UINT_MAX, 0); | 4066 | UINT_MAX, 0); |
4065 | STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); | 4067 | STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); |
4066 | STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0); | ||
4067 | #undef STORE_FUNCTION | 4068 | #undef STORE_FUNCTION |
4068 | 4069 | ||
4069 | #define CFQ_ATTR(name) \ | 4070 | #define CFQ_ATTR(name) \ |
@@ -4081,7 +4082,6 @@ static struct elv_fs_entry cfq_attrs[] = { | |||
4081 | CFQ_ATTR(slice_idle), | 4082 | CFQ_ATTR(slice_idle), |
4082 | CFQ_ATTR(group_idle), | 4083 | CFQ_ATTR(group_idle), |
4083 | CFQ_ATTR(low_latency), | 4084 | CFQ_ATTR(low_latency), |
4084 | CFQ_ATTR(group_isolation), | ||
4085 | __ATTR_NULL | 4085 | __ATTR_NULL |
4086 | }; | 4086 | }; |
4087 | 4087 | ||
@@ -4096,7 +4096,6 @@ static struct elevator_type iosched_cfq = { | |||
4096 | .elevator_add_req_fn = cfq_insert_request, | 4096 | .elevator_add_req_fn = cfq_insert_request, |
4097 | .elevator_activate_req_fn = cfq_activate_request, | 4097 | .elevator_activate_req_fn = cfq_activate_request, |
4098 | .elevator_deactivate_req_fn = cfq_deactivate_request, | 4098 | .elevator_deactivate_req_fn = cfq_deactivate_request, |
4099 | .elevator_queue_empty_fn = cfq_queue_empty, | ||
4100 | .elevator_completed_req_fn = cfq_completed_request, | 4099 | .elevator_completed_req_fn = cfq_completed_request, |
4101 | .elevator_former_req_fn = elv_rb_former_request, | 4100 | .elevator_former_req_fn = elv_rb_former_request, |
4102 | .elevator_latter_req_fn = elv_rb_latter_request, | 4101 | .elevator_latter_req_fn = elv_rb_latter_request, |
diff --git a/block/cfq.h b/block/cfq.h index 54a6d90f8e8c..2a155927e37c 100644 --- a/block/cfq.h +++ b/block/cfq.h | |||
@@ -16,9 +16,9 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, | |||
16 | } | 16 | } |
17 | 17 | ||
18 | static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, | 18 | static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, |
19 | unsigned long time) | 19 | unsigned long time, unsigned long unaccounted_time) |
20 | { | 20 | { |
21 | blkiocg_update_timeslice_used(blkg, time); | 21 | blkiocg_update_timeslice_used(blkg, time, unaccounted_time); |
22 | } | 22 | } |
23 | 23 | ||
24 | static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) | 24 | static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) |
@@ -85,7 +85,7 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, | |||
85 | unsigned long dequeue) {} | 85 | unsigned long dequeue) {} |
86 | 86 | ||
87 | static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, | 87 | static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, |
88 | unsigned long time) {} | 88 | unsigned long time, unsigned long unaccounted_time) {} |
89 | static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) {} | 89 | static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) {} |
90 | static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg, | 90 | static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg, |
91 | bool direction, bool sync) {} | 91 | bool direction, bool sync) {} |
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index b547cbca7b23..5139c0ea1864 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c | |||
@@ -326,14 +326,6 @@ dispatch_request: | |||
326 | return 1; | 326 | return 1; |
327 | } | 327 | } |
328 | 328 | ||
329 | static int deadline_queue_empty(struct request_queue *q) | ||
330 | { | ||
331 | struct deadline_data *dd = q->elevator->elevator_data; | ||
332 | |||
333 | return list_empty(&dd->fifo_list[WRITE]) | ||
334 | && list_empty(&dd->fifo_list[READ]); | ||
335 | } | ||
336 | |||
337 | static void deadline_exit_queue(struct elevator_queue *e) | 329 | static void deadline_exit_queue(struct elevator_queue *e) |
338 | { | 330 | { |
339 | struct deadline_data *dd = e->elevator_data; | 331 | struct deadline_data *dd = e->elevator_data; |
@@ -445,7 +437,6 @@ static struct elevator_type iosched_deadline = { | |||
445 | .elevator_merge_req_fn = deadline_merged_requests, | 437 | .elevator_merge_req_fn = deadline_merged_requests, |
446 | .elevator_dispatch_fn = deadline_dispatch_requests, | 438 | .elevator_dispatch_fn = deadline_dispatch_requests, |
447 | .elevator_add_req_fn = deadline_add_request, | 439 | .elevator_add_req_fn = deadline_add_request, |
448 | .elevator_queue_empty_fn = deadline_queue_empty, | ||
449 | .elevator_former_req_fn = elv_rb_former_request, | 440 | .elevator_former_req_fn = elv_rb_former_request, |
450 | .elevator_latter_req_fn = elv_rb_latter_request, | 441 | .elevator_latter_req_fn = elv_rb_latter_request, |
451 | .elevator_init_fn = deadline_init_queue, | 442 | .elevator_init_fn = deadline_init_queue, |
diff --git a/block/elevator.c b/block/elevator.c index 236e93c1f46c..45ca1e34f582 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) | |||
113 | } | 113 | } |
114 | EXPORT_SYMBOL(elv_rq_merge_ok); | 114 | EXPORT_SYMBOL(elv_rq_merge_ok); |
115 | 115 | ||
116 | static inline int elv_try_merge(struct request *__rq, struct bio *bio) | 116 | int elv_try_merge(struct request *__rq, struct bio *bio) |
117 | { | 117 | { |
118 | int ret = ELEVATOR_NO_MERGE; | 118 | int ret = ELEVATOR_NO_MERGE; |
119 | 119 | ||
@@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) | |||
421 | struct list_head *entry; | 421 | struct list_head *entry; |
422 | int stop_flags; | 422 | int stop_flags; |
423 | 423 | ||
424 | BUG_ON(rq->cmd_flags & REQ_ON_PLUG); | ||
425 | |||
424 | if (q->last_merge == rq) | 426 | if (q->last_merge == rq) |
425 | q->last_merge = NULL; | 427 | q->last_merge = NULL; |
426 | 428 | ||
@@ -519,6 +521,40 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) | |||
519 | return ELEVATOR_NO_MERGE; | 521 | return ELEVATOR_NO_MERGE; |
520 | } | 522 | } |
521 | 523 | ||
524 | /* | ||
525 | * Attempt to do an insertion back merge. Only check for the case where | ||
526 | * we can append 'rq' to an existing request, so we can throw 'rq' away | ||
527 | * afterwards. | ||
528 | * | ||
529 | * Returns true if we merged, false otherwise | ||
530 | */ | ||
531 | static bool elv_attempt_insert_merge(struct request_queue *q, | ||
532 | struct request *rq) | ||
533 | { | ||
534 | struct request *__rq; | ||
535 | |||
536 | if (blk_queue_nomerges(q)) | ||
537 | return false; | ||
538 | |||
539 | /* | ||
540 | * First try one-hit cache. | ||
541 | */ | ||
542 | if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) | ||
543 | return true; | ||
544 | |||
545 | if (blk_queue_noxmerges(q)) | ||
546 | return false; | ||
547 | |||
548 | /* | ||
549 | * See if our hash lookup can find a potential backmerge. | ||
550 | */ | ||
551 | __rq = elv_rqhash_find(q, blk_rq_pos(rq)); | ||
552 | if (__rq && blk_attempt_req_merge(q, __rq, rq)) | ||
553 | return true; | ||
554 | |||
555 | return false; | ||
556 | } | ||
557 | |||
522 | void elv_merged_request(struct request_queue *q, struct request *rq, int type) | 558 | void elv_merged_request(struct request_queue *q, struct request *rq, int type) |
523 | { | 559 | { |
524 | struct elevator_queue *e = q->elevator; | 560 | struct elevator_queue *e = q->elevator; |
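
elv_attempt_insert_merge(), added above, tries the one-hit last_merge cache first and only then the rqhash back-merge lookup, bailing out early when nomerges or noxmerges is set on the queue. Below is a simplified userspace model of that cascade; the predicates and lookup are stubs standing in for blk_queue_nomerges(), blk_attempt_req_merge() and elv_rqhash_find().

#include <stdbool.h>
#include <stdio.h>

struct req { long pos; };

struct queue_model {
        bool nomerges;          /* all merging disabled */
        bool noxmerges;         /* extended (hash) merging disabled */
        struct req *last_merge; /* one-hit cache */
};

/* Stubs for the real helpers; "merge succeeds" here simply means matching positions. */
static bool try_req_merge(struct req *a, struct req *b)
{
        return a && a->pos == b->pos;
}

static struct req *hash_find(struct queue_model *q, long pos)
{
        (void)q;
        (void)pos;
        return NULL;            /* pretend the hash has no candidate */
}

static bool attempt_insert_merge(struct queue_model *q, struct req *rq)
{
        struct req *cand;

        if (q->nomerges)
                return false;

        /* 1) one-hit cache */
        if (q->last_merge && try_req_merge(q->last_merge, rq))
                return true;

        if (q->noxmerges)
                return false;

        /* 2) hash lookup for a potential back merge */
        cand = hash_find(q, rq->pos);
        return cand && try_req_merge(cand, rq);
}

int main(void)
{
        struct req cached = { 100 }, incoming = { 100 };
        struct queue_model q = { .last_merge = &cached };

        printf("merged: %d\n", attempt_insert_merge(&q, &incoming));    /* 1 */
        return 0;
}
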
@@ -536,14 +572,18 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, | |||
536 | struct request *next) | 572 | struct request *next) |
537 | { | 573 | { |
538 | struct elevator_queue *e = q->elevator; | 574 | struct elevator_queue *e = q->elevator; |
575 | const int next_sorted = next->cmd_flags & REQ_SORTED; | ||
539 | 576 | ||
540 | if (e->ops->elevator_merge_req_fn) | 577 | if (next_sorted && e->ops->elevator_merge_req_fn) |
541 | e->ops->elevator_merge_req_fn(q, rq, next); | 578 | e->ops->elevator_merge_req_fn(q, rq, next); |
542 | 579 | ||
543 | elv_rqhash_reposition(q, rq); | 580 | elv_rqhash_reposition(q, rq); |
544 | elv_rqhash_del(q, next); | ||
545 | 581 | ||
546 | q->nr_sorted--; | 582 | if (next_sorted) { |
583 | elv_rqhash_del(q, next); | ||
584 | q->nr_sorted--; | ||
585 | } | ||
586 | |||
547 | q->last_merge = rq; | 587 | q->last_merge = rq; |
548 | } | 588 | } |
549 | 589 | ||
@@ -570,7 +610,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) | |||
570 | 610 | ||
571 | rq->cmd_flags &= ~REQ_STARTED; | 611 | rq->cmd_flags &= ~REQ_STARTED; |
572 | 612 | ||
573 | elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); | 613 | __elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE); |
574 | } | 614 | } |
575 | 615 | ||
576 | void elv_drain_elevator(struct request_queue *q) | 616 | void elv_drain_elevator(struct request_queue *q) |
@@ -602,7 +642,7 @@ void elv_quiesce_start(struct request_queue *q) | |||
602 | */ | 642 | */ |
603 | elv_drain_elevator(q); | 643 | elv_drain_elevator(q); |
604 | while (q->rq.elvpriv) { | 644 | while (q->rq.elvpriv) { |
605 | __blk_run_queue(q, false); | 645 | __blk_run_queue(q); |
606 | spin_unlock_irq(q->queue_lock); | 646 | spin_unlock_irq(q->queue_lock); |
607 | msleep(10); | 647 | msleep(10); |
608 | spin_lock_irq(q->queue_lock); | 648 | spin_lock_irq(q->queue_lock); |
@@ -615,23 +655,28 @@ void elv_quiesce_end(struct request_queue *q) | |||
615 | queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); | 655 | queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); |
616 | } | 656 | } |
617 | 657 | ||
618 | void elv_insert(struct request_queue *q, struct request *rq, int where) | 658 | void __elv_add_request(struct request_queue *q, struct request *rq, int where) |
619 | { | 659 | { |
620 | int unplug_it = 1; | ||
621 | |||
622 | trace_block_rq_insert(q, rq); | 660 | trace_block_rq_insert(q, rq); |
623 | 661 | ||
624 | rq->q = q; | 662 | rq->q = q; |
625 | 663 | ||
664 | BUG_ON(rq->cmd_flags & REQ_ON_PLUG); | ||
665 | |||
666 | if (rq->cmd_flags & REQ_SOFTBARRIER) { | ||
667 | /* barriers are scheduling boundary, update end_sector */ | ||
668 | if (rq->cmd_type == REQ_TYPE_FS || | ||
669 | (rq->cmd_flags & REQ_DISCARD)) { | ||
670 | q->end_sector = rq_end_sector(rq); | ||
671 | q->boundary_rq = rq; | ||
672 | } | ||
673 | } else if (!(rq->cmd_flags & REQ_ELVPRIV) && | ||
674 | (where == ELEVATOR_INSERT_SORT || | ||
675 | where == ELEVATOR_INSERT_SORT_MERGE)) | ||
676 | where = ELEVATOR_INSERT_BACK; | ||
677 | |||
626 | switch (where) { | 678 | switch (where) { |
627 | case ELEVATOR_INSERT_REQUEUE: | 679 | case ELEVATOR_INSERT_REQUEUE: |
628 | /* | ||
629 | * Most requeues happen because of a busy condition, | ||
630 | * don't force unplug of the queue for that case. | ||
631 | * Clear unplug_it and fall through. | ||
632 | */ | ||
633 | unplug_it = 0; | ||
634 | |||
635 | case ELEVATOR_INSERT_FRONT: | 680 | case ELEVATOR_INSERT_FRONT: |
636 | rq->cmd_flags |= REQ_SOFTBARRIER; | 681 | rq->cmd_flags |= REQ_SOFTBARRIER; |
637 | list_add(&rq->queuelist, &q->queue_head); | 682 | list_add(&rq->queuelist, &q->queue_head); |
@@ -651,9 +696,17 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) | |||
651 | * with anything. There's no point in delaying queue | 696 | * with anything. There's no point in delaying queue |
652 | * processing. | 697 | * processing. |
653 | */ | 698 | */ |
654 | __blk_run_queue(q, false); | 699 | __blk_run_queue(q); |
655 | break; | 700 | break; |
656 | 701 | ||
702 | case ELEVATOR_INSERT_SORT_MERGE: | ||
703 | /* | ||
704 | * If we succeed in merging this request with one in the | ||
705 | * queue already, we are done - rq has now been freed, | ||
706 | * so no need to do anything further. | ||
707 | */ | ||
708 | if (elv_attempt_insert_merge(q, rq)) | ||
709 | break; | ||
657 | case ELEVATOR_INSERT_SORT: | 710 | case ELEVATOR_INSERT_SORT: |
658 | BUG_ON(rq->cmd_type != REQ_TYPE_FS && | 711 | BUG_ON(rq->cmd_type != REQ_TYPE_FS && |
659 | !(rq->cmd_flags & REQ_DISCARD)); | 712 | !(rq->cmd_flags & REQ_DISCARD)); |
@@ -673,67 +726,28 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) | |||
673 | q->elevator->ops->elevator_add_req_fn(q, rq); | 726 | q->elevator->ops->elevator_add_req_fn(q, rq); |
674 | break; | 727 | break; |
675 | 728 | ||
729 | case ELEVATOR_INSERT_FLUSH: | ||
730 | rq->cmd_flags |= REQ_SOFTBARRIER; | ||
731 | blk_insert_flush(rq); | ||
732 | break; | ||
676 | default: | 733 | default: |
677 | printk(KERN_ERR "%s: bad insertion point %d\n", | 734 | printk(KERN_ERR "%s: bad insertion point %d\n", |
678 | __func__, where); | 735 | __func__, where); |
679 | BUG(); | 736 | BUG(); |
680 | } | 737 | } |
681 | |||
682 | if (unplug_it && blk_queue_plugged(q)) { | ||
683 | int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC] | ||
684 | - queue_in_flight(q); | ||
685 | |||
686 | if (nrq >= q->unplug_thresh) | ||
687 | __generic_unplug_device(q); | ||
688 | } | ||
689 | } | ||
690 | |||
691 | void __elv_add_request(struct request_queue *q, struct request *rq, int where, | ||
692 | int plug) | ||
693 | { | ||
694 | if (rq->cmd_flags & REQ_SOFTBARRIER) { | ||
695 | /* barriers are scheduling boundary, update end_sector */ | ||
696 | if (rq->cmd_type == REQ_TYPE_FS || | ||
697 | (rq->cmd_flags & REQ_DISCARD)) { | ||
698 | q->end_sector = rq_end_sector(rq); | ||
699 | q->boundary_rq = rq; | ||
700 | } | ||
701 | } else if (!(rq->cmd_flags & REQ_ELVPRIV) && | ||
702 | where == ELEVATOR_INSERT_SORT) | ||
703 | where = ELEVATOR_INSERT_BACK; | ||
704 | |||
705 | if (plug) | ||
706 | blk_plug_device(q); | ||
707 | |||
708 | elv_insert(q, rq, where); | ||
709 | } | 738 | } |
710 | EXPORT_SYMBOL(__elv_add_request); | 739 | EXPORT_SYMBOL(__elv_add_request); |
711 | 740 | ||
712 | void elv_add_request(struct request_queue *q, struct request *rq, int where, | 741 | void elv_add_request(struct request_queue *q, struct request *rq, int where) |
713 | int plug) | ||
714 | { | 742 | { |
715 | unsigned long flags; | 743 | unsigned long flags; |
716 | 744 | ||
717 | spin_lock_irqsave(q->queue_lock, flags); | 745 | spin_lock_irqsave(q->queue_lock, flags); |
718 | __elv_add_request(q, rq, where, plug); | 746 | __elv_add_request(q, rq, where); |
719 | spin_unlock_irqrestore(q->queue_lock, flags); | 747 | spin_unlock_irqrestore(q->queue_lock, flags); |
720 | } | 748 | } |
721 | EXPORT_SYMBOL(elv_add_request); | 749 | EXPORT_SYMBOL(elv_add_request); |
722 | 750 | ||
723 | int elv_queue_empty(struct request_queue *q) | ||
724 | { | ||
725 | struct elevator_queue *e = q->elevator; | ||
726 | |||
727 | if (!list_empty(&q->queue_head)) | ||
728 | return 0; | ||
729 | |||
730 | if (e->ops->elevator_queue_empty_fn) | ||
731 | return e->ops->elevator_queue_empty_fn(q); | ||
732 | |||
733 | return 1; | ||
734 | } | ||
735 | EXPORT_SYMBOL(elv_queue_empty); | ||
736 | |||
737 | struct request *elv_latter_request(struct request_queue *q, struct request *rq) | 751 | struct request *elv_latter_request(struct request_queue *q, struct request *rq) |
738 | { | 752 | { |
739 | struct elevator_queue *e = q->elevator; | 753 | struct elevator_queue *e = q->elevator; |
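
In the elevator.c hunks above, elv_insert() disappears into __elv_add_request(): the soft-barrier bookkeeping moves to the top, SORT and SORT_MERGE insertions of requests without REQ_ELVPRIV are demoted to BACK, and FLUSH and SORT_MERGE gain cases in the switch. The sketch below is a compact userspace model of just the insertion-point remapping; the constants and flag names are illustrative.

#include <stdbool.h>
#include <stdio.h>

enum where {
        INSERT_REQUEUE, INSERT_FRONT, INSERT_BACK,
        INSERT_SORT, INSERT_SORT_MERGE, INSERT_FLUSH
};

struct rq_model {
        bool softbarrier;       /* REQ_SOFTBARRIER: scheduling boundary */
        bool elvpriv;           /* REQ_ELVPRIV: carries elevator-private data */
};

/*
 * Mirror of the remapping at the top of __elv_add_request(): soft barriers
 * keep their insertion point, while sort-style insertions of requests
 * without elevator-private data are demoted to a plain back insertion.
 */
static enum where remap_where(const struct rq_model *rq, enum where where)
{
        if (rq->softbarrier)
                return where;
        if (!rq->elvpriv &&
            (where == INSERT_SORT || where == INSERT_SORT_MERGE))
                return INSERT_BACK;
        return where;
}

int main(void)
{
        struct rq_model plain = { .softbarrier = false, .elvpriv = false };
        struct rq_model sched = { .softbarrier = false, .elvpriv = true };

        printf("%d\n", remap_where(&plain, INSERT_SORT));       /* 2 = INSERT_BACK */
        printf("%d\n", remap_where(&sched, INSERT_SORT_MERGE)); /* 4 = unchanged */
        return 0;
}
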
@@ -759,7 +773,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | |||
759 | if (e->ops->elevator_set_req_fn) | 773 | if (e->ops->elevator_set_req_fn) |
760 | return e->ops->elevator_set_req_fn(q, rq, gfp_mask); | 774 | return e->ops->elevator_set_req_fn(q, rq, gfp_mask); |
761 | 775 | ||
762 | rq->elevator_private = NULL; | 776 | rq->elevator_private[0] = NULL; |
763 | return 0; | 777 | return 0; |
764 | } | 778 | } |
765 | 779 | ||
@@ -785,6 +799,8 @@ void elv_abort_queue(struct request_queue *q) | |||
785 | { | 799 | { |
786 | struct request *rq; | 800 | struct request *rq; |
787 | 801 | ||
802 | blk_abort_flushes(q); | ||
803 | |||
788 | while (!list_empty(&q->queue_head)) { | 804 | while (!list_empty(&q->queue_head)) { |
789 | rq = list_entry_rq(q->queue_head.next); | 805 | rq = list_entry_rq(q->queue_head.next); |
790 | rq->cmd_flags |= REQ_QUIET; | 806 | rq->cmd_flags |= REQ_QUIET; |
diff --git a/block/genhd.c b/block/genhd.c index cbf1112a885c..2dd988723d73 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -739,7 +739,7 @@ void __init printk_all_partitions(void) | |||
739 | 739 | ||
740 | /* | 740 | /* |
741 | * Don't show empty devices or things that have been | 741 | * Don't show empty devices or things that have been |
742 | * surpressed | 742 | * suppressed |
743 | */ | 743 | */ |
744 | if (get_capacity(disk) == 0 || | 744 | if (get_capacity(disk) == 0 || |
745 | (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) | 745 | (disk->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)) |
@@ -1158,14 +1158,14 @@ static int diskstats_show(struct seq_file *seqf, void *v) | |||
1158 | "%u %lu %lu %llu %u %u %u %u\n", | 1158 | "%u %lu %lu %llu %u %u %u %u\n", |
1159 | MAJOR(part_devt(hd)), MINOR(part_devt(hd)), | 1159 | MAJOR(part_devt(hd)), MINOR(part_devt(hd)), |
1160 | disk_name(gp, hd->partno, buf), | 1160 | disk_name(gp, hd->partno, buf), |
1161 | part_stat_read(hd, ios[0]), | 1161 | part_stat_read(hd, ios[READ]), |
1162 | part_stat_read(hd, merges[0]), | 1162 | part_stat_read(hd, merges[READ]), |
1163 | (unsigned long long)part_stat_read(hd, sectors[0]), | 1163 | (unsigned long long)part_stat_read(hd, sectors[READ]), |
1164 | jiffies_to_msecs(part_stat_read(hd, ticks[0])), | 1164 | jiffies_to_msecs(part_stat_read(hd, ticks[READ])), |
1165 | part_stat_read(hd, ios[1]), | 1165 | part_stat_read(hd, ios[WRITE]), |
1166 | part_stat_read(hd, merges[1]), | 1166 | part_stat_read(hd, merges[WRITE]), |
1167 | (unsigned long long)part_stat_read(hd, sectors[1]), | 1167 | (unsigned long long)part_stat_read(hd, sectors[WRITE]), |
1168 | jiffies_to_msecs(part_stat_read(hd, ticks[1])), | 1168 | jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), |
1169 | part_in_flight(hd), | 1169 | part_in_flight(hd), |
1170 | jiffies_to_msecs(part_stat_read(hd, io_ticks)), | 1170 | jiffies_to_msecs(part_stat_read(hd, io_ticks)), |
1171 | jiffies_to_msecs(part_stat_read(hd, time_in_queue)) | 1171 | jiffies_to_msecs(part_stat_read(hd, time_in_queue)) |
@@ -1494,7 +1494,7 @@ void disk_block_events(struct gendisk *disk) | |||
1494 | void disk_unblock_events(struct gendisk *disk) | 1494 | void disk_unblock_events(struct gendisk *disk) |
1495 | { | 1495 | { |
1496 | if (disk->ev) | 1496 | if (disk->ev) |
1497 | __disk_unblock_events(disk, true); | 1497 | __disk_unblock_events(disk, false); |
1498 | } | 1498 | } |
1499 | 1499 | ||
1500 | /** | 1500 | /** |
@@ -1588,9 +1588,13 @@ static void disk_events_workfn(struct work_struct *work) | |||
1588 | 1588 | ||
1589 | spin_unlock_irq(&ev->lock); | 1589 | spin_unlock_irq(&ev->lock); |
1590 | 1590 | ||
1591 | /* tell userland about new events */ | 1591 | /* |
1592 | * Tell userland about new events. Only the events listed in | ||
1593 | * @disk->events are reported. Unlisted events are processed the | ||
1594 | * same internally but never get reported to userland. | ||
1595 | */ | ||
1592 | for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) | 1596 | for (i = 0; i < ARRAY_SIZE(disk_uevents); i++) |
1593 | if (events & (1 << i)) | 1597 | if (events & disk->events & (1 << i)) |
1594 | envp[nr_events++] = disk_uevents[i]; | 1598 | envp[nr_events++] = disk_uevents[i]; |
1595 | 1599 | ||
1596 | if (nr_events) | 1600 | if (nr_events) |
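
The disk_events_workfn() hunk above masks the pending events with disk->events before building the uevent environment, so only event bits the driver actually advertises reach userland. A small userspace sketch of that filtering follows; the event names and mask layout are illustrative.

#include <stdio.h>

#define EV_MEDIA_CHANGE         (1u << 0)
#define EV_EJECT_REQUEST        (1u << 1)

static const char *const uevent_strings[] = {
        "DISK_MEDIA_CHANGE=1",
        "DISK_EJECT_REQUEST=1",
};

/* Build the uevent list from pending events, restricted to the advertised mask. */
static int build_uevents(unsigned int pending, unsigned int listed,
                         const char *envp[], int max)
{
        int nr = 0;

        for (unsigned int i = 0; i < 2 && nr < max; i++)
                if (pending & listed & (1u << i))
                        envp[nr++] = uevent_strings[i];
        return nr;
}

int main(void)
{
        const char *envp[4];
        /* both events pending, but the disk only advertises media change */
        int nr = build_uevents(EV_MEDIA_CHANGE | EV_EJECT_REQUEST,
                               EV_MEDIA_CHANGE, envp, 4);

        for (int i = 0; i < nr; i++)
                printf("%s\n", envp[i]);        /* only DISK_MEDIA_CHANGE=1 */
        return 0;
}
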
diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 232c4b38cd37..06389e9ef96d 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c | |||
@@ -39,13 +39,6 @@ static void noop_add_request(struct request_queue *q, struct request *rq) | |||
39 | list_add_tail(&rq->queuelist, &nd->queue); | 39 | list_add_tail(&rq->queuelist, &nd->queue); |
40 | } | 40 | } |
41 | 41 | ||
42 | static int noop_queue_empty(struct request_queue *q) | ||
43 | { | ||
44 | struct noop_data *nd = q->elevator->elevator_data; | ||
45 | |||
46 | return list_empty(&nd->queue); | ||
47 | } | ||
48 | |||
49 | static struct request * | 42 | static struct request * |
50 | noop_former_request(struct request_queue *q, struct request *rq) | 43 | noop_former_request(struct request_queue *q, struct request *rq) |
51 | { | 44 | { |
@@ -90,7 +83,6 @@ static struct elevator_type elevator_noop = { | |||
90 | .elevator_merge_req_fn = noop_merged_requests, | 83 | .elevator_merge_req_fn = noop_merged_requests, |
91 | .elevator_dispatch_fn = noop_dispatch, | 84 | .elevator_dispatch_fn = noop_dispatch, |
92 | .elevator_add_req_fn = noop_add_request, | 85 | .elevator_add_req_fn = noop_add_request, |
93 | .elevator_queue_empty_fn = noop_queue_empty, | ||
94 | .elevator_former_req_fn = noop_former_request, | 86 | .elevator_former_req_fn = noop_former_request, |
95 | .elevator_latter_req_fn = noop_latter_request, | 87 | .elevator_latter_req_fn = noop_latter_request, |
96 | .elevator_init_fn = noop_init_queue, | 88 | .elevator_init_fn = noop_init_queue, |