author     Linus Torvalds <torvalds@linux-foundation.org>    2011-03-24 13:16:26 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2011-03-24 13:16:26 -0400
commit     6c5103890057b1bb781b26b7aae38d33e4c517d8 (patch)
tree       e6e57961dcddcb5841acb34956e70b9dc696a880 /block
parent     3dab04e6978e358ad2307bca563fabd6c5d2c58b (diff)
parent     9d2e157d970a73b3f270b631828e03eb452d525e (diff)
Merge branch 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.39/core' of git://git.kernel.dk/linux-2.6-block: (65 commits)
Documentation/iostats.txt: bit-size reference etc.
cfq-iosched: removing unnecessary think time checking
cfq-iosched: Don't clear queue stats when preempt.
blk-throttle: Reset group slice when limits are changed
blk-cgroup: Only give unaccounted_time under debug
cfq-iosched: Don't set active queue in preempt
block: fix non-atomic access to genhd inflight structures
block: attempt to merge with existing requests on plug flush
block: NULL dereference on error path in __blkdev_get()
cfq-iosched: Don't update group weights when on service tree
fs: assign sb->s_bdi to default_backing_dev_info if the bdi is going away
block: Require subsystems to explicitly allocate bio_set integrity mempool
jbd2: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging
jbd: finish conversion from WRITE_SYNC_PLUG to WRITE_SYNC and explicit plugging
fs: make fsync_buffers_list() plug
mm: make generic_writepages() use plugging
blk-cgroup: Add unaccounted time to timeslice_used.
block: fixup plugging stubs for !CONFIG_BLOCK
block: remove obsolete comments for blkdev_issue_zeroout.
blktrace: Use rq->cmd_flags directly in blk_add_trace_rq.
...
Fix up conflicts in fs/{aio.c,super.c}
Diffstat (limited to 'block')
-rw-r--r--  block/blk-cgroup.c          16
-rw-r--r--  block/blk-cgroup.h          14
-rw-r--r--  block/blk-core.c           646
-rw-r--r--  block/blk-exec.c             4
-rw-r--r--  block/blk-flush.c          439
-rw-r--r--  block/blk-lib.c              2
-rw-r--r--  block/blk-merge.c            6
-rw-r--r--  block/blk-settings.c        15
-rw-r--r--  block/blk-sysfs.c            2
-rw-r--r--  block/blk-throttle.c       139
-rw-r--r--  block/blk.h                 16
-rw-r--r--  block/cfq-iosched.c        163
-rw-r--r--  block/cfq.h                  6
-rw-r--r--  block/deadline-iosched.c     9
-rw-r--r--  block/elevator.c           108
-rw-r--r--  block/genhd.c               18
-rw-r--r--  block/noop-iosched.c         8
17 files changed, 955 insertions, 656 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 455768a3eb9e..2bef5705ce24 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -371,12 +371,14 @@ void blkiocg_update_io_remove_stats(struct blkio_group *blkg, | |||
371 | } | 371 | } |
372 | EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); | 372 | EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats); |
373 | 373 | ||
374 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time) | 374 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time, |
375 | unsigned long unaccounted_time) | ||
375 | { | 376 | { |
376 | unsigned long flags; | 377 | unsigned long flags; |
377 | 378 | ||
378 | spin_lock_irqsave(&blkg->stats_lock, flags); | 379 | spin_lock_irqsave(&blkg->stats_lock, flags); |
379 | blkg->stats.time += time; | 380 | blkg->stats.time += time; |
381 | blkg->stats.unaccounted_time += unaccounted_time; | ||
380 | spin_unlock_irqrestore(&blkg->stats_lock, flags); | 382 | spin_unlock_irqrestore(&blkg->stats_lock, flags); |
381 | } | 383 | } |
382 | EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); | 384 | EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used); |
@@ -604,6 +606,9 @@ static uint64_t blkio_get_stat(struct blkio_group *blkg, | |||
604 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | 606 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, |
605 | blkg->stats.sectors, cb, dev); | 607 | blkg->stats.sectors, cb, dev); |
606 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 608 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
609 | if (type == BLKIO_STAT_UNACCOUNTED_TIME) | ||
610 | return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, | ||
611 | blkg->stats.unaccounted_time, cb, dev); | ||
607 | if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { | 612 | if (type == BLKIO_STAT_AVG_QUEUE_SIZE) { |
608 | uint64_t sum = blkg->stats.avg_queue_size_sum; | 613 | uint64_t sum = blkg->stats.avg_queue_size_sum; |
609 | uint64_t samples = blkg->stats.avg_queue_size_samples; | 614 | uint64_t samples = blkg->stats.avg_queue_size_samples; |
@@ -1125,6 +1130,9 @@ static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft, | |||
1125 | return blkio_read_blkg_stats(blkcg, cft, cb, | 1130 | return blkio_read_blkg_stats(blkcg, cft, cb, |
1126 | BLKIO_STAT_QUEUED, 1); | 1131 | BLKIO_STAT_QUEUED, 1); |
1127 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 1132 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
1133 | case BLKIO_PROP_unaccounted_time: | ||
1134 | return blkio_read_blkg_stats(blkcg, cft, cb, | ||
1135 | BLKIO_STAT_UNACCOUNTED_TIME, 0); | ||
1128 | case BLKIO_PROP_dequeue: | 1136 | case BLKIO_PROP_dequeue: |
1129 | return blkio_read_blkg_stats(blkcg, cft, cb, | 1137 | return blkio_read_blkg_stats(blkcg, cft, cb, |
1130 | BLKIO_STAT_DEQUEUE, 0); | 1138 | BLKIO_STAT_DEQUEUE, 0); |
@@ -1382,6 +1390,12 @@ struct cftype blkio_files[] = { | |||
1382 | BLKIO_PROP_dequeue), | 1390 | BLKIO_PROP_dequeue), |
1383 | .read_map = blkiocg_file_read_map, | 1391 | .read_map = blkiocg_file_read_map, |
1384 | }, | 1392 | }, |
1393 | { | ||
1394 | .name = "unaccounted_time", | ||
1395 | .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP, | ||
1396 | BLKIO_PROP_unaccounted_time), | ||
1397 | .read_map = blkiocg_file_read_map, | ||
1398 | }, | ||
1385 | #endif | 1399 | #endif |
1386 | }; | 1400 | }; |
1387 | 1401 | ||
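The hunks above extend blkiocg_update_timeslice_used() with a second argument carrying the unaccounted portion of a slice, and export it (under CONFIG_DEBUG_BLK_CGROUP) through a new "unaccounted_time" cgroup file. A minimal caller sketch follows; charge_group_slice() is a hypothetical helper for illustration, only the updated three-argument call is taken from the diff.

#include "blk-cgroup.h"

/* Hypothetical I/O-scheduler helper: report a finished timeslice. */
static void charge_group_slice(struct blkio_group *blkg,
                               unsigned long used, unsigned long unaccounted)
{
        /*
         * 'used' is added to the group's time stat; 'unaccounted' is
         * recorded separately and only exposed via the debug-only
         * "unaccounted_time" file.
         */
        blkiocg_update_timeslice_used(blkg, used, unaccounted);
}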
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index ea4861bdd549..10919fae2d3a 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -49,6 +49,8 @@ enum stat_type { | |||
49 | /* All the single valued stats go below this */ | 49 | /* All the single valued stats go below this */ |
50 | BLKIO_STAT_TIME, | 50 | BLKIO_STAT_TIME, |
51 | BLKIO_STAT_SECTORS, | 51 | BLKIO_STAT_SECTORS, |
52 | /* Time not charged to this cgroup */ | ||
53 | BLKIO_STAT_UNACCOUNTED_TIME, | ||
52 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 54 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
53 | BLKIO_STAT_AVG_QUEUE_SIZE, | 55 | BLKIO_STAT_AVG_QUEUE_SIZE, |
54 | BLKIO_STAT_IDLE_TIME, | 56 | BLKIO_STAT_IDLE_TIME, |
@@ -81,6 +83,7 @@ enum blkcg_file_name_prop { | |||
81 | BLKIO_PROP_io_serviced, | 83 | BLKIO_PROP_io_serviced, |
82 | BLKIO_PROP_time, | 84 | BLKIO_PROP_time, |
83 | BLKIO_PROP_sectors, | 85 | BLKIO_PROP_sectors, |
86 | BLKIO_PROP_unaccounted_time, | ||
84 | BLKIO_PROP_io_service_time, | 87 | BLKIO_PROP_io_service_time, |
85 | BLKIO_PROP_io_wait_time, | 88 | BLKIO_PROP_io_wait_time, |
86 | BLKIO_PROP_io_merged, | 89 | BLKIO_PROP_io_merged, |
@@ -114,6 +117,8 @@ struct blkio_group_stats { | |||
114 | /* total disk time and nr sectors dispatched by this group */ | 117 | /* total disk time and nr sectors dispatched by this group */ |
115 | uint64_t time; | 118 | uint64_t time; |
116 | uint64_t sectors; | 119 | uint64_t sectors; |
120 | /* Time not charged to this cgroup */ | ||
121 | uint64_t unaccounted_time; | ||
117 | uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL]; | 122 | uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL]; |
118 | #ifdef CONFIG_DEBUG_BLK_CGROUP | 123 | #ifdef CONFIG_DEBUG_BLK_CGROUP |
119 | /* Sum of number of IOs queued across all samples */ | 124 | /* Sum of number of IOs queued across all samples */ |
@@ -240,7 +245,7 @@ static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } | |||
240 | 245 | ||
241 | #endif | 246 | #endif |
242 | 247 | ||
243 | #define BLKIO_WEIGHT_MIN 100 | 248 | #define BLKIO_WEIGHT_MIN 10 |
244 | #define BLKIO_WEIGHT_MAX 1000 | 249 | #define BLKIO_WEIGHT_MAX 1000 |
245 | #define BLKIO_WEIGHT_DEFAULT 500 | 250 | #define BLKIO_WEIGHT_DEFAULT 500 |
246 | 251 | ||
@@ -293,7 +298,8 @@ extern int blkiocg_del_blkio_group(struct blkio_group *blkg); | |||
293 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, | 298 | extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, |
294 | void *key); | 299 | void *key); |
295 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, | 300 | void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
296 | unsigned long time); | 301 | unsigned long time, |
302 | unsigned long unaccounted_time); | ||
297 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes, | 303 | void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes, |
298 | bool direction, bool sync); | 304 | bool direction, bool sync); |
299 | void blkiocg_update_completion_stats(struct blkio_group *blkg, | 305 | void blkiocg_update_completion_stats(struct blkio_group *blkg, |
@@ -319,7 +325,9 @@ blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } | |||
319 | static inline struct blkio_group * | 325 | static inline struct blkio_group * |
320 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } | 326 | blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } |
321 | static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, | 327 | static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, |
322 | unsigned long time) {} | 328 | unsigned long time, |
329 | unsigned long unaccounted_time) | ||
330 | {} | ||
323 | static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg, | 331 | static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg, |
324 | uint64_t bytes, bool direction, bool sync) {} | 332 | uint64_t bytes, bool direction, bool sync) {} |
325 | static inline void blkiocg_update_completion_stats(struct blkio_group *blkg, | 333 | static inline void blkiocg_update_completion_stats(struct blkio_group *blkg, |
diff --git a/block/blk-core.c b/block/blk-core.c
index a63336d49f30..59b5c00c0126 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/writeback.h> | 27 | #include <linux/writeback.h> |
28 | #include <linux/task_io_accounting_ops.h> | 28 | #include <linux/task_io_accounting_ops.h> |
29 | #include <linux/fault-inject.h> | 29 | #include <linux/fault-inject.h> |
30 | #include <linux/list_sort.h> | ||
30 | 31 | ||
31 | #define CREATE_TRACE_POINTS | 32 | #define CREATE_TRACE_POINTS |
32 | #include <trace/events/block.h> | 33 | #include <trace/events/block.h> |
@@ -149,39 +150,29 @@ EXPORT_SYMBOL(blk_rq_init); | |||
149 | static void req_bio_endio(struct request *rq, struct bio *bio, | 150 | static void req_bio_endio(struct request *rq, struct bio *bio, |
150 | unsigned int nbytes, int error) | 151 | unsigned int nbytes, int error) |
151 | { | 152 | { |
152 | struct request_queue *q = rq->q; | 153 | if (error) |
153 | 154 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | |
154 | if (&q->flush_rq != rq) { | 155 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) |
155 | if (error) | 156 | error = -EIO; |
156 | clear_bit(BIO_UPTODATE, &bio->bi_flags); | ||
157 | else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
158 | error = -EIO; | ||
159 | 157 | ||
160 | if (unlikely(nbytes > bio->bi_size)) { | 158 | if (unlikely(nbytes > bio->bi_size)) { |
161 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", | 159 | printk(KERN_ERR "%s: want %u bytes done, %u left\n", |
162 | __func__, nbytes, bio->bi_size); | 160 | __func__, nbytes, bio->bi_size); |
163 | nbytes = bio->bi_size; | 161 | nbytes = bio->bi_size; |
164 | } | 162 | } |
165 | 163 | ||
166 | if (unlikely(rq->cmd_flags & REQ_QUIET)) | 164 | if (unlikely(rq->cmd_flags & REQ_QUIET)) |
167 | set_bit(BIO_QUIET, &bio->bi_flags); | 165 | set_bit(BIO_QUIET, &bio->bi_flags); |
168 | 166 | ||
169 | bio->bi_size -= nbytes; | 167 | bio->bi_size -= nbytes; |
170 | bio->bi_sector += (nbytes >> 9); | 168 | bio->bi_sector += (nbytes >> 9); |
171 | 169 | ||
172 | if (bio_integrity(bio)) | 170 | if (bio_integrity(bio)) |
173 | bio_integrity_advance(bio, nbytes); | 171 | bio_integrity_advance(bio, nbytes); |
174 | 172 | ||
175 | if (bio->bi_size == 0) | 173 | /* don't actually finish bio if it's part of flush sequence */ |
176 | bio_endio(bio, error); | 174 | if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) |
177 | } else { | 175 | bio_endio(bio, error); |
178 | /* | ||
179 | * Okay, this is the sequenced flush request in | ||
180 | * progress, just record the error; | ||
181 | */ | ||
182 | if (error && !q->flush_err) | ||
183 | q->flush_err = error; | ||
184 | } | ||
185 | } | 176 | } |
186 | 177 | ||
187 | void blk_dump_rq_flags(struct request *rq, char *msg) | 178 | void blk_dump_rq_flags(struct request *rq, char *msg) |
@@ -208,135 +199,43 @@ void blk_dump_rq_flags(struct request *rq, char *msg) | |||
208 | EXPORT_SYMBOL(blk_dump_rq_flags); | 199 | EXPORT_SYMBOL(blk_dump_rq_flags); |
209 | 200 | ||
210 | /* | 201 | /* |
211 | * "plug" the device if there are no outstanding requests: this will | 202 | * Make sure that plugs that were pending when this function was entered, |
212 | * force the transfer to start only after we have put all the requests | 203 | * are now complete and requests pushed to the queue. |
213 | * on the list. | 204 | */ |
214 | * | 205 | static inline void queue_sync_plugs(struct request_queue *q) |
215 | * This is called with interrupts off and no requests on the queue and | ||
216 | * with the queue lock held. | ||
217 | */ | ||
218 | void blk_plug_device(struct request_queue *q) | ||
219 | { | 206 | { |
220 | WARN_ON(!irqs_disabled()); | ||
221 | |||
222 | /* | 207 | /* |
223 | * don't plug a stopped queue, it must be paired with blk_start_queue() | 208 | * If the current process is plugged and has barriers submitted, |
224 | * which will restart the queueing | 209 | * we will livelock if we don't unplug first. |
225 | */ | 210 | */ |
226 | if (blk_queue_stopped(q)) | 211 | blk_flush_plug(current); |
227 | return; | ||
228 | |||
229 | if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { | ||
230 | mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); | ||
231 | trace_block_plug(q); | ||
232 | } | ||
233 | } | ||
234 | EXPORT_SYMBOL(blk_plug_device); | ||
235 | |||
236 | /** | ||
237 | * blk_plug_device_unlocked - plug a device without queue lock held | ||
238 | * @q: The &struct request_queue to plug | ||
239 | * | ||
240 | * Description: | ||
241 | * Like @blk_plug_device(), but grabs the queue lock and disables | ||
242 | * interrupts. | ||
243 | **/ | ||
244 | void blk_plug_device_unlocked(struct request_queue *q) | ||
245 | { | ||
246 | unsigned long flags; | ||
247 | |||
248 | spin_lock_irqsave(q->queue_lock, flags); | ||
249 | blk_plug_device(q); | ||
250 | spin_unlock_irqrestore(q->queue_lock, flags); | ||
251 | } | ||
252 | EXPORT_SYMBOL(blk_plug_device_unlocked); | ||
253 | |||
254 | /* | ||
255 | * remove the queue from the plugged list, if present. called with | ||
256 | * queue lock held and interrupts disabled. | ||
257 | */ | ||
258 | int blk_remove_plug(struct request_queue *q) | ||
259 | { | ||
260 | WARN_ON(!irqs_disabled()); | ||
261 | |||
262 | if (!queue_flag_test_and_clear(QUEUE_FLAG_PLUGGED, q)) | ||
263 | return 0; | ||
264 | |||
265 | del_timer(&q->unplug_timer); | ||
266 | return 1; | ||
267 | } | 212 | } |
268 | EXPORT_SYMBOL(blk_remove_plug); | ||
269 | 213 | ||
270 | /* | 214 | static void blk_delay_work(struct work_struct *work) |
271 | * remove the plug and let it rip.. | ||
272 | */ | ||
273 | void __generic_unplug_device(struct request_queue *q) | ||
274 | { | 215 | { |
275 | if (unlikely(blk_queue_stopped(q))) | 216 | struct request_queue *q; |
276 | return; | ||
277 | if (!blk_remove_plug(q) && !blk_queue_nonrot(q)) | ||
278 | return; | ||
279 | 217 | ||
280 | q->request_fn(q); | 218 | q = container_of(work, struct request_queue, delay_work.work); |
219 | spin_lock_irq(q->queue_lock); | ||
220 | __blk_run_queue(q, false); | ||
221 | spin_unlock_irq(q->queue_lock); | ||
281 | } | 222 | } |
282 | 223 | ||
283 | /** | 224 | /** |
284 | * generic_unplug_device - fire a request queue | 225 | * blk_delay_queue - restart queueing after defined interval |
285 | * @q: The &struct request_queue in question | 226 | * @q: The &struct request_queue in question |
227 | * @msecs: Delay in msecs | ||
286 | * | 228 | * |
287 | * Description: | 229 | * Description: |
288 | * Linux uses plugging to build bigger requests queues before letting | 230 | * Sometimes queueing needs to be postponed for a little while, to allow |
289 | * the device have at them. If a queue is plugged, the I/O scheduler | 231 | * resources to come back. This function will make sure that queueing is |
290 | * is still adding and merging requests on the queue. Once the queue | 232 | * restarted around the specified time. |
291 | * gets unplugged, the request_fn defined for the queue is invoked and | 233 | */ |
292 | * transfers started. | 234 | void blk_delay_queue(struct request_queue *q, unsigned long msecs) |
293 | **/ | ||
294 | void generic_unplug_device(struct request_queue *q) | ||
295 | { | ||
296 | if (blk_queue_plugged(q)) { | ||
297 | spin_lock_irq(q->queue_lock); | ||
298 | __generic_unplug_device(q); | ||
299 | spin_unlock_irq(q->queue_lock); | ||
300 | } | ||
301 | } | ||
302 | EXPORT_SYMBOL(generic_unplug_device); | ||
303 | |||
304 | static void blk_backing_dev_unplug(struct backing_dev_info *bdi, | ||
305 | struct page *page) | ||
306 | { | ||
307 | struct request_queue *q = bdi->unplug_io_data; | ||
308 | |||
309 | blk_unplug(q); | ||
310 | } | ||
311 | |||
312 | void blk_unplug_work(struct work_struct *work) | ||
313 | { | ||
314 | struct request_queue *q = | ||
315 | container_of(work, struct request_queue, unplug_work); | ||
316 | |||
317 | trace_block_unplug_io(q); | ||
318 | q->unplug_fn(q); | ||
319 | } | ||
320 | |||
321 | void blk_unplug_timeout(unsigned long data) | ||
322 | { | ||
323 | struct request_queue *q = (struct request_queue *)data; | ||
324 | |||
325 | trace_block_unplug_timer(q); | ||
326 | kblockd_schedule_work(q, &q->unplug_work); | ||
327 | } | ||
328 | |||
329 | void blk_unplug(struct request_queue *q) | ||
330 | { | 235 | { |
331 | /* | 236 | schedule_delayed_work(&q->delay_work, msecs_to_jiffies(msecs)); |
332 | * devices don't necessarily have an ->unplug_fn defined | ||
333 | */ | ||
334 | if (q->unplug_fn) { | ||
335 | trace_block_unplug_io(q); | ||
336 | q->unplug_fn(q); | ||
337 | } | ||
338 | } | 237 | } |
339 | EXPORT_SYMBOL(blk_unplug); | 238 | EXPORT_SYMBOL(blk_delay_queue); |
340 | 239 | ||
341 | /** | 240 | /** |
342 | * blk_start_queue - restart a previously stopped queue | 241 | * blk_start_queue - restart a previously stopped queue |
@@ -372,7 +271,7 @@ EXPORT_SYMBOL(blk_start_queue); | |||
372 | **/ | 271 | **/ |
373 | void blk_stop_queue(struct request_queue *q) | 272 | void blk_stop_queue(struct request_queue *q) |
374 | { | 273 | { |
375 | blk_remove_plug(q); | 274 | cancel_delayed_work(&q->delay_work); |
376 | queue_flag_set(QUEUE_FLAG_STOPPED, q); | 275 | queue_flag_set(QUEUE_FLAG_STOPPED, q); |
377 | } | 276 | } |
378 | EXPORT_SYMBOL(blk_stop_queue); | 277 | EXPORT_SYMBOL(blk_stop_queue); |
@@ -390,13 +289,16 @@ EXPORT_SYMBOL(blk_stop_queue); | |||
390 | * that its ->make_request_fn will not re-add plugging prior to calling | 289 | * that its ->make_request_fn will not re-add plugging prior to calling |
391 | * this function. | 290 | * this function. |
392 | * | 291 | * |
292 | * This function does not cancel any asynchronous activity arising | ||
292 | * out of elevator or throttling code. That would require elevator_exit() | ||
294 | * and blk_throtl_exit() to be called with queue lock initialized. | ||
295 | * | ||
393 | */ | 296 | */ |
394 | void blk_sync_queue(struct request_queue *q) | 297 | void blk_sync_queue(struct request_queue *q) |
395 | { | 298 | { |
396 | del_timer_sync(&q->unplug_timer); | ||
397 | del_timer_sync(&q->timeout); | 299 | del_timer_sync(&q->timeout); |
398 | cancel_work_sync(&q->unplug_work); | 300 | cancel_delayed_work_sync(&q->delay_work); |
399 | throtl_shutdown_timer_wq(q); | 301 | queue_sync_plugs(q); |
400 | } | 302 | } |
401 | EXPORT_SYMBOL(blk_sync_queue); | 303 | EXPORT_SYMBOL(blk_sync_queue); |
402 | 304 | ||
@@ -412,14 +314,9 @@ EXPORT_SYMBOL(blk_sync_queue); | |||
412 | */ | 314 | */ |
413 | void __blk_run_queue(struct request_queue *q, bool force_kblockd) | 315 | void __blk_run_queue(struct request_queue *q, bool force_kblockd) |
414 | { | 316 | { |
415 | blk_remove_plug(q); | ||
416 | |||
417 | if (unlikely(blk_queue_stopped(q))) | 317 | if (unlikely(blk_queue_stopped(q))) |
418 | return; | 318 | return; |
419 | 319 | ||
420 | if (elv_queue_empty(q)) | ||
421 | return; | ||
422 | |||
423 | /* | 320 | /* |
424 | * Only recurse once to avoid overrunning the stack, let the unplug | 321 | * Only recurse once to avoid overrunning the stack, let the unplug |
425 | * handling reinvoke the handler shortly if we already got there. | 322 | * handling reinvoke the handler shortly if we already got there. |
@@ -427,10 +324,8 @@ void __blk_run_queue(struct request_queue *q, bool force_kblockd) | |||
427 | if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { | 324 | if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { |
428 | q->request_fn(q); | 325 | q->request_fn(q); |
429 | queue_flag_clear(QUEUE_FLAG_REENTER, q); | 326 | queue_flag_clear(QUEUE_FLAG_REENTER, q); |
430 | } else { | 327 | } else |
431 | queue_flag_set(QUEUE_FLAG_PLUGGED, q); | 328 | queue_delayed_work(kblockd_workqueue, &q->delay_work, 0); |
432 | kblockd_schedule_work(q, &q->unplug_work); | ||
433 | } | ||
434 | } | 329 | } |
435 | EXPORT_SYMBOL(__blk_run_queue); | 330 | EXPORT_SYMBOL(__blk_run_queue); |
436 | 331 | ||
@@ -457,6 +352,11 @@ void blk_put_queue(struct request_queue *q) | |||
457 | kobject_put(&q->kobj); | 352 | kobject_put(&q->kobj); |
458 | } | 353 | } |
459 | 354 | ||
355 | /* | ||
356 | * Note: If a driver supplied the queue lock, it should not zap that lock | ||
357 | * unexpectedly as some queue cleanup components like elevator_exit() and | ||
358 | * blk_throtl_exit() need queue lock. | ||
359 | */ | ||
460 | void blk_cleanup_queue(struct request_queue *q) | 360 | void blk_cleanup_queue(struct request_queue *q) |
461 | { | 361 | { |
462 | /* | 362 | /* |
@@ -475,6 +375,8 @@ void blk_cleanup_queue(struct request_queue *q) | |||
475 | if (q->elevator) | 375 | if (q->elevator) |
476 | elevator_exit(q->elevator); | 376 | elevator_exit(q->elevator); |
477 | 377 | ||
378 | blk_throtl_exit(q); | ||
379 | |||
478 | blk_put_queue(q); | 380 | blk_put_queue(q); |
479 | } | 381 | } |
480 | EXPORT_SYMBOL(blk_cleanup_queue); | 382 | EXPORT_SYMBOL(blk_cleanup_queue); |
@@ -517,8 +419,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
517 | if (!q) | 419 | if (!q) |
518 | return NULL; | 420 | return NULL; |
519 | 421 | ||
520 | q->backing_dev_info.unplug_io_fn = blk_backing_dev_unplug; | ||
521 | q->backing_dev_info.unplug_io_data = q; | ||
522 | q->backing_dev_info.ra_pages = | 422 | q->backing_dev_info.ra_pages = |
523 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; | 423 | (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; |
524 | q->backing_dev_info.state = 0; | 424 | q->backing_dev_info.state = 0; |
@@ -538,17 +438,24 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
538 | 438 | ||
539 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, | 439 | setup_timer(&q->backing_dev_info.laptop_mode_wb_timer, |
540 | laptop_mode_timer_fn, (unsigned long) q); | 440 | laptop_mode_timer_fn, (unsigned long) q); |
541 | init_timer(&q->unplug_timer); | ||
542 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); | 441 | setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); |
543 | INIT_LIST_HEAD(&q->timeout_list); | 442 | INIT_LIST_HEAD(&q->timeout_list); |
544 | INIT_LIST_HEAD(&q->pending_flushes); | 443 | INIT_LIST_HEAD(&q->flush_queue[0]); |
545 | INIT_WORK(&q->unplug_work, blk_unplug_work); | 444 | INIT_LIST_HEAD(&q->flush_queue[1]); |
445 | INIT_LIST_HEAD(&q->flush_data_in_flight); | ||
446 | INIT_DELAYED_WORK(&q->delay_work, blk_delay_work); | ||
546 | 447 | ||
547 | kobject_init(&q->kobj, &blk_queue_ktype); | 448 | kobject_init(&q->kobj, &blk_queue_ktype); |
548 | 449 | ||
549 | mutex_init(&q->sysfs_lock); | 450 | mutex_init(&q->sysfs_lock); |
550 | spin_lock_init(&q->__queue_lock); | 451 | spin_lock_init(&q->__queue_lock); |
551 | 452 | ||
453 | /* | ||
454 | * By default initialize queue_lock to internal lock and driver can | ||
455 | * override it later if need be. | ||
456 | */ | ||
457 | q->queue_lock = &q->__queue_lock; | ||
458 | |||
552 | return q; | 459 | return q; |
553 | } | 460 | } |
554 | EXPORT_SYMBOL(blk_alloc_queue_node); | 461 | EXPORT_SYMBOL(blk_alloc_queue_node); |
@@ -631,9 +538,11 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, | |||
631 | q->request_fn = rfn; | 538 | q->request_fn = rfn; |
632 | q->prep_rq_fn = NULL; | 539 | q->prep_rq_fn = NULL; |
633 | q->unprep_rq_fn = NULL; | 540 | q->unprep_rq_fn = NULL; |
634 | q->unplug_fn = generic_unplug_device; | ||
635 | q->queue_flags = QUEUE_FLAG_DEFAULT; | 541 | q->queue_flags = QUEUE_FLAG_DEFAULT; |
636 | q->queue_lock = lock; | 542 | |
543 | /* Override internal queue lock with supplied lock pointer */ | ||
544 | if (lock) | ||
545 | q->queue_lock = lock; | ||
637 | 546 | ||
638 | /* | 547 | /* |
639 | * This also sets hw/phys segments, boundary and size | 548 | * This also sets hw/phys segments, boundary and size |
@@ -666,6 +575,8 @@ int blk_get_queue(struct request_queue *q) | |||
666 | 575 | ||
667 | static inline void blk_free_request(struct request_queue *q, struct request *rq) | 576 | static inline void blk_free_request(struct request_queue *q, struct request *rq) |
668 | { | 577 | { |
578 | BUG_ON(rq->cmd_flags & REQ_ON_PLUG); | ||
579 | |||
669 | if (rq->cmd_flags & REQ_ELVPRIV) | 580 | if (rq->cmd_flags & REQ_ELVPRIV) |
670 | elv_put_request(q, rq); | 581 | elv_put_request(q, rq); |
671 | mempool_free(rq, q->rq.rq_pool); | 582 | mempool_free(rq, q->rq.rq_pool); |
@@ -762,6 +673,25 @@ static void freed_request(struct request_queue *q, int sync, int priv) | |||
762 | } | 673 | } |
763 | 674 | ||
764 | /* | 675 | /* |
676 | * Determine if elevator data should be initialized when allocating the | ||
677 | * request associated with @bio. | ||
678 | */ | ||
679 | static bool blk_rq_should_init_elevator(struct bio *bio) | ||
680 | { | ||
681 | if (!bio) | ||
682 | return true; | ||
683 | |||
684 | /* | ||
685 | * Flush requests do not use the elevator so skip initialization. | ||
686 | * This allows a request to share the flush and elevator data. | ||
687 | */ | ||
688 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) | ||
689 | return false; | ||
690 | |||
691 | return true; | ||
692 | } | ||
693 | |||
694 | /* | ||
765 | * Get a free request, queue_lock must be held. | 695 | * Get a free request, queue_lock must be held. |
766 | * Returns NULL on failure, with queue_lock held. | 696 | * Returns NULL on failure, with queue_lock held. |
767 | * Returns !NULL on success, with queue_lock *not held*. | 697 | * Returns !NULL on success, with queue_lock *not held*. |
@@ -773,7 +703,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, | |||
773 | struct request_list *rl = &q->rq; | 703 | struct request_list *rl = &q->rq; |
774 | struct io_context *ioc = NULL; | 704 | struct io_context *ioc = NULL; |
775 | const bool is_sync = rw_is_sync(rw_flags) != 0; | 705 | const bool is_sync = rw_is_sync(rw_flags) != 0; |
776 | int may_queue, priv; | 706 | int may_queue, priv = 0; |
777 | 707 | ||
778 | may_queue = elv_may_queue(q, rw_flags); | 708 | may_queue = elv_may_queue(q, rw_flags); |
779 | if (may_queue == ELV_MQUEUE_NO) | 709 | if (may_queue == ELV_MQUEUE_NO) |
@@ -817,9 +747,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags, | |||
817 | rl->count[is_sync]++; | 747 | rl->count[is_sync]++; |
818 | rl->starved[is_sync] = 0; | 748 | rl->starved[is_sync] = 0; |
819 | 749 | ||
820 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); | 750 | if (blk_rq_should_init_elevator(bio)) { |
821 | if (priv) | 751 | priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); |
822 | rl->elvpriv++; | 752 | if (priv) |
753 | rl->elvpriv++; | ||
754 | } | ||
823 | 755 | ||
824 | if (blk_queue_io_stat(q)) | 756 | if (blk_queue_io_stat(q)) |
825 | rw_flags |= REQ_IO_STAT; | 757 | rw_flags |= REQ_IO_STAT; |
@@ -866,8 +798,8 @@ out: | |||
866 | } | 798 | } |
867 | 799 | ||
868 | /* | 800 | /* |
869 | * No available requests for this queue, unplug the device and wait for some | 801 | * No available requests for this queue, wait for some requests to become |
870 | * requests to become available. | 802 | * available. |
871 | * | 803 | * |
872 | * Called with q->queue_lock held, and returns with it unlocked. | 804 | * Called with q->queue_lock held, and returns with it unlocked. |
873 | */ | 805 | */ |
@@ -888,7 +820,6 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, | |||
888 | 820 | ||
889 | trace_block_sleeprq(q, bio, rw_flags & 1); | 821 | trace_block_sleeprq(q, bio, rw_flags & 1); |
890 | 822 | ||
891 | __generic_unplug_device(q); | ||
892 | spin_unlock_irq(q->queue_lock); | 823 | spin_unlock_irq(q->queue_lock); |
893 | io_schedule(); | 824 | io_schedule(); |
894 | 825 | ||
@@ -1010,6 +941,13 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) | |||
1010 | } | 941 | } |
1011 | EXPORT_SYMBOL(blk_requeue_request); | 942 | EXPORT_SYMBOL(blk_requeue_request); |
1012 | 943 | ||
944 | static void add_acct_request(struct request_queue *q, struct request *rq, | ||
945 | int where) | ||
946 | { | ||
947 | drive_stat_acct(rq, 1); | ||
948 | __elv_add_request(q, rq, where); | ||
949 | } | ||
950 | |||
1013 | /** | 951 | /** |
1014 | * blk_insert_request - insert a special request into a request queue | 952 | * blk_insert_request - insert a special request into a request queue |
1015 | * @q: request queue where request should be inserted | 953 | * @q: request queue where request should be inserted |
@@ -1052,8 +990,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq, | |||
1052 | if (blk_rq_tagged(rq)) | 990 | if (blk_rq_tagged(rq)) |
1053 | blk_queue_end_tag(q, rq); | 991 | blk_queue_end_tag(q, rq); |
1054 | 992 | ||
1055 | drive_stat_acct(rq, 1); | 993 | add_acct_request(q, rq, where); |
1056 | __elv_add_request(q, rq, where, 0); | ||
1057 | __blk_run_queue(q, false); | 994 | __blk_run_queue(q, false); |
1058 | spin_unlock_irqrestore(q->queue_lock, flags); | 995 | spin_unlock_irqrestore(q->queue_lock, flags); |
1059 | } | 996 | } |
@@ -1174,6 +1111,113 @@ void blk_add_request_payload(struct request *rq, struct page *page, | |||
1174 | } | 1111 | } |
1175 | EXPORT_SYMBOL_GPL(blk_add_request_payload); | 1112 | EXPORT_SYMBOL_GPL(blk_add_request_payload); |
1176 | 1113 | ||
1114 | static bool bio_attempt_back_merge(struct request_queue *q, struct request *req, | ||
1115 | struct bio *bio) | ||
1116 | { | ||
1117 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; | ||
1118 | |||
1119 | /* | ||
1120 | * Debug stuff, kill later | ||
1121 | */ | ||
1122 | if (!rq_mergeable(req)) { | ||
1123 | blk_dump_rq_flags(req, "back"); | ||
1124 | return false; | ||
1125 | } | ||
1126 | |||
1127 | if (!ll_back_merge_fn(q, req, bio)) | ||
1128 | return false; | ||
1129 | |||
1130 | trace_block_bio_backmerge(q, bio); | ||
1131 | |||
1132 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | ||
1133 | blk_rq_set_mixed_merge(req); | ||
1134 | |||
1135 | req->biotail->bi_next = bio; | ||
1136 | req->biotail = bio; | ||
1137 | req->__data_len += bio->bi_size; | ||
1138 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | ||
1139 | |||
1140 | drive_stat_acct(req, 0); | ||
1141 | return true; | ||
1142 | } | ||
1143 | |||
1144 | static bool bio_attempt_front_merge(struct request_queue *q, | ||
1145 | struct request *req, struct bio *bio) | ||
1146 | { | ||
1147 | const int ff = bio->bi_rw & REQ_FAILFAST_MASK; | ||
1148 | sector_t sector; | ||
1149 | |||
1150 | /* | ||
1151 | * Debug stuff, kill later | ||
1152 | */ | ||
1153 | if (!rq_mergeable(req)) { | ||
1154 | blk_dump_rq_flags(req, "front"); | ||
1155 | return false; | ||
1156 | } | ||
1157 | |||
1158 | if (!ll_front_merge_fn(q, req, bio)) | ||
1159 | return false; | ||
1160 | |||
1161 | trace_block_bio_frontmerge(q, bio); | ||
1162 | |||
1163 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | ||
1164 | blk_rq_set_mixed_merge(req); | ||
1165 | |||
1166 | sector = bio->bi_sector; | ||
1167 | |||
1168 | bio->bi_next = req->bio; | ||
1169 | req->bio = bio; | ||
1170 | |||
1171 | /* | ||
1172 | * may not be valid. if the low level driver said | ||
1173 | * it didn't need a bounce buffer then it better | ||
1174 | * not touch req->buffer either... | ||
1175 | */ | ||
1176 | req->buffer = bio_data(bio); | ||
1177 | req->__sector = bio->bi_sector; | ||
1178 | req->__data_len += bio->bi_size; | ||
1179 | req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); | ||
1180 | |||
1181 | drive_stat_acct(req, 0); | ||
1182 | return true; | ||
1183 | } | ||
1184 | |||
1185 | /* | ||
1186 | * Attempts to merge with the plugged list in the current process. Returns | ||
1187 | * true if merge was successful, otherwise false. | ||
1188 | */ | ||
1189 | static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, | ||
1190 | struct bio *bio) | ||
1191 | { | ||
1192 | struct blk_plug *plug; | ||
1193 | struct request *rq; | ||
1194 | bool ret = false; | ||
1195 | |||
1196 | plug = tsk->plug; | ||
1197 | if (!plug) | ||
1198 | goto out; | ||
1199 | |||
1200 | list_for_each_entry_reverse(rq, &plug->list, queuelist) { | ||
1201 | int el_ret; | ||
1202 | |||
1203 | if (rq->q != q) | ||
1204 | continue; | ||
1205 | |||
1206 | el_ret = elv_try_merge(rq, bio); | ||
1207 | if (el_ret == ELEVATOR_BACK_MERGE) { | ||
1208 | ret = bio_attempt_back_merge(q, rq, bio); | ||
1209 | if (ret) | ||
1210 | break; | ||
1211 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { | ||
1212 | ret = bio_attempt_front_merge(q, rq, bio); | ||
1213 | if (ret) | ||
1214 | break; | ||
1215 | } | ||
1216 | } | ||
1217 | out: | ||
1218 | return ret; | ||
1219 | } | ||
1220 | |||
1177 | void init_request_from_bio(struct request *req, struct bio *bio) | 1221 | void init_request_from_bio(struct request *req, struct bio *bio) |
1178 | { | 1222 | { |
1179 | req->cpu = bio->bi_comp_cpu; | 1223 | req->cpu = bio->bi_comp_cpu; |
@@ -1189,26 +1233,12 @@ void init_request_from_bio(struct request *req, struct bio *bio) | |||
1189 | blk_rq_bio_prep(req->q, req, bio); | 1233 | blk_rq_bio_prep(req->q, req, bio); |
1190 | } | 1234 | } |
1191 | 1235 | ||
1192 | /* | ||
1193 | * Only disabling plugging for non-rotational devices if it does tagging | ||
1194 | * as well, otherwise we do need the proper merging | ||
1195 | */ | ||
1196 | static inline bool queue_should_plug(struct request_queue *q) | ||
1197 | { | ||
1198 | return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); | ||
1199 | } | ||
1200 | |||
1201 | static int __make_request(struct request_queue *q, struct bio *bio) | 1236 | static int __make_request(struct request_queue *q, struct bio *bio) |
1202 | { | 1237 | { |
1203 | struct request *req; | ||
1204 | int el_ret; | ||
1205 | unsigned int bytes = bio->bi_size; | ||
1206 | const unsigned short prio = bio_prio(bio); | ||
1207 | const bool sync = !!(bio->bi_rw & REQ_SYNC); | 1238 | const bool sync = !!(bio->bi_rw & REQ_SYNC); |
1208 | const bool unplug = !!(bio->bi_rw & REQ_UNPLUG); | 1239 | struct blk_plug *plug; |
1209 | const unsigned long ff = bio->bi_rw & REQ_FAILFAST_MASK; | 1240 | int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; |
1210 | int where = ELEVATOR_INSERT_SORT; | 1241 | struct request *req; |
1211 | int rw_flags; | ||
1212 | 1242 | ||
1213 | /* | 1243 | /* |
1214 | * low level driver can indicate that it wants pages above a | 1244 | * low level driver can indicate that it wants pages above a |
@@ -1217,78 +1247,36 @@ static int __make_request(struct request_queue *q, struct bio *bio) | |||
1217 | */ | 1247 | */ |
1218 | blk_queue_bounce(q, &bio); | 1248 | blk_queue_bounce(q, &bio); |
1219 | 1249 | ||
1220 | spin_lock_irq(q->queue_lock); | ||
1221 | |||
1222 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { | 1250 | if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { |
1223 | where = ELEVATOR_INSERT_FRONT; | 1251 | spin_lock_irq(q->queue_lock); |
1252 | where = ELEVATOR_INSERT_FLUSH; | ||
1224 | goto get_rq; | 1253 | goto get_rq; |
1225 | } | 1254 | } |
1226 | 1255 | ||
1227 | if (elv_queue_empty(q)) | 1256 | /* |
1228 | goto get_rq; | 1257 | * Check if we can merge with the plugged list before grabbing |
1229 | 1258 | * any locks. | |
1230 | el_ret = elv_merge(q, &req, bio); | 1259 | */ |
1231 | switch (el_ret) { | 1260 | if (attempt_plug_merge(current, q, bio)) |
1232 | case ELEVATOR_BACK_MERGE: | ||
1233 | BUG_ON(!rq_mergeable(req)); | ||
1234 | |||
1235 | if (!ll_back_merge_fn(q, req, bio)) | ||
1236 | break; | ||
1237 | |||
1238 | trace_block_bio_backmerge(q, bio); | ||
1239 | |||
1240 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) | ||
1241 | blk_rq_set_mixed_merge(req); | ||
1242 | |||
1243 | req->biotail->bi_next = bio; | ||
1244 | req->biotail = bio; | ||
1245 | req->__data_len += bytes; | ||
1246 | req->ioprio = ioprio_best(req->ioprio, prio); | ||
1247 | if (!blk_rq_cpu_valid(req)) | ||
1248 | req->cpu = bio->bi_comp_cpu; | ||
1249 | drive_stat_acct(req, 0); | ||
1250 | elv_bio_merged(q, req, bio); | ||
1251 | if (!attempt_back_merge(q, req)) | ||
1252 | elv_merged_request(q, req, el_ret); | ||
1253 | goto out; | 1261 | goto out; |
1254 | 1262 | ||
1255 | case ELEVATOR_FRONT_MERGE: | 1263 | spin_lock_irq(q->queue_lock); |
1256 | BUG_ON(!rq_mergeable(req)); | ||
1257 | |||
1258 | if (!ll_front_merge_fn(q, req, bio)) | ||
1259 | break; | ||
1260 | |||
1261 | trace_block_bio_frontmerge(q, bio); | ||
1262 | 1264 | ||
1263 | if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) { | 1265 | el_ret = elv_merge(q, &req, bio); |
1264 | blk_rq_set_mixed_merge(req); | 1266 | if (el_ret == ELEVATOR_BACK_MERGE) { |
1265 | req->cmd_flags &= ~REQ_FAILFAST_MASK; | 1267 | BUG_ON(req->cmd_flags & REQ_ON_PLUG); |
1266 | req->cmd_flags |= ff; | 1268 | if (bio_attempt_back_merge(q, req, bio)) { |
1269 | if (!attempt_back_merge(q, req)) | ||
1270 | elv_merged_request(q, req, el_ret); | ||
1271 | goto out_unlock; | ||
1272 | } | ||
1273 | } else if (el_ret == ELEVATOR_FRONT_MERGE) { | ||
1274 | BUG_ON(req->cmd_flags & REQ_ON_PLUG); | ||
1275 | if (bio_attempt_front_merge(q, req, bio)) { | ||
1276 | if (!attempt_front_merge(q, req)) | ||
1277 | elv_merged_request(q, req, el_ret); | ||
1278 | goto out_unlock; | ||
1267 | } | 1279 | } |
1268 | |||
1269 | bio->bi_next = req->bio; | ||
1270 | req->bio = bio; | ||
1271 | |||
1272 | /* | ||
1273 | * may not be valid. if the low level driver said | ||
1274 | * it didn't need a bounce buffer then it better | ||
1275 | * not touch req->buffer either... | ||
1276 | */ | ||
1277 | req->buffer = bio_data(bio); | ||
1278 | req->__sector = bio->bi_sector; | ||
1279 | req->__data_len += bytes; | ||
1280 | req->ioprio = ioprio_best(req->ioprio, prio); | ||
1281 | if (!blk_rq_cpu_valid(req)) | ||
1282 | req->cpu = bio->bi_comp_cpu; | ||
1283 | drive_stat_acct(req, 0); | ||
1284 | elv_bio_merged(q, req, bio); | ||
1285 | if (!attempt_front_merge(q, req)) | ||
1286 | elv_merged_request(q, req, el_ret); | ||
1287 | goto out; | ||
1288 | |||
1289 | /* ELV_NO_MERGE: elevator says don't/can't merge. */ | ||
1290 | default: | ||
1291 | ; | ||
1292 | } | 1280 | } |
1293 | 1281 | ||
1294 | get_rq: | 1282 | get_rq: |
@@ -1315,20 +1303,35 @@ get_rq: | |||
1315 | */ | 1303 | */ |
1316 | init_request_from_bio(req, bio); | 1304 | init_request_from_bio(req, bio); |
1317 | 1305 | ||
1318 | spin_lock_irq(q->queue_lock); | ||
1319 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || | 1306 | if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || |
1320 | bio_flagged(bio, BIO_CPU_AFFINE)) | 1307 | bio_flagged(bio, BIO_CPU_AFFINE)) { |
1321 | req->cpu = blk_cpu_to_group(smp_processor_id()); | 1308 | req->cpu = blk_cpu_to_group(get_cpu()); |
1322 | if (queue_should_plug(q) && elv_queue_empty(q)) | 1309 | put_cpu(); |
1323 | blk_plug_device(q); | 1310 | } |
1324 | 1311 | ||
1325 | /* insert the request into the elevator */ | 1312 | plug = current->plug; |
1326 | drive_stat_acct(req, 1); | 1313 | if (plug) { |
1327 | __elv_add_request(q, req, where, 0); | 1314 | if (!plug->should_sort && !list_empty(&plug->list)) { |
1315 | struct request *__rq; | ||
1316 | |||
1317 | __rq = list_entry_rq(plug->list.prev); | ||
1318 | if (__rq->q != q) | ||
1319 | plug->should_sort = 1; | ||
1320 | } | ||
1321 | /* | ||
1322 | * Debug flag, kill later | ||
1323 | */ | ||
1324 | req->cmd_flags |= REQ_ON_PLUG; | ||
1325 | list_add_tail(&req->queuelist, &plug->list); | ||
1326 | drive_stat_acct(req, 1); | ||
1327 | } else { | ||
1328 | spin_lock_irq(q->queue_lock); | ||
1329 | add_acct_request(q, req, where); | ||
1330 | __blk_run_queue(q, false); | ||
1331 | out_unlock: | ||
1332 | spin_unlock_irq(q->queue_lock); | ||
1333 | } | ||
1328 | out: | 1334 | out: |
1329 | if (unplug || !queue_should_plug(q)) | ||
1330 | __generic_unplug_device(q); | ||
1331 | spin_unlock_irq(q->queue_lock); | ||
1332 | return 0; | 1335 | return 0; |
1333 | } | 1336 | } |
1334 | 1337 | ||
@@ -1731,9 +1734,7 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) | |||
1731 | */ | 1734 | */ |
1732 | BUG_ON(blk_queued_rq(rq)); | 1735 | BUG_ON(blk_queued_rq(rq)); |
1733 | 1736 | ||
1734 | drive_stat_acct(rq, 1); | 1737 | add_acct_request(q, rq, ELEVATOR_INSERT_BACK); |
1735 | __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0); | ||
1736 | |||
1737 | spin_unlock_irqrestore(q->queue_lock, flags); | 1738 | spin_unlock_irqrestore(q->queue_lock, flags); |
1738 | 1739 | ||
1739 | return 0; | 1740 | return 0; |
@@ -1805,7 +1806,7 @@ static void blk_account_io_done(struct request *req) | |||
1805 | * normal IO on queueing nor completion. Accounting the | 1806 | * normal IO on queueing nor completion. Accounting the |
1806 | * containing request is enough. | 1807 | * containing request is enough. |
1807 | */ | 1808 | */ |
1808 | if (blk_do_io_stat(req) && req != &req->q->flush_rq) { | 1809 | if (blk_do_io_stat(req) && !(req->cmd_flags & REQ_FLUSH_SEQ)) { |
1809 | unsigned long duration = jiffies - req->start_time; | 1810 | unsigned long duration = jiffies - req->start_time; |
1810 | const int rw = rq_data_dir(req); | 1811 | const int rw = rq_data_dir(req); |
1811 | struct hd_struct *part; | 1812 | struct hd_struct *part; |
@@ -2628,6 +2629,113 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) | |||
2628 | } | 2629 | } |
2629 | EXPORT_SYMBOL(kblockd_schedule_work); | 2630 | EXPORT_SYMBOL(kblockd_schedule_work); |
2630 | 2631 | ||
2632 | int kblockd_schedule_delayed_work(struct request_queue *q, | ||
2633 | struct delayed_work *dwork, unsigned long delay) | ||
2634 | { | ||
2635 | return queue_delayed_work(kblockd_workqueue, dwork, delay); | ||
2636 | } | ||
2637 | EXPORT_SYMBOL(kblockd_schedule_delayed_work); | ||
2638 | |||
2639 | #define PLUG_MAGIC 0x91827364 | ||
2640 | |||
2641 | void blk_start_plug(struct blk_plug *plug) | ||
2642 | { | ||
2643 | struct task_struct *tsk = current; | ||
2644 | |||
2645 | plug->magic = PLUG_MAGIC; | ||
2646 | INIT_LIST_HEAD(&plug->list); | ||
2647 | plug->should_sort = 0; | ||
2648 | |||
2649 | /* | ||
2650 | * If this is a nested plug, don't actually assign it. It will be | ||
2651 | * flushed on its own. | ||
2652 | */ | ||
2653 | if (!tsk->plug) { | ||
2654 | /* | ||
2655 | * Store ordering should not be needed here, since a potential | ||
2656 | * preempt will imply a full memory barrier | ||
2657 | */ | ||
2658 | tsk->plug = plug; | ||
2659 | } | ||
2660 | } | ||
2661 | EXPORT_SYMBOL(blk_start_plug); | ||
2662 | |||
2663 | static int plug_rq_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
2664 | { | ||
2665 | struct request *rqa = container_of(a, struct request, queuelist); | ||
2666 | struct request *rqb = container_of(b, struct request, queuelist); | ||
2667 | |||
2668 | return !(rqa->q == rqb->q); | ||
2669 | } | ||
2670 | |||
2671 | static void flush_plug_list(struct blk_plug *plug) | ||
2672 | { | ||
2673 | struct request_queue *q; | ||
2674 | unsigned long flags; | ||
2675 | struct request *rq; | ||
2676 | |||
2677 | BUG_ON(plug->magic != PLUG_MAGIC); | ||
2678 | |||
2679 | if (list_empty(&plug->list)) | ||
2680 | return; | ||
2681 | |||
2682 | if (plug->should_sort) | ||
2683 | list_sort(NULL, &plug->list, plug_rq_cmp); | ||
2684 | |||
2685 | q = NULL; | ||
2686 | local_irq_save(flags); | ||
2687 | while (!list_empty(&plug->list)) { | ||
2688 | rq = list_entry_rq(plug->list.next); | ||
2689 | list_del_init(&rq->queuelist); | ||
2690 | BUG_ON(!(rq->cmd_flags & REQ_ON_PLUG)); | ||
2691 | BUG_ON(!rq->q); | ||
2692 | if (rq->q != q) { | ||
2693 | if (q) { | ||
2694 | __blk_run_queue(q, false); | ||
2695 | spin_unlock(q->queue_lock); | ||
2696 | } | ||
2697 | q = rq->q; | ||
2698 | spin_lock(q->queue_lock); | ||
2699 | } | ||
2700 | rq->cmd_flags &= ~REQ_ON_PLUG; | ||
2701 | |||
2702 | /* | ||
2703 | * rq is already accounted, so use raw insert | ||
2704 | */ | ||
2705 | __elv_add_request(q, rq, ELEVATOR_INSERT_SORT_MERGE); | ||
2706 | } | ||
2707 | |||
2708 | if (q) { | ||
2709 | __blk_run_queue(q, false); | ||
2710 | spin_unlock(q->queue_lock); | ||
2711 | } | ||
2712 | |||
2713 | BUG_ON(!list_empty(&plug->list)); | ||
2714 | local_irq_restore(flags); | ||
2715 | } | ||
2716 | |||
2717 | static void __blk_finish_plug(struct task_struct *tsk, struct blk_plug *plug) | ||
2718 | { | ||
2719 | flush_plug_list(plug); | ||
2720 | |||
2721 | if (plug == tsk->plug) | ||
2722 | tsk->plug = NULL; | ||
2723 | } | ||
2724 | |||
2725 | void blk_finish_plug(struct blk_plug *plug) | ||
2726 | { | ||
2727 | if (plug) | ||
2728 | __blk_finish_plug(current, plug); | ||
2729 | } | ||
2730 | EXPORT_SYMBOL(blk_finish_plug); | ||
2731 | |||
2732 | void __blk_flush_plug(struct task_struct *tsk, struct blk_plug *plug) | ||
2733 | { | ||
2734 | __blk_finish_plug(tsk, plug); | ||
2735 | tsk->plug = plug; | ||
2736 | } | ||
2737 | EXPORT_SYMBOL(__blk_flush_plug); | ||
2738 | |||
2631 | int __init blk_dev_init(void) | 2739 | int __init blk_dev_init(void) |
2632 | { | 2740 | { |
2633 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * | 2741 | BUILD_BUG_ON(__REQ_NR_BITS > 8 * |
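The blk-core.c hunks above remove the old per-queue plugging (blk_plug_device(), blk_remove_plug(), the unplug timer) and introduce an on-stack, per-task plug driven by blk_start_plug()/blk_finish_plug(). A minimal usage sketch under those assumptions is below; submit_batch() and the bios array are hypothetical, only struct blk_plug and the two plug calls come from the diff.

#include <linux/blkdev.h>       /* struct blk_plug, blk_start_plug(), blk_finish_plug() */
#include <linux/bio.h>

/* Hypothetical submitter: batch a set of already-built bios under one plug. */
static void submit_batch(struct bio **bios, int nr)
{
        struct blk_plug plug;
        int i;

        blk_start_plug(&plug);          /* requests now collect on current->plug */
        for (i = 0; i < nr; i++)
                submit_bio(READ, bios[i]);
        blk_finish_plug(&plug);         /* flush the plug list out to the queue(s) */
}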
diff --git a/block/blk-exec.c b/block/blk-exec.c
index cf1456a02acd..7482b7fa863b 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -54,8 +54,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, | |||
54 | rq->end_io = done; | 54 | rq->end_io = done; |
55 | WARN_ON(irqs_disabled()); | 55 | WARN_ON(irqs_disabled()); |
56 | spin_lock_irq(q->queue_lock); | 56 | spin_lock_irq(q->queue_lock); |
57 | __elv_add_request(q, rq, where, 1); | 57 | __elv_add_request(q, rq, where); |
58 | __generic_unplug_device(q); | 58 | __blk_run_queue(q, false); |
59 | /* the queue is stopped so it won't be plugged+unplugged */ | 59 | /* the queue is stopped so it won't be plugged+unplugged */ |
60 | if (rq->cmd_type == REQ_TYPE_PM_RESUME) | 60 | if (rq->cmd_type == REQ_TYPE_PM_RESUME) |
61 | q->request_fn(q); | 61 | q->request_fn(q); |
diff --git a/block/blk-flush.c b/block/blk-flush.c
index b27d0208611b..93d5fd8e51eb 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -1,6 +1,69 @@ | |||
1 | /* | 1 | /* |
2 | * Functions to sequence FLUSH and FUA writes. | 2 | * Functions to sequence FLUSH and FUA writes. |
3 | * | ||
4 | * Copyright (C) 2011 Max Planck Institute for Gravitational Physics | ||
5 | * Copyright (C) 2011 Tejun Heo <tj@kernel.org> | ||
6 | * | ||
7 | * This file is released under the GPLv2. | ||
8 | * | ||
9 | * REQ_{FLUSH|FUA} requests are decomposed to sequences consisting of three | ||
10 | * optional steps - PREFLUSH, DATA and POSTFLUSH - according to the request | ||
11 | * properties and hardware capability. | ||
12 | * | ||
13 | * If a request doesn't have data, only REQ_FLUSH makes sense, which | ||
14 | * indicates a simple flush request. If there is data, REQ_FLUSH indicates | ||
15 | * that the device cache should be flushed before the data is executed, and | ||
16 | * REQ_FUA means that the data must be on non-volatile media on request | ||
17 | * completion. | ||
18 | * | ||
19 | * If the device doesn't have writeback cache, FLUSH and FUA don't make any | ||
20 | * difference. The requests are either completed immediately if there's no | ||
21 | * data or executed as normal requests otherwise. | ||
22 | * | ||
23 | * If the device has writeback cache and supports FUA, REQ_FLUSH is | ||
24 | * translated to PREFLUSH but REQ_FUA is passed down directly with DATA. | ||
25 | * | ||
26 | * If the device has writeback cache and doesn't support FUA, REQ_FLUSH is | ||
27 | * translated to PREFLUSH and REQ_FUA to POSTFLUSH. | ||
28 | * | ||
29 | * The actual execution of flush is double buffered. Whenever a request | ||
30 | * needs to execute PRE or POSTFLUSH, it queues at | ||
31 | * q->flush_queue[q->flush_pending_idx]. Once certain criteria are met, a | ||
32 | * flush is issued and the pending_idx is toggled. When the flush | ||
33 | * completes, all the requests which were pending are proceeded to the next | ||
34 | * step. This allows arbitrary merging of different types of FLUSH/FUA | ||
35 | * requests. | ||
36 | * | ||
37 | * Currently, the following conditions are used to determine when to issue | ||
38 | * flush. | ||
39 | * | ||
40 | * C1. At any given time, only one flush shall be in progress. This makes | ||
41 | * double buffering sufficient. | ||
42 | * | ||
43 | * C2. Flush is deferred if any request is executing DATA of its sequence. | ||
44 | * This avoids issuing separate POSTFLUSHes for requests which shared | ||
45 | * PREFLUSH. | ||
46 | * | ||
47 | * C3. The second condition is ignored if there is a request which has | ||
48 | * waited longer than FLUSH_PENDING_TIMEOUT. This is to avoid | ||
49 | * starvation in the unlikely case where there are continuous stream of | ||
50 | * FUA (without FLUSH) requests. | ||
51 | * | ||
52 | * For devices which support FUA, it isn't clear whether C2 (and thus C3) | ||
53 | * is beneficial. | ||
54 | * | ||
55 | * Note that a sequenced FLUSH/FUA request with DATA is completed twice. | ||
56 | * Once while executing DATA and again after the whole sequence is | ||
57 | * complete. The first completion updates the contained bio but doesn't | ||
58 | * finish it so that the bio submitter is notified only after the whole | ||
59 | * sequence is complete. This is implemented by testing REQ_FLUSH_SEQ in | ||
60 | * req_bio_endio(). | ||
61 | * | ||
62 | * The above peculiarity requires that each FLUSH/FUA request has only one | ||
63 | * bio attached to it, which is guaranteed as they aren't allowed to be | ||
64 | * merged in the usual way. | ||
3 | */ | 65 | */ |
66 | |||
4 | #include <linux/kernel.h> | 67 | #include <linux/kernel.h> |
5 | #include <linux/module.h> | 68 | #include <linux/module.h> |
6 | #include <linux/bio.h> | 69 | #include <linux/bio.h> |
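To make the sequencing rules in the new header comment concrete, here is a short worked example. It is a sketch derived only from the rules stated above and the blk_flush_policy() logic added later in this file; the flag names follow the diff.

/*
 * Device with a writeback cache and no FUA support (fflags = REQ_FLUSH):
 *
 *   write, REQ_FLUSH | REQ_FUA, with data
 *       -> REQ_FSEQ_PREFLUSH + REQ_FSEQ_DATA + REQ_FSEQ_POSTFLUSH
 *   write, REQ_FUA only, with data
 *       -> REQ_FSEQ_DATA + REQ_FSEQ_POSTFLUSH
 *   REQ_FLUSH with no data
 *       -> REQ_FSEQ_PREFLUSH only
 *
 * Device that also supports FUA (fflags = REQ_FLUSH | REQ_FUA): the
 * POSTFLUSH steps above are dropped and REQ_FUA is passed down with
 * the DATA step instead.
 */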
@@ -11,58 +74,142 @@ | |||
11 | 74 | ||
12 | /* FLUSH/FUA sequences */ | 75 | /* FLUSH/FUA sequences */ |
13 | enum { | 76 | enum { |
14 | QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ | 77 | REQ_FSEQ_PREFLUSH = (1 << 0), /* pre-flushing in progress */ |
15 | QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ | 78 | REQ_FSEQ_DATA = (1 << 1), /* data write in progress */ |
16 | QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ | 79 | REQ_FSEQ_POSTFLUSH = (1 << 2), /* post-flushing in progress */ |
17 | QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ | 80 | REQ_FSEQ_DONE = (1 << 3), |
18 | QUEUE_FSEQ_DONE = (1 << 4), | 81 | |
82 | REQ_FSEQ_ACTIONS = REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA | | ||
83 | REQ_FSEQ_POSTFLUSH, | ||
84 | |||
85 | /* | ||
86 | * If flush has been pending longer than the following timeout, | ||
87 | * it's issued even if flush_data requests are still in flight. | ||
88 | */ | ||
89 | FLUSH_PENDING_TIMEOUT = 5 * HZ, | ||
19 | }; | 90 | }; |
20 | 91 | ||
21 | static struct request *queue_next_fseq(struct request_queue *q); | 92 | static bool blk_kick_flush(struct request_queue *q); |
22 | 93 | ||
23 | unsigned blk_flush_cur_seq(struct request_queue *q) | 94 | static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) |
24 | { | 95 | { |
25 | if (!q->flush_seq) | 96 | unsigned int policy = 0; |
26 | return 0; | 97 | |
27 | return 1 << ffz(q->flush_seq); | 98 | if (fflags & REQ_FLUSH) { |
99 | if (rq->cmd_flags & REQ_FLUSH) | ||
100 | policy |= REQ_FSEQ_PREFLUSH; | ||
101 | if (blk_rq_sectors(rq)) | ||
102 | policy |= REQ_FSEQ_DATA; | ||
103 | if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) | ||
104 | policy |= REQ_FSEQ_POSTFLUSH; | ||
105 | } | ||
106 | return policy; | ||
28 | } | 107 | } |
29 | 108 | ||
30 | static struct request *blk_flush_complete_seq(struct request_queue *q, | 109 | static unsigned int blk_flush_cur_seq(struct request *rq) |
31 | unsigned seq, int error) | ||
32 | { | 110 | { |
33 | struct request *next_rq = NULL; | 111 | return 1 << ffz(rq->flush.seq); |
34 | 112 | } | |
35 | if (error && !q->flush_err) | 113 | |
36 | q->flush_err = error; | 114 | static void blk_flush_restore_request(struct request *rq) |
37 | 115 | { | |
38 | BUG_ON(q->flush_seq & seq); | 116 | /* |
39 | q->flush_seq |= seq; | 117 | * After flush data completion, @rq->bio is %NULL but we need to |
40 | 118 | * complete the bio again. @rq->biotail is guaranteed to equal the | |
41 | if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { | 119 | * original @rq->bio. Restore it. |
42 | /* not complete yet, queue the next flush sequence */ | 120 | */ |
43 | next_rq = queue_next_fseq(q); | 121 | rq->bio = rq->biotail; |
44 | } else { | 122 | |
45 | /* complete this flush request */ | 123 | /* make @rq a normal request */ |
46 | __blk_end_request_all(q->orig_flush_rq, q->flush_err); | 124 | rq->cmd_flags &= ~REQ_FLUSH_SEQ; |
47 | q->orig_flush_rq = NULL; | 125 | rq->end_io = NULL; |
48 | q->flush_seq = 0; | 126 | } |
49 | 127 | ||
50 | /* dispatch the next flush if there's one */ | 128 | /** |
51 | if (!list_empty(&q->pending_flushes)) { | 129 | * blk_flush_complete_seq - complete flush sequence |
52 | next_rq = list_entry_rq(q->pending_flushes.next); | 130 | * @rq: FLUSH/FUA request being sequenced |
53 | list_move(&next_rq->queuelist, &q->queue_head); | 131 | * @seq: sequences to complete (mask of %REQ_FSEQ_*, can be zero) |
54 | } | 132 | * @error: whether an error occurred |
133 | * | ||
134 | * @rq just completed @seq part of its flush sequence, record the | ||
135 | * completion and trigger the next step. | ||
136 | * | ||
137 | * CONTEXT: | ||
138 | * spin_lock_irq(q->queue_lock) | ||
139 | * | ||
140 | * RETURNS: | ||
141 | * %true if requests were added to the dispatch queue, %false otherwise. | ||
142 | */ | ||
143 | static bool blk_flush_complete_seq(struct request *rq, unsigned int seq, | ||
144 | int error) | ||
145 | { | ||
146 | struct request_queue *q = rq->q; | ||
147 | struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; | ||
148 | bool queued = false; | ||
149 | |||
150 | BUG_ON(rq->flush.seq & seq); | ||
151 | rq->flush.seq |= seq; | ||
152 | |||
153 | if (likely(!error)) | ||
154 | seq = blk_flush_cur_seq(rq); | ||
155 | else | ||
156 | seq = REQ_FSEQ_DONE; | ||
157 | |||
158 | switch (seq) { | ||
159 | case REQ_FSEQ_PREFLUSH: | ||
160 | case REQ_FSEQ_POSTFLUSH: | ||
161 | /* queue for flush */ | ||
162 | if (list_empty(pending)) | ||
163 | q->flush_pending_since = jiffies; | ||
164 | list_move_tail(&rq->flush.list, pending); | ||
165 | break; | ||
166 | |||
167 | case REQ_FSEQ_DATA: | ||
168 | list_move_tail(&rq->flush.list, &q->flush_data_in_flight); | ||
169 | list_add(&rq->queuelist, &q->queue_head); | ||
170 | queued = true; | ||
171 | break; | ||
172 | |||
173 | case REQ_FSEQ_DONE: | ||
174 | /* | ||
175 | * @rq was previously adjusted by blk_flush_issue() for | ||
176 | * flush sequencing and may already have gone through the | ||
177 | * flush data request completion path. Restore @rq for | ||
178 | * normal completion and end it. | ||
179 | */ | ||
180 | BUG_ON(!list_empty(&rq->queuelist)); | ||
181 | list_del_init(&rq->flush.list); | ||
182 | blk_flush_restore_request(rq); | ||
183 | __blk_end_request_all(rq, error); | ||
184 | break; | ||
185 | |||
186 | default: | ||
187 | BUG(); | ||
55 | } | 188 | } |
56 | return next_rq; | 189 | |
190 | return blk_kick_flush(q) | queued; | ||
57 | } | 191 | } |
58 | 192 | ||
59 | static void blk_flush_complete_seq_end_io(struct request_queue *q, | 193 | static void flush_end_io(struct request *flush_rq, int error) |
60 | unsigned seq, int error) | ||
61 | { | 194 | { |
62 | bool was_empty = elv_queue_empty(q); | 195 | struct request_queue *q = flush_rq->q; |
63 | struct request *next_rq; | 196 | struct list_head *running = &q->flush_queue[q->flush_running_idx]; |
197 | bool queued = false; | ||
198 | struct request *rq, *n; | ||
64 | 199 | ||
65 | next_rq = blk_flush_complete_seq(q, seq, error); | 200 | BUG_ON(q->flush_pending_idx == q->flush_running_idx); |
201 | |||
202 | /* account completion of the flush request */ | ||
203 | q->flush_running_idx ^= 1; | ||
204 | elv_completed_request(q, flush_rq); | ||
205 | |||
206 | /* and push the waiting requests to the next stage */ | ||
207 | list_for_each_entry_safe(rq, n, running, flush.list) { | ||
208 | unsigned int seq = blk_flush_cur_seq(rq); | ||
209 | |||
210 | BUG_ON(seq != REQ_FSEQ_PREFLUSH && seq != REQ_FSEQ_POSTFLUSH); | ||
211 | queued |= blk_flush_complete_seq(rq, seq, error); | ||
212 | } | ||
66 | 213 | ||
67 | /* | 214 | /* |
68 | * Moving a request silently to empty queue_head may stall the | 215 | * Moving a request silently to empty queue_head may stall the |
@@ -70,127 +217,153 @@ static void blk_flush_complete_seq_end_io(struct request_queue *q, | |||
70 | * from request completion path and calling directly into | 217 | * from request completion path and calling directly into |
71 | * request_fn may confuse the driver. Always use kblockd. | 218 | * request_fn may confuse the driver. Always use kblockd. |
72 | */ | 219 | */ |
73 | if (was_empty && next_rq) | 220 | if (queued) |
74 | __blk_run_queue(q, true); | 221 | __blk_run_queue(q, true); |
75 | } | 222 | } |
76 | 223 | ||
77 | static void pre_flush_end_io(struct request *rq, int error) | 224 | /** |
225 | * blk_kick_flush - consider issuing flush request | ||
226 | * @q: request_queue being kicked | ||
227 | * | ||
228 | * Flush related states of @q have changed, consider issuing flush request. | ||
229 | * Please read the comment at the top of this file for more info. | ||
230 | * | ||
231 | * CONTEXT: | ||
232 | * spin_lock_irq(q->queue_lock) | ||
233 | * | ||
234 | * RETURNS: | ||
235 | * %true if flush was issued, %false otherwise. | ||
236 | */ | ||
237 | static bool blk_kick_flush(struct request_queue *q) | ||
78 | { | 238 | { |
79 | elv_completed_request(rq->q, rq); | 239 | struct list_head *pending = &q->flush_queue[q->flush_pending_idx]; |
80 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error); | 240 | struct request *first_rq = |
241 | list_first_entry(pending, struct request, flush.list); | ||
242 | |||
243 | /* C1 described at the top of this file */ | ||
244 | if (q->flush_pending_idx != q->flush_running_idx || list_empty(pending)) | ||
245 | return false; | ||
246 | |||
247 | /* C2 and C3 */ | ||
248 | if (!list_empty(&q->flush_data_in_flight) && | ||
249 | time_before(jiffies, | ||
250 | q->flush_pending_since + FLUSH_PENDING_TIMEOUT)) | ||
251 | return false; | ||
252 | |||
253 | /* | ||
254 | * Issue flush and toggle pending_idx. This makes pending_idx | ||
255 | * different from running_idx, which means flush is in flight. | ||
256 | */ | ||
257 | blk_rq_init(q, &q->flush_rq); | ||
258 | q->flush_rq.cmd_type = REQ_TYPE_FS; | ||
259 | q->flush_rq.cmd_flags = WRITE_FLUSH | REQ_FLUSH_SEQ; | ||
260 | q->flush_rq.rq_disk = first_rq->rq_disk; | ||
261 | q->flush_rq.end_io = flush_end_io; | ||
262 | |||
263 | q->flush_pending_idx ^= 1; | ||
264 | elv_insert(q, &q->flush_rq, ELEVATOR_INSERT_REQUEUE); | ||
265 | return true; | ||
81 | } | 266 | } |
82 | 267 | ||
83 | static void flush_data_end_io(struct request *rq, int error) | 268 | static void flush_data_end_io(struct request *rq, int error) |
84 | { | 269 | { |
85 | elv_completed_request(rq->q, rq); | 270 | struct request_queue *q = rq->q; |
86 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error); | ||
87 | } | ||
88 | 271 | ||
89 | static void post_flush_end_io(struct request *rq, int error) | 272 | /* |
90 | { | 273 | * After populating an empty queue, kick it to avoid stall. Read |
91 | elv_completed_request(rq->q, rq); | 274 | * the comment in flush_end_io(). |
92 | blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error); | 275 | */ |
276 | if (blk_flush_complete_seq(rq, REQ_FSEQ_DATA, error)) | ||
277 | __blk_run_queue(q, true); | ||
93 | } | 278 | } |
94 | 279 | ||
95 | static void init_flush_request(struct request *rq, struct gendisk *disk) | 280 | /** |
281 | * blk_insert_flush - insert a new FLUSH/FUA request | ||
282 | * @rq: request to insert | ||
283 | * | ||
284 | * To be called from elv_insert() for %ELEVATOR_INSERT_FLUSH insertions. | ||
285 | * @rq is being submitted. Analyze what needs to be done and put it on the | ||
286 | * right queue. | ||
287 | * | ||
288 | * CONTEXT: | ||
289 | * spin_lock_irq(q->queue_lock) | ||
290 | */ | ||
291 | void blk_insert_flush(struct request *rq) | ||
96 | { | 292 | { |
97 | rq->cmd_type = REQ_TYPE_FS; | 293 | struct request_queue *q = rq->q; |
98 | rq->cmd_flags = WRITE_FLUSH; | 294 | unsigned int fflags = q->flush_flags; /* may change, cache */ |
99 | rq->rq_disk = disk; | 295 | unsigned int policy = blk_flush_policy(fflags, rq); |
100 | } | ||
101 | 296 | ||
102 | static struct request *queue_next_fseq(struct request_queue *q) | 297 | BUG_ON(rq->end_io); |
103 | { | 298 | BUG_ON(!rq->bio || rq->bio != rq->biotail); |
104 | struct request *orig_rq = q->orig_flush_rq; | ||
105 | struct request *rq = &q->flush_rq; | ||
106 | 299 | ||
107 | blk_rq_init(q, rq); | 300 | /* |
301 | * @policy now records what operations need to be done. Adjust | ||
302 | * REQ_FLUSH and FUA for the driver. | ||
303 | */ | ||
304 | rq->cmd_flags &= ~REQ_FLUSH; | ||
305 | if (!(fflags & REQ_FUA)) | ||
306 | rq->cmd_flags &= ~REQ_FUA; | ||
108 | 307 | ||
109 | switch (blk_flush_cur_seq(q)) { | 308 | /* |
110 | case QUEUE_FSEQ_PREFLUSH: | 309 | * If there's data but flush is not necessary, the request can be |
111 | init_flush_request(rq, orig_rq->rq_disk); | 310 | * processed directly without going through flush machinery. Queue |
112 | rq->end_io = pre_flush_end_io; | 311 | * for normal execution. |
113 | break; | 312 | */ |
114 | case QUEUE_FSEQ_DATA: | 313 | if ((policy & REQ_FSEQ_DATA) && |
115 | init_request_from_bio(rq, orig_rq->bio); | 314 | !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { |
116 | /* | 315 | list_add(&rq->queuelist, &q->queue_head); |
117 | * orig_rq->rq_disk may be different from | 316 | return; |
118 | * bio->bi_bdev->bd_disk if orig_rq got here through | ||
119 | * remapping drivers. Make sure rq->rq_disk points | ||
120 | * to the same one as orig_rq. | ||
121 | */ | ||
122 | rq->rq_disk = orig_rq->rq_disk; | ||
123 | rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); | ||
124 | rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); | ||
125 | rq->end_io = flush_data_end_io; | ||
126 | break; | ||
127 | case QUEUE_FSEQ_POSTFLUSH: | ||
128 | init_flush_request(rq, orig_rq->rq_disk); | ||
129 | rq->end_io = post_flush_end_io; | ||
130 | break; | ||
131 | default: | ||
132 | BUG(); | ||
133 | } | 317 | } |
134 | 318 | ||
135 | elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); | 319 | /* |
136 | return rq; | 320 | * @rq should go through flush machinery. Mark it part of flush |
321 | * sequence and submit for further processing. | ||
322 | */ | ||
323 | memset(&rq->flush, 0, sizeof(rq->flush)); | ||
324 | INIT_LIST_HEAD(&rq->flush.list); | ||
325 | rq->cmd_flags |= REQ_FLUSH_SEQ; | ||
326 | rq->end_io = flush_data_end_io; | ||
327 | |||
328 | blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); | ||
137 | } | 329 | } |
138 | 330 | ||
139 | struct request *blk_do_flush(struct request_queue *q, struct request *rq) | 331 | /** |
332 | * blk_abort_flushes - @q is being aborted, abort flush requests | ||
333 | * @q: request_queue being aborted | ||
334 | * | ||
335 | * To be called from elv_abort_queue(). @q is being aborted. Prepare all | ||
336 | * FLUSH/FUA requests for abortion. | ||
337 | * | ||
338 | * CONTEXT: | ||
339 | * spin_lock_irq(q->queue_lock) | ||
340 | */ | ||
341 | void blk_abort_flushes(struct request_queue *q) | ||
140 | { | 342 | { |
141 | unsigned int fflags = q->flush_flags; /* may change, cache it */ | 343 | struct request *rq, *n; |
142 | bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; | 344 | int i; |
143 | bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); | ||
144 | bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); | ||
145 | unsigned skip = 0; | ||
146 | 345 | ||
147 | /* | 346 | /* |
148 | * Special case. If there's data but flush is not necessary, | 347 | * Requests in flight for data are already owned by the dispatch |
149 | * the request can be issued directly. | 348 | * queue or the device driver. Just restore for normal completion. |
150 | * | ||
151 | * Flush w/o data should be able to be issued directly too but | ||
152 | * currently some drivers assume that rq->bio contains | ||
153 | * non-zero data if it isn't NULL and empty FLUSH requests | ||
154 | * getting here usually have bio's without data. | ||
155 | */ | 349 | */ |
156 | if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { | 350 | list_for_each_entry_safe(rq, n, &q->flush_data_in_flight, flush.list) { |
157 | rq->cmd_flags &= ~REQ_FLUSH; | 351 | list_del_init(&rq->flush.list); |
158 | if (!has_fua) | 352 | blk_flush_restore_request(rq); |
159 | rq->cmd_flags &= ~REQ_FUA; | ||
160 | return rq; | ||
161 | } | 353 | } |
162 | 354 | ||
163 | /* | 355 | /* |
164 | * Sequenced flushes can't be processed in parallel. If | 356 | * We need to give away requests on flush queues. Restore for |
165 | * another one is already in progress, queue for later | 357 | * normal completion and put them on the dispatch queue. |
166 | * processing. | ||
167 | */ | 358 | */ |
168 | if (q->flush_seq) { | 359 | for (i = 0; i < ARRAY_SIZE(q->flush_queue); i++) { |
169 | list_move_tail(&rq->queuelist, &q->pending_flushes); | 360 | list_for_each_entry_safe(rq, n, &q->flush_queue[i], |
170 | return NULL; | 361 | flush.list) { |
362 | list_del_init(&rq->flush.list); | ||
363 | blk_flush_restore_request(rq); | ||
364 | list_add_tail(&rq->queuelist, &q->queue_head); | ||
365 | } | ||
171 | } | 366 | } |
172 | |||
173 | /* | ||
174 | * Start a new flush sequence | ||
175 | */ | ||
176 | q->flush_err = 0; | ||
177 | q->flush_seq |= QUEUE_FSEQ_STARTED; | ||
178 | |||
179 | /* adjust FLUSH/FUA of the original request and stash it away */ | ||
180 | rq->cmd_flags &= ~REQ_FLUSH; | ||
181 | if (!has_fua) | ||
182 | rq->cmd_flags &= ~REQ_FUA; | ||
183 | blk_dequeue_request(rq); | ||
184 | q->orig_flush_rq = rq; | ||
185 | |||
186 | /* skip unneeded sequences and return the first one */ | ||
187 | if (!do_preflush) | ||
188 | skip |= QUEUE_FSEQ_PREFLUSH; | ||
189 | if (!blk_rq_sectors(rq)) | ||
190 | skip |= QUEUE_FSEQ_DATA; | ||
191 | if (!do_postflush) | ||
192 | skip |= QUEUE_FSEQ_POSTFLUSH; | ||
193 | return blk_flush_complete_seq(q, skip, 0); | ||
194 | } | 367 | } |
195 | 368 | ||
196 | static void bio_end_flush(struct bio *bio, int err) | 369 | static void bio_end_flush(struct bio *bio, int err) |
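The rewritten blk-flush.c drops the per-queue flush_seq state machine in favour of a per-request sequence mask: blk_flush_policy() decides which of the PREFLUSH, DATA and POSTFLUSH steps a request needs, blk_insert_flush() pre-marks the unneeded steps as already done, and blk_flush_cur_seq() always returns the lowest step still pending. Below is a minimal user-space sketch of that bookkeeping; the toy_* names, the simplified flag tests and the hand-rolled ffz() are illustrative stand-ins, not the kernel definitions.

#include <stdio.h>

/* simplified stand-ins for the kernel's REQ_FSEQ_* bits */
#define REQ_FSEQ_PREFLUSH  (1 << 0)
#define REQ_FSEQ_DATA      (1 << 1)
#define REQ_FSEQ_POSTFLUSH (1 << 2)
#define REQ_FSEQ_DONE      (1 << 3)
#define REQ_FSEQ_ACTIONS   (REQ_FSEQ_PREFLUSH | REQ_FSEQ_DATA | REQ_FSEQ_POSTFLUSH)

/* toy request: what it wants, and which steps have already completed */
struct toy_request {
	int has_data;		/* carries payload sectors */
	int wants_preflush;	/* cache must be flushed before the data */
	int wants_postflush;	/* cache must be flushed after the data */
	unsigned int seq;	/* completed steps, like rq->flush.seq */
};

/* find-first-zero bit, a hand-rolled stand-in for the kernel's ffz() */
static unsigned int toy_ffz(unsigned int v)
{
	unsigned int bit = 0;

	while (v & (1u << bit))
		bit++;
	return bit;
}

/* mirrors blk_flush_policy(): which steps does this request need? */
static unsigned int toy_flush_policy(const struct toy_request *rq)
{
	unsigned int policy = 0;

	if (rq->wants_preflush)
		policy |= REQ_FSEQ_PREFLUSH;
	if (rq->has_data)
		policy |= REQ_FSEQ_DATA;
	if (rq->wants_postflush)
		policy |= REQ_FSEQ_POSTFLUSH;
	return policy;
}

/* mirrors blk_flush_cur_seq(): the lowest step not yet completed */
static unsigned int toy_flush_cur_seq(const struct toy_request *rq)
{
	return 1u << toy_ffz(rq->seq);
}

int main(void)
{
	/* a request with data that needs a preflush but no postflush */
	struct toy_request rq = { .has_data = 1, .wants_preflush = 1 };
	unsigned int policy = toy_flush_policy(&rq);

	/* as in blk_insert_flush(): pre-complete the steps we do not need */
	rq.seq |= REQ_FSEQ_ACTIONS & ~policy;

	while (toy_flush_cur_seq(&rq) != REQ_FSEQ_DONE) {
		printf("next step: 0x%x\n", toy_flush_cur_seq(&rq));
		rq.seq |= toy_flush_cur_seq(&rq);	/* pretend it completed */
	}
	printf("flush sequence done\n");
	return 0;
}

Pre-completing the unneeded bits up front is what lets a single ffz() walk the remaining steps in order, which is the same trick blk_flush_complete_seq() relies on above.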
diff --git a/block/blk-lib.c b/block/blk-lib.c
index bd3e8df4d5e2..25de73e4759b 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -136,8 +136,6 @@ static void bio_batch_end_io(struct bio *bio, int err) | |||
136 | * | 136 | * |
137 | * Description: | 137 | * Description: |
138 | * Generate and issue number of bios with zerofiled pages. | 138 | * Generate and issue number of bios with zerofiled pages. |
139 | * Send barrier at the beginning and at the end if requested. This guarantie | ||
140 | * correct request ordering. Empty barrier allow us to avoid post queue flush. | ||
141 | */ | 139 | */ |
142 | 140 | ||
143 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 141 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
diff --git a/block/blk-merge.c b/block/blk-merge.c
index ea85e20d5e94..cfcc37cb222b 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -465,3 +465,9 @@ int attempt_front_merge(struct request_queue *q, struct request *rq) | |||
465 | 465 | ||
466 | return 0; | 466 | return 0; |
467 | } | 467 | } |
468 | |||
469 | int blk_attempt_req_merge(struct request_queue *q, struct request *rq, | ||
470 | struct request *next) | ||
471 | { | ||
472 | return attempt_merge(q, rq, next); | ||
473 | } | ||
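blk_attempt_req_merge() simply exposes attempt_merge() to elevator.c, where the new elv_attempt_insert_merge() helper (added near the end of this diff) tries the one-hit last_merge cache first and then a hash lookup for a back-merge candidate. A rough user-space sketch of that two-step merge attempt follows; the toy_* types and the linear "hash" scan are placeholders for the real request and rqhash structures.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* toy request: a contiguous [pos, pos + len) range of sectors */
struct toy_req {
	unsigned long pos;
	unsigned long len;
};

/* rough equivalent of blk_attempt_req_merge(): append next onto rq */
static bool toy_attempt_req_merge(struct toy_req *rq, const struct toy_req *next)
{
	if (!rq || rq->pos + rq->len != next->pos)
		return false;
	rq->len += next->len;
	return true;
}

/* linear scan standing in for the elevator hash: request ending at pos */
static struct toy_req *toy_hash_find(struct toy_req *queued, size_t n,
				     unsigned long pos)
{
	for (size_t i = 0; i < n; i++)
		if (queued[i].pos + queued[i].len == pos)
			return &queued[i];
	return NULL;
}

/* rough equivalent of elv_attempt_insert_merge(): cache first, then hash */
static bool toy_insert_merge(struct toy_req **last_merge,
			     struct toy_req *queued, size_t n,
			     const struct toy_req *rq)
{
	struct toy_req *hit;

	if (*last_merge && toy_attempt_req_merge(*last_merge, rq))
		return true;

	hit = toy_hash_find(queued, n, rq->pos);
	if (hit && toy_attempt_req_merge(hit, rq)) {
		*last_merge = hit;
		return true;
	}
	return false;
}

int main(void)
{
	struct toy_req queued[] = { { 0, 8 }, { 64, 8 } };
	struct toy_req *last_merge = NULL;
	struct toy_req rq = { 72, 8 };	/* back-merges onto { 64, 8 } */

	if (toy_insert_merge(&last_merge, queued, 2, &rq))
		printf("merged: second request now spans %lu..%lu\n",
		       queued[1].pos, queued[1].pos + queued[1].len);
	return 0;
}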
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 36c8c1f2af18..1fa769293597 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -164,25 +164,10 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) | |||
164 | blk_queue_congestion_threshold(q); | 164 | blk_queue_congestion_threshold(q); |
165 | q->nr_batching = BLK_BATCH_REQ; | 165 | q->nr_batching = BLK_BATCH_REQ; |
166 | 166 | ||
167 | q->unplug_thresh = 4; /* hmm */ | ||
168 | q->unplug_delay = msecs_to_jiffies(3); /* 3 milliseconds */ | ||
169 | if (q->unplug_delay == 0) | ||
170 | q->unplug_delay = 1; | ||
171 | |||
172 | q->unplug_timer.function = blk_unplug_timeout; | ||
173 | q->unplug_timer.data = (unsigned long)q; | ||
174 | |||
175 | blk_set_default_limits(&q->limits); | 167 | blk_set_default_limits(&q->limits); |
176 | blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); | 168 | blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); |
177 | 169 | ||
178 | /* | 170 | /* |
179 | * If the caller didn't supply a lock, fall back to our embedded | ||
180 | * per-queue locks | ||
181 | */ | ||
182 | if (!q->queue_lock) | ||
183 | q->queue_lock = &q->__queue_lock; | ||
184 | |||
185 | /* | ||
186 | * by default assume old behaviour and bounce for any highmem page | 171 | * by default assume old behaviour and bounce for any highmem page |
187 | */ | 172 | */ |
188 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); | 173 | blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH); |
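The per-queue unplug threshold, delay and timer disappear from queue setup here; the blk-throttle hunk further down shows the replacement pattern, an on-stack struct blk_plug bracketed by blk_start_plug()/blk_finish_plug() so the submitter itself batches and releases its I/O. The following is a toy user-space model of that idea only, with toy_* names standing in for the real plug API.

#include <stdio.h>

#define PLUG_MAX 16

/* toy on-stack plug: submissions are held back until the plug is finished */
struct toy_plug {
	int bios[PLUG_MAX];
	int count;
};

/* the kernel tracks the current plug per task; a single pointer here */
static struct toy_plug *current_plug;

static void toy_start_plug(struct toy_plug *plug)
{
	plug->count = 0;
	current_plug = plug;
}

static void toy_submit_bio(int bio)
{
	if (current_plug && current_plug->count < PLUG_MAX) {
		current_plug->bios[current_plug->count++] = bio;	/* just queue it */
		return;
	}
	printf("dispatch bio %d immediately\n", bio);
}

static void toy_finish_plug(struct toy_plug *plug)
{
	current_plug = NULL;
	for (int i = 0; i < plug->count; i++)	/* one batch to the queue */
		printf("dispatch bio %d from plug\n", plug->bios[i]);
	plug->count = 0;
}

int main(void)
{
	struct toy_plug plug;
	int bio;

	toy_start_plug(&plug);
	for (bio = 1; bio <= 3; bio++)
		toy_submit_bio(bio);
	toy_finish_plug(&plug);	/* the whole batch hits the queue here */
	return 0;
}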
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 41fb69150b4d..261c75c665ae 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -471,8 +471,6 @@ static void blk_release_queue(struct kobject *kobj) | |||
471 | 471 | ||
472 | blk_sync_queue(q); | 472 | blk_sync_queue(q); |
473 | 473 | ||
474 | blk_throtl_exit(q); | ||
475 | |||
476 | if (rl->rq_pool) | 474 | if (rl->rq_pool) |
477 | mempool_destroy(rl->rq_pool); | 475 | mempool_destroy(rl->rq_pool); |
478 | 476 | ||
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index e36cc10a346c..5352bdafbcf0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -102,7 +102,7 @@ struct throtl_data | |||
102 | /* Work for dispatching throttled bios */ | 102 | /* Work for dispatching throttled bios */ |
103 | struct delayed_work throtl_work; | 103 | struct delayed_work throtl_work; |
104 | 104 | ||
105 | atomic_t limits_changed; | 105 | bool limits_changed; |
106 | }; | 106 | }; |
107 | 107 | ||
108 | enum tg_state_flags { | 108 | enum tg_state_flags { |
@@ -201,6 +201,7 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td, | |||
201 | RB_CLEAR_NODE(&tg->rb_node); | 201 | RB_CLEAR_NODE(&tg->rb_node); |
202 | bio_list_init(&tg->bio_lists[0]); | 202 | bio_list_init(&tg->bio_lists[0]); |
203 | bio_list_init(&tg->bio_lists[1]); | 203 | bio_list_init(&tg->bio_lists[1]); |
204 | td->limits_changed = false; | ||
204 | 205 | ||
205 | /* | 206 | /* |
206 | * Take the initial reference that will be released on destroy | 207 | * Take the initial reference that will be released on destroy |
@@ -737,34 +738,36 @@ static void throtl_process_limit_change(struct throtl_data *td) | |||
737 | struct throtl_grp *tg; | 738 | struct throtl_grp *tg; |
738 | struct hlist_node *pos, *n; | 739 | struct hlist_node *pos, *n; |
739 | 740 | ||
740 | if (!atomic_read(&td->limits_changed)) | 741 | if (!td->limits_changed) |
741 | return; | 742 | return; |
742 | 743 | ||
743 | throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed)); | 744 | xchg(&td->limits_changed, false); |
744 | 745 | ||
745 | /* | 746 | throtl_log(td, "limits changed"); |
746 | * Make sure updates from throtl_update_blkio_group_read_bps() group | ||
747 | * of functions to tg->limits_changed are visible. We do not | ||
748 | * want update td->limits_changed to be visible but update to | ||
749 | * tg->limits_changed not being visible yet on this cpu. Hence | ||
750 | * the read barrier. | ||
751 | */ | ||
752 | smp_rmb(); | ||
753 | 747 | ||
754 | hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { | 748 | hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { |
755 | if (throtl_tg_on_rr(tg) && tg->limits_changed) { | 749 | if (!tg->limits_changed) |
756 | throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" | 750 | continue; |
757 | " riops=%u wiops=%u", tg->bps[READ], | 751 | |
758 | tg->bps[WRITE], tg->iops[READ], | 752 | if (!xchg(&tg->limits_changed, false)) |
759 | tg->iops[WRITE]); | 753 | continue; |
754 | |||
755 | throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" | ||
756 | " riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE], | ||
757 | tg->iops[READ], tg->iops[WRITE]); | ||
758 | |||
759 | /* | ||
760 | * Restart the slices for both READ and WRITES. It | ||
761 | * might happen that a group's limit are dropped | ||
762 | * suddenly and we don't want to account recently | ||
763 | * dispatched IO with new low rate | ||
764 | */ | ||
765 | throtl_start_new_slice(td, tg, 0); | ||
766 | throtl_start_new_slice(td, tg, 1); | ||
767 | |||
768 | if (throtl_tg_on_rr(tg)) | ||
760 | tg_update_disptime(td, tg); | 769 | tg_update_disptime(td, tg); |
761 | tg->limits_changed = false; | ||
762 | } | ||
763 | } | 770 | } |
764 | |||
765 | smp_mb__before_atomic_dec(); | ||
766 | atomic_dec(&td->limits_changed); | ||
767 | smp_mb__after_atomic_dec(); | ||
768 | } | 771 | } |
769 | 772 | ||
770 | /* Dispatch throttled bios. Should be called without queue lock held. */ | 773 | /* Dispatch throttled bios. Should be called without queue lock held. */ |
@@ -774,6 +777,7 @@ static int throtl_dispatch(struct request_queue *q) | |||
774 | unsigned int nr_disp = 0; | 777 | unsigned int nr_disp = 0; |
775 | struct bio_list bio_list_on_stack; | 778 | struct bio_list bio_list_on_stack; |
776 | struct bio *bio; | 779 | struct bio *bio; |
780 | struct blk_plug plug; | ||
777 | 781 | ||
778 | spin_lock_irq(q->queue_lock); | 782 | spin_lock_irq(q->queue_lock); |
779 | 783 | ||
@@ -802,9 +806,10 @@ out: | |||
802 | * immediate dispatch | 806 | * immediate dispatch |
803 | */ | 807 | */ |
804 | if (nr_disp) { | 808 | if (nr_disp) { |
809 | blk_start_plug(&plug); | ||
805 | while((bio = bio_list_pop(&bio_list_on_stack))) | 810 | while((bio = bio_list_pop(&bio_list_on_stack))) |
806 | generic_make_request(bio); | 811 | generic_make_request(bio); |
807 | blk_unplug(q); | 812 | blk_finish_plug(&plug); |
808 | } | 813 | } |
809 | return nr_disp; | 814 | return nr_disp; |
810 | } | 815 | } |
@@ -825,7 +830,8 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) | |||
825 | 830 | ||
826 | struct delayed_work *dwork = &td->throtl_work; | 831 | struct delayed_work *dwork = &td->throtl_work; |
827 | 832 | ||
828 | if (total_nr_queued(td) > 0) { | 833 | /* schedule work if limits changed even if no bio is queued */ |
834 | if (total_nr_queued(td) > 0 || td->limits_changed) { | ||
829 | /* | 835 | /* |
830 | * We might have a work scheduled to be executed in future. | 836 | * We might have a work scheduled to be executed in future. |
831 | * Cancel that and schedule a new one. | 837 | * Cancel that and schedule a new one. |
@@ -898,6 +904,15 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg) | |||
898 | spin_unlock_irqrestore(td->queue->queue_lock, flags); | 904 | spin_unlock_irqrestore(td->queue->queue_lock, flags); |
899 | } | 905 | } |
900 | 906 | ||
907 | static void throtl_update_blkio_group_common(struct throtl_data *td, | ||
908 | struct throtl_grp *tg) | ||
909 | { | ||
910 | xchg(&tg->limits_changed, true); | ||
911 | xchg(&td->limits_changed, true); | ||
912 | /* Schedule a work now to process the limit change */ | ||
913 | throtl_schedule_delayed_work(td, 0); | ||
914 | } | ||
915 | |||
901 | /* | 916 | /* |
902 | * For all update functions, key should be a valid pointer because these | 917 | * For all update functions, key should be a valid pointer because these |
903 | * update functions are called under blkcg_lock, that means, blkg is | 918 | * update functions are called under blkcg_lock, that means, blkg is |
@@ -911,64 +926,43 @@ static void throtl_update_blkio_group_read_bps(void *key, | |||
911 | struct blkio_group *blkg, u64 read_bps) | 926 | struct blkio_group *blkg, u64 read_bps) |
912 | { | 927 | { |
913 | struct throtl_data *td = key; | 928 | struct throtl_data *td = key; |
929 | struct throtl_grp *tg = tg_of_blkg(blkg); | ||
914 | 930 | ||
915 | tg_of_blkg(blkg)->bps[READ] = read_bps; | 931 | tg->bps[READ] = read_bps; |
916 | /* Make sure read_bps is updated before setting limits_changed */ | 932 | throtl_update_blkio_group_common(td, tg); |
917 | smp_wmb(); | ||
918 | tg_of_blkg(blkg)->limits_changed = true; | ||
919 | |||
920 | /* Make sure tg->limits_changed is updated before td->limits_changed */ | ||
921 | smp_mb__before_atomic_inc(); | ||
922 | atomic_inc(&td->limits_changed); | ||
923 | smp_mb__after_atomic_inc(); | ||
924 | |||
925 | /* Schedule a work now to process the limit change */ | ||
926 | throtl_schedule_delayed_work(td, 0); | ||
927 | } | 933 | } |
928 | 934 | ||
929 | static void throtl_update_blkio_group_write_bps(void *key, | 935 | static void throtl_update_blkio_group_write_bps(void *key, |
930 | struct blkio_group *blkg, u64 write_bps) | 936 | struct blkio_group *blkg, u64 write_bps) |
931 | { | 937 | { |
932 | struct throtl_data *td = key; | 938 | struct throtl_data *td = key; |
939 | struct throtl_grp *tg = tg_of_blkg(blkg); | ||
933 | 940 | ||
934 | tg_of_blkg(blkg)->bps[WRITE] = write_bps; | 941 | tg->bps[WRITE] = write_bps; |
935 | smp_wmb(); | 942 | throtl_update_blkio_group_common(td, tg); |
936 | tg_of_blkg(blkg)->limits_changed = true; | ||
937 | smp_mb__before_atomic_inc(); | ||
938 | atomic_inc(&td->limits_changed); | ||
939 | smp_mb__after_atomic_inc(); | ||
940 | throtl_schedule_delayed_work(td, 0); | ||
941 | } | 943 | } |
942 | 944 | ||
943 | static void throtl_update_blkio_group_read_iops(void *key, | 945 | static void throtl_update_blkio_group_read_iops(void *key, |
944 | struct blkio_group *blkg, unsigned int read_iops) | 946 | struct blkio_group *blkg, unsigned int read_iops) |
945 | { | 947 | { |
946 | struct throtl_data *td = key; | 948 | struct throtl_data *td = key; |
949 | struct throtl_grp *tg = tg_of_blkg(blkg); | ||
947 | 950 | ||
948 | tg_of_blkg(blkg)->iops[READ] = read_iops; | 951 | tg->iops[READ] = read_iops; |
949 | smp_wmb(); | 952 | throtl_update_blkio_group_common(td, tg); |
950 | tg_of_blkg(blkg)->limits_changed = true; | ||
951 | smp_mb__before_atomic_inc(); | ||
952 | atomic_inc(&td->limits_changed); | ||
953 | smp_mb__after_atomic_inc(); | ||
954 | throtl_schedule_delayed_work(td, 0); | ||
955 | } | 953 | } |
956 | 954 | ||
957 | static void throtl_update_blkio_group_write_iops(void *key, | 955 | static void throtl_update_blkio_group_write_iops(void *key, |
958 | struct blkio_group *blkg, unsigned int write_iops) | 956 | struct blkio_group *blkg, unsigned int write_iops) |
959 | { | 957 | { |
960 | struct throtl_data *td = key; | 958 | struct throtl_data *td = key; |
959 | struct throtl_grp *tg = tg_of_blkg(blkg); | ||
961 | 960 | ||
962 | tg_of_blkg(blkg)->iops[WRITE] = write_iops; | 961 | tg->iops[WRITE] = write_iops; |
963 | smp_wmb(); | 962 | throtl_update_blkio_group_common(td, tg); |
964 | tg_of_blkg(blkg)->limits_changed = true; | ||
965 | smp_mb__before_atomic_inc(); | ||
966 | atomic_inc(&td->limits_changed); | ||
967 | smp_mb__after_atomic_inc(); | ||
968 | throtl_schedule_delayed_work(td, 0); | ||
969 | } | 963 | } |
970 | 964 | ||
971 | void throtl_shutdown_timer_wq(struct request_queue *q) | 965 | static void throtl_shutdown_wq(struct request_queue *q) |
972 | { | 966 | { |
973 | struct throtl_data *td = q->td; | 967 | struct throtl_data *td = q->td; |
974 | 968 | ||
@@ -1009,20 +1003,28 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) | |||
1009 | /* | 1003 | /* |
1010 | * There is already another bio queued in same dir. No | 1004 | * There is already another bio queued in same dir. No |
1011 | * need to update dispatch time. | 1005 | * need to update dispatch time. |
1012 | * Still update the disptime if rate limits on this group | ||
1013 | * were changed. | ||
1014 | */ | 1006 | */ |
1015 | if (!tg->limits_changed) | 1007 | update_disptime = false; |
1016 | update_disptime = false; | ||
1017 | else | ||
1018 | tg->limits_changed = false; | ||
1019 | |||
1020 | goto queue_bio; | 1008 | goto queue_bio; |
1009 | |||
1021 | } | 1010 | } |
1022 | 1011 | ||
1023 | /* Bio is within rate limit of group */ | 1012 | /* Bio is within rate limit of group */ |
1024 | if (tg_may_dispatch(td, tg, bio, NULL)) { | 1013 | if (tg_may_dispatch(td, tg, bio, NULL)) { |
1025 | throtl_charge_bio(tg, bio); | 1014 | throtl_charge_bio(tg, bio); |
1015 | |||
1016 | /* | ||
1017 | * We need to trim slice even when bios are not being queued | ||
1018 | * otherwise it might happen that a bio is not queued for | ||
1019 | * a long time and slice keeps on extending and trim is not | ||
1020 | * called for a long time. Now if limits are reduced suddenly | ||
1021 | * we take into account all the IO dispatched so far at new | ||
1022 | * low rate and newly queued IO gets a really long dispatch | ||
1023 | * time. | ||
1024 | * | ||
1025 | * So keep on trimming slice even if bio is not queued. | ||
1026 | */ | ||
1027 | throtl_trim_slice(td, tg, rw); | ||
1026 | goto out; | 1028 | goto out; |
1027 | } | 1029 | } |
1028 | 1030 | ||
@@ -1058,7 +1060,7 @@ int blk_throtl_init(struct request_queue *q) | |||
1058 | 1060 | ||
1059 | INIT_HLIST_HEAD(&td->tg_list); | 1061 | INIT_HLIST_HEAD(&td->tg_list); |
1060 | td->tg_service_tree = THROTL_RB_ROOT; | 1062 | td->tg_service_tree = THROTL_RB_ROOT; |
1061 | atomic_set(&td->limits_changed, 0); | 1063 | td->limits_changed = false; |
1062 | 1064 | ||
1063 | /* Init root group */ | 1065 | /* Init root group */ |
1064 | tg = &td->root_tg; | 1066 | tg = &td->root_tg; |
@@ -1070,6 +1072,7 @@ int blk_throtl_init(struct request_queue *q) | |||
1070 | /* Practically unlimited BW */ | 1072 | /* Practically unlimited BW */ |
1071 | tg->bps[0] = tg->bps[1] = -1; | 1073 | tg->bps[0] = tg->bps[1] = -1; |
1072 | tg->iops[0] = tg->iops[1] = -1; | 1074 | tg->iops[0] = tg->iops[1] = -1; |
1075 | td->limits_changed = false; | ||
1073 | 1076 | ||
1074 | /* | 1077 | /* |
1075 | * Set root group reference to 2. One reference will be dropped when | 1078 | * Set root group reference to 2. One reference will be dropped when |
@@ -1102,7 +1105,7 @@ void blk_throtl_exit(struct request_queue *q) | |||
1102 | 1105 | ||
1103 | BUG_ON(!td); | 1106 | BUG_ON(!td); |
1104 | 1107 | ||
1105 | throtl_shutdown_timer_wq(q); | 1108 | throtl_shutdown_wq(q); |
1106 | 1109 | ||
1107 | spin_lock_irq(q->queue_lock); | 1110 | spin_lock_irq(q->queue_lock); |
1108 | throtl_release_tgs(td); | 1111 | throtl_release_tgs(td); |
@@ -1132,7 +1135,7 @@ void blk_throtl_exit(struct request_queue *q) | |||
1132 | * update limits through cgroup and another work got queued, cancel | 1135 | * update limits through cgroup and another work got queued, cancel |
1133 | * it. | 1136 | * it. |
1134 | */ | 1137 | */ |
1135 | throtl_shutdown_timer_wq(q); | 1138 | throtl_shutdown_wq(q); |
1136 | throtl_td_free(td); | 1139 | throtl_td_free(td); |
1137 | } | 1140 | } |
1138 | 1141 | ||
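In blk-throttle.c the atomic limits_changed counter and its explicit memory barriers give way to plain bools flipped with xchg(): an update sets the per-group flag and then the per-device flag, and the dispatch worker clears both with exchanges so only one pass acts on each change, restarting the group's slices as it goes. Below is a small user-space model of that handshake, using C11 atomics in place of the kernel's xchg(); toy_group and toy_device are simplified stand-ins for throtl_grp and throtl_data.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* toy stand-ins for throtl_grp and throtl_data */
struct toy_group {
	atomic_bool limits_changed;
	unsigned long long read_bps;
};

struct toy_device {
	atomic_bool limits_changed;
	struct toy_group group;		/* the real throtl_data keeps a list */
};

/* updater side, cf. throtl_update_blkio_group_common() */
static void toy_update_read_bps(struct toy_device *td, unsigned long long bps)
{
	td->group.read_bps = bps;
	atomic_exchange(&td->group.limits_changed, true);
	atomic_exchange(&td->limits_changed, true);
	/* the kernel would now schedule the dispatch work with zero delay */
}

/* worker side, cf. throtl_process_limit_change() */
static void toy_process_limit_change(struct toy_device *td)
{
	if (!atomic_load(&td->limits_changed))
		return;

	atomic_exchange(&td->limits_changed, false);

	/* the exchange guarantees only one pass acts on the group's change */
	if (atomic_exchange(&td->group.limits_changed, false))
		printf("restart slices, new read_bps=%llu\n",
		       td->group.read_bps);
}

int main(void)
{
	struct toy_device td = { .group = { .read_bps = 0 } };

	toy_update_read_bps(&td, 1024 * 1024);
	toy_process_limit_change(&td);	/* picks up the change */
	toy_process_limit_change(&td);	/* nothing left to do */
	return 0;
}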
diff --git a/block/blk.h b/block/blk.h
index 2db8f32838e7..c8db371a921d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -18,8 +18,6 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, | |||
18 | void blk_dequeue_request(struct request *rq); | 18 | void blk_dequeue_request(struct request *rq); |
19 | void __blk_queue_free_tags(struct request_queue *q); | 19 | void __blk_queue_free_tags(struct request_queue *q); |
20 | 20 | ||
21 | void blk_unplug_work(struct work_struct *work); | ||
22 | void blk_unplug_timeout(unsigned long data); | ||
23 | void blk_rq_timed_out_timer(unsigned long data); | 21 | void blk_rq_timed_out_timer(unsigned long data); |
24 | void blk_delete_timer(struct request *); | 22 | void blk_delete_timer(struct request *); |
25 | void blk_add_timer(struct request *); | 23 | void blk_add_timer(struct request *); |
@@ -51,21 +49,17 @@ static inline void blk_clear_rq_complete(struct request *rq) | |||
51 | */ | 49 | */ |
52 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) | 50 | #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) |
53 | 51 | ||
54 | struct request *blk_do_flush(struct request_queue *q, struct request *rq); | 52 | void blk_insert_flush(struct request *rq); |
53 | void blk_abort_flushes(struct request_queue *q); | ||
55 | 54 | ||
56 | static inline struct request *__elv_next_request(struct request_queue *q) | 55 | static inline struct request *__elv_next_request(struct request_queue *q) |
57 | { | 56 | { |
58 | struct request *rq; | 57 | struct request *rq; |
59 | 58 | ||
60 | while (1) { | 59 | while (1) { |
61 | while (!list_empty(&q->queue_head)) { | 60 | if (!list_empty(&q->queue_head)) { |
62 | rq = list_entry_rq(q->queue_head.next); | 61 | rq = list_entry_rq(q->queue_head.next); |
63 | if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || | 62 | return rq; |
64 | rq == &q->flush_rq) | ||
65 | return rq; | ||
66 | rq = blk_do_flush(q, rq); | ||
67 | if (rq) | ||
68 | return rq; | ||
69 | } | 63 | } |
70 | 64 | ||
71 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) | 65 | if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) |
@@ -109,6 +103,8 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, | |||
109 | struct bio *bio); | 103 | struct bio *bio); |
110 | int attempt_back_merge(struct request_queue *q, struct request *rq); | 104 | int attempt_back_merge(struct request_queue *q, struct request *rq); |
111 | int attempt_front_merge(struct request_queue *q, struct request *rq); | 105 | int attempt_front_merge(struct request_queue *q, struct request *rq); |
106 | int blk_attempt_req_merge(struct request_queue *q, struct request *rq, | ||
107 | struct request *next); | ||
112 | void blk_recalc_rq_segments(struct request *rq); | 108 | void blk_recalc_rq_segments(struct request *rq); |
113 | void blk_rq_set_mixed_merge(struct request *rq); | 109 | void blk_rq_set_mixed_merge(struct request *rq); |
114 | 110 | ||
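With blk_insert_flush() doing the sequencing at insertion time, __elv_next_request() no longer has to recognise FLUSH/FUA requests on the dispatch path; it just takes the head of queue_head or asks the elevator for more. A toy model of that simplified loop, with an integer array standing in for queue_head and a counter for the scheduler's backlog:

#include <stdbool.h>
#include <stdio.h>

#define QUEUE_MAX 16

/* toy model of the simplified __elv_next_request(): take the head of
 * queue_head if present, otherwise ask the elevator to dispatch more */
struct toy_queue {
	int reqs[QUEUE_MAX];
	int head, tail;			/* head == tail: queue_head is empty */
	int elevator_backlog;		/* requests still held by the scheduler */
};

/* stand-in for elevator_dispatch_fn(): move one request to queue_head */
static bool toy_elevator_dispatch(struct toy_queue *q)
{
	if (!q->elevator_backlog)
		return false;
	q->reqs[q->tail++] = q->elevator_backlog--;
	return true;
}

static int toy_next_request(struct toy_queue *q)
{
	while (1) {
		if (q->head != q->tail)
			return q->reqs[q->head++];	/* just take the head */
		if (!toy_elevator_dispatch(q))
			return -1;			/* nothing left to do */
	}
}

int main(void)
{
	struct toy_queue q = { .elevator_backlog = 3 };
	int rq;

	while ((rq = toy_next_request(&q)) != -1)
		printf("dispatch request %d\n", rq);
	return 0;
}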
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index ea83a4f0c27d..7785169f3c8f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -54,9 +54,9 @@ static const int cfq_hist_divisor = 4; | |||
54 | #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) | 54 | #define CFQQ_SEEKY(cfqq) (hweight32(cfqq->seek_history) > 32/8) |
55 | 55 | ||
56 | #define RQ_CIC(rq) \ | 56 | #define RQ_CIC(rq) \ |
57 | ((struct cfq_io_context *) (rq)->elevator_private) | 57 | ((struct cfq_io_context *) (rq)->elevator_private[0]) |
58 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private2) | 58 | #define RQ_CFQQ(rq) (struct cfq_queue *) ((rq)->elevator_private[1]) |
59 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private3) | 59 | #define RQ_CFQG(rq) (struct cfq_group *) ((rq)->elevator_private[2]) |
60 | 60 | ||
61 | static struct kmem_cache *cfq_pool; | 61 | static struct kmem_cache *cfq_pool; |
62 | static struct kmem_cache *cfq_ioc_pool; | 62 | static struct kmem_cache *cfq_ioc_pool; |
@@ -146,7 +146,6 @@ struct cfq_queue { | |||
146 | struct cfq_rb_root *service_tree; | 146 | struct cfq_rb_root *service_tree; |
147 | struct cfq_queue *new_cfqq; | 147 | struct cfq_queue *new_cfqq; |
148 | struct cfq_group *cfqg; | 148 | struct cfq_group *cfqg; |
149 | struct cfq_group *orig_cfqg; | ||
150 | /* Number of sectors dispatched from queue in single dispatch round */ | 149 | /* Number of sectors dispatched from queue in single dispatch round */ |
151 | unsigned long nr_sectors; | 150 | unsigned long nr_sectors; |
152 | }; | 151 | }; |
@@ -179,6 +178,8 @@ struct cfq_group { | |||
179 | /* group service_tree key */ | 178 | /* group service_tree key */ |
180 | u64 vdisktime; | 179 | u64 vdisktime; |
181 | unsigned int weight; | 180 | unsigned int weight; |
181 | unsigned int new_weight; | ||
182 | bool needs_update; | ||
182 | 183 | ||
183 | /* number of cfqq currently on this group */ | 184 | /* number of cfqq currently on this group */ |
184 | int nr_cfqq; | 185 | int nr_cfqq; |
@@ -238,6 +239,7 @@ struct cfq_data { | |||
238 | struct rb_root prio_trees[CFQ_PRIO_LISTS]; | 239 | struct rb_root prio_trees[CFQ_PRIO_LISTS]; |
239 | 240 | ||
240 | unsigned int busy_queues; | 241 | unsigned int busy_queues; |
242 | unsigned int busy_sync_queues; | ||
241 | 243 | ||
242 | int rq_in_driver; | 244 | int rq_in_driver; |
243 | int rq_in_flight[2]; | 245 | int rq_in_flight[2]; |
@@ -285,7 +287,6 @@ struct cfq_data { | |||
285 | unsigned int cfq_slice_idle; | 287 | unsigned int cfq_slice_idle; |
286 | unsigned int cfq_group_idle; | 288 | unsigned int cfq_group_idle; |
287 | unsigned int cfq_latency; | 289 | unsigned int cfq_latency; |
288 | unsigned int cfq_group_isolation; | ||
289 | 290 | ||
290 | unsigned int cic_index; | 291 | unsigned int cic_index; |
291 | struct list_head cic_list; | 292 | struct list_head cic_list; |
@@ -501,13 +502,6 @@ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) | |||
501 | } | 502 | } |
502 | } | 503 | } |
503 | 504 | ||
504 | static int cfq_queue_empty(struct request_queue *q) | ||
505 | { | ||
506 | struct cfq_data *cfqd = q->elevator->elevator_data; | ||
507 | |||
508 | return !cfqd->rq_queued; | ||
509 | } | ||
510 | |||
511 | /* | 505 | /* |
512 | * Scale schedule slice based on io priority. Use the sync time slice only | 506 | * Scale schedule slice based on io priority. Use the sync time slice only |
513 | * if a queue is marked sync and has sync io queued. A sync queue with async | 507 | * if a queue is marked sync and has sync io queued. A sync queue with async |
@@ -558,15 +552,13 @@ static inline u64 min_vdisktime(u64 min_vdisktime, u64 vdisktime) | |||
558 | 552 | ||
559 | static void update_min_vdisktime(struct cfq_rb_root *st) | 553 | static void update_min_vdisktime(struct cfq_rb_root *st) |
560 | { | 554 | { |
561 | u64 vdisktime = st->min_vdisktime; | ||
562 | struct cfq_group *cfqg; | 555 | struct cfq_group *cfqg; |
563 | 556 | ||
564 | if (st->left) { | 557 | if (st->left) { |
565 | cfqg = rb_entry_cfqg(st->left); | 558 | cfqg = rb_entry_cfqg(st->left); |
566 | vdisktime = min_vdisktime(vdisktime, cfqg->vdisktime); | 559 | st->min_vdisktime = max_vdisktime(st->min_vdisktime, |
560 | cfqg->vdisktime); | ||
567 | } | 561 | } |
568 | |||
569 | st->min_vdisktime = max_vdisktime(st->min_vdisktime, vdisktime); | ||
570 | } | 562 | } |
571 | 563 | ||
572 | /* | 564 | /* |
@@ -863,7 +855,27 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) | |||
863 | } | 855 | } |
864 | 856 | ||
865 | static void | 857 | static void |
866 | cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | 858 | cfq_update_group_weight(struct cfq_group *cfqg) |
859 | { | ||
860 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); | ||
861 | if (cfqg->needs_update) { | ||
862 | cfqg->weight = cfqg->new_weight; | ||
863 | cfqg->needs_update = false; | ||
864 | } | ||
865 | } | ||
866 | |||
867 | static void | ||
868 | cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) | ||
869 | { | ||
870 | BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); | ||
871 | |||
872 | cfq_update_group_weight(cfqg); | ||
873 | __cfq_group_service_tree_add(st, cfqg); | ||
874 | st->total_weight += cfqg->weight; | ||
875 | } | ||
876 | |||
877 | static void | ||
878 | cfq_group_notify_queue_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | ||
867 | { | 879 | { |
868 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 880 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
869 | struct cfq_group *__cfqg; | 881 | struct cfq_group *__cfqg; |
@@ -884,13 +896,19 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
884 | cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; | 896 | cfqg->vdisktime = __cfqg->vdisktime + CFQ_IDLE_DELAY; |
885 | } else | 897 | } else |
886 | cfqg->vdisktime = st->min_vdisktime; | 898 | cfqg->vdisktime = st->min_vdisktime; |
899 | cfq_group_service_tree_add(st, cfqg); | ||
900 | } | ||
887 | 901 | ||
888 | __cfq_group_service_tree_add(st, cfqg); | 902 | static void |
889 | st->total_weight += cfqg->weight; | 903 | cfq_group_service_tree_del(struct cfq_rb_root *st, struct cfq_group *cfqg) |
904 | { | ||
905 | st->total_weight -= cfqg->weight; | ||
906 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) | ||
907 | cfq_rb_erase(&cfqg->rb_node, st); | ||
890 | } | 908 | } |
891 | 909 | ||
892 | static void | 910 | static void |
893 | cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | 911 | cfq_group_notify_queue_del(struct cfq_data *cfqd, struct cfq_group *cfqg) |
894 | { | 912 | { |
895 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 913 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
896 | 914 | ||
@@ -902,14 +920,13 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
902 | return; | 920 | return; |
903 | 921 | ||
904 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); | 922 | cfq_log_cfqg(cfqd, cfqg, "del_from_rr group"); |
905 | st->total_weight -= cfqg->weight; | 923 | cfq_group_service_tree_del(st, cfqg); |
906 | if (!RB_EMPTY_NODE(&cfqg->rb_node)) | ||
907 | cfq_rb_erase(&cfqg->rb_node, st); | ||
908 | cfqg->saved_workload_slice = 0; | 924 | cfqg->saved_workload_slice = 0; |
909 | cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1); | 925 | cfq_blkiocg_update_dequeue_stats(&cfqg->blkg, 1); |
910 | } | 926 | } |
911 | 927 | ||
912 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) | 928 | static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq, |
929 | unsigned int *unaccounted_time) | ||
913 | { | 930 | { |
914 | unsigned int slice_used; | 931 | unsigned int slice_used; |
915 | 932 | ||
@@ -928,8 +945,13 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq) | |||
928 | 1); | 945 | 1); |
929 | } else { | 946 | } else { |
930 | slice_used = jiffies - cfqq->slice_start; | 947 | slice_used = jiffies - cfqq->slice_start; |
931 | if (slice_used > cfqq->allocated_slice) | 948 | if (slice_used > cfqq->allocated_slice) { |
949 | *unaccounted_time = slice_used - cfqq->allocated_slice; | ||
932 | slice_used = cfqq->allocated_slice; | 950 | slice_used = cfqq->allocated_slice; |
951 | } | ||
952 | if (time_after(cfqq->slice_start, cfqq->dispatch_start)) | ||
953 | *unaccounted_time += cfqq->slice_start - | ||
954 | cfqq->dispatch_start; | ||
933 | } | 955 | } |
934 | 956 | ||
935 | return slice_used; | 957 | return slice_used; |
@@ -939,12 +961,12 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
939 | struct cfq_queue *cfqq) | 961 | struct cfq_queue *cfqq) |
940 | { | 962 | { |
941 | struct cfq_rb_root *st = &cfqd->grp_service_tree; | 963 | struct cfq_rb_root *st = &cfqd->grp_service_tree; |
942 | unsigned int used_sl, charge; | 964 | unsigned int used_sl, charge, unaccounted_sl = 0; |
943 | int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) | 965 | int nr_sync = cfqg->nr_cfqq - cfqg_busy_async_queues(cfqd, cfqg) |
944 | - cfqg->service_tree_idle.count; | 966 | - cfqg->service_tree_idle.count; |
945 | 967 | ||
946 | BUG_ON(nr_sync < 0); | 968 | BUG_ON(nr_sync < 0); |
947 | used_sl = charge = cfq_cfqq_slice_usage(cfqq); | 969 | used_sl = charge = cfq_cfqq_slice_usage(cfqq, &unaccounted_sl); |
948 | 970 | ||
949 | if (iops_mode(cfqd)) | 971 | if (iops_mode(cfqd)) |
950 | charge = cfqq->slice_dispatch; | 972 | charge = cfqq->slice_dispatch; |
@@ -952,9 +974,10 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
952 | charge = cfqq->allocated_slice; | 974 | charge = cfqq->allocated_slice; |
953 | 975 | ||
954 | /* Can't update vdisktime while group is on service tree */ | 976 | /* Can't update vdisktime while group is on service tree */ |
955 | cfq_rb_erase(&cfqg->rb_node, st); | 977 | cfq_group_service_tree_del(st, cfqg); |
956 | cfqg->vdisktime += cfq_scale_slice(charge, cfqg); | 978 | cfqg->vdisktime += cfq_scale_slice(charge, cfqg); |
957 | __cfq_group_service_tree_add(st, cfqg); | 979 | /* If a new weight was requested, update now, off tree */ |
980 | cfq_group_service_tree_add(st, cfqg); | ||
958 | 981 | ||
959 | /* This group is being expired. Save the context */ | 982 | /* This group is being expired. Save the context */ |
960 | if (time_after(cfqd->workload_expires, jiffies)) { | 983 | if (time_after(cfqd->workload_expires, jiffies)) { |
@@ -970,7 +993,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg, | |||
970 | cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u" | 993 | cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u disp=%u charge=%u iops=%u" |
971 | " sect=%u", used_sl, cfqq->slice_dispatch, charge, | 994 | " sect=%u", used_sl, cfqq->slice_dispatch, charge, |
972 | iops_mode(cfqd), cfqq->nr_sectors); | 995 | iops_mode(cfqd), cfqq->nr_sectors); |
973 | cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl); | 996 | cfq_blkiocg_update_timeslice_used(&cfqg->blkg, used_sl, |
997 | unaccounted_sl); | ||
974 | cfq_blkiocg_set_start_empty_time(&cfqg->blkg); | 998 | cfq_blkiocg_set_start_empty_time(&cfqg->blkg); |
975 | } | 999 | } |
976 | 1000 | ||
@@ -985,7 +1009,9 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg) | |||
985 | void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, | 1009 | void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, |
986 | unsigned int weight) | 1010 | unsigned int weight) |
987 | { | 1011 | { |
988 | cfqg_of_blkg(blkg)->weight = weight; | 1012 | struct cfq_group *cfqg = cfqg_of_blkg(blkg); |
1013 | cfqg->new_weight = weight; | ||
1014 | cfqg->needs_update = true; | ||
989 | } | 1015 | } |
990 | 1016 | ||
991 | static struct cfq_group * | 1017 | static struct cfq_group * |
@@ -1187,32 +1213,6 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1187 | int new_cfqq = 1; | 1213 | int new_cfqq = 1; |
1188 | int group_changed = 0; | 1214 | int group_changed = 0; |
1189 | 1215 | ||
1190 | #ifdef CONFIG_CFQ_GROUP_IOSCHED | ||
1191 | if (!cfqd->cfq_group_isolation | ||
1192 | && cfqq_type(cfqq) == SYNC_NOIDLE_WORKLOAD | ||
1193 | && cfqq->cfqg && cfqq->cfqg != &cfqd->root_group) { | ||
1194 | /* Move this cfq to root group */ | ||
1195 | cfq_log_cfqq(cfqd, cfqq, "moving to root group"); | ||
1196 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) | ||
1197 | cfq_group_service_tree_del(cfqd, cfqq->cfqg); | ||
1198 | cfqq->orig_cfqg = cfqq->cfqg; | ||
1199 | cfqq->cfqg = &cfqd->root_group; | ||
1200 | cfqd->root_group.ref++; | ||
1201 | group_changed = 1; | ||
1202 | } else if (!cfqd->cfq_group_isolation | ||
1203 | && cfqq_type(cfqq) == SYNC_WORKLOAD && cfqq->orig_cfqg) { | ||
1204 | /* cfqq is sequential now needs to go to its original group */ | ||
1205 | BUG_ON(cfqq->cfqg != &cfqd->root_group); | ||
1206 | if (!RB_EMPTY_NODE(&cfqq->rb_node)) | ||
1207 | cfq_group_service_tree_del(cfqd, cfqq->cfqg); | ||
1208 | cfq_put_cfqg(cfqq->cfqg); | ||
1209 | cfqq->cfqg = cfqq->orig_cfqg; | ||
1210 | cfqq->orig_cfqg = NULL; | ||
1211 | group_changed = 1; | ||
1212 | cfq_log_cfqq(cfqd, cfqq, "moved to origin group"); | ||
1213 | } | ||
1214 | #endif | ||
1215 | |||
1216 | service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), | 1216 | service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq), |
1217 | cfqq_type(cfqq)); | 1217 | cfqq_type(cfqq)); |
1218 | if (cfq_class_idle(cfqq)) { | 1218 | if (cfq_class_idle(cfqq)) { |
@@ -1284,7 +1284,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, | |||
1284 | service_tree->count++; | 1284 | service_tree->count++; |
1285 | if ((add_front || !new_cfqq) && !group_changed) | 1285 | if ((add_front || !new_cfqq) && !group_changed) |
1286 | return; | 1286 | return; |
1287 | cfq_group_service_tree_add(cfqd, cfqq->cfqg); | 1287 | cfq_group_notify_queue_add(cfqd, cfqq->cfqg); |
1288 | } | 1288 | } |
1289 | 1289 | ||
1290 | static struct cfq_queue * | 1290 | static struct cfq_queue * |
@@ -1372,6 +1372,8 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1372 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 1372 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
1373 | cfq_mark_cfqq_on_rr(cfqq); | 1373 | cfq_mark_cfqq_on_rr(cfqq); |
1374 | cfqd->busy_queues++; | 1374 | cfqd->busy_queues++; |
1375 | if (cfq_cfqq_sync(cfqq)) | ||
1376 | cfqd->busy_sync_queues++; | ||
1375 | 1377 | ||
1376 | cfq_resort_rr_list(cfqd, cfqq); | 1378 | cfq_resort_rr_list(cfqd, cfqq); |
1377 | } | 1379 | } |
@@ -1395,9 +1397,11 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
1395 | cfqq->p_root = NULL; | 1397 | cfqq->p_root = NULL; |
1396 | } | 1398 | } |
1397 | 1399 | ||
1398 | cfq_group_service_tree_del(cfqd, cfqq->cfqg); | 1400 | cfq_group_notify_queue_del(cfqd, cfqq->cfqg); |
1399 | BUG_ON(!cfqd->busy_queues); | 1401 | BUG_ON(!cfqd->busy_queues); |
1400 | cfqd->busy_queues--; | 1402 | cfqd->busy_queues--; |
1403 | if (cfq_cfqq_sync(cfqq)) | ||
1404 | cfqd->busy_sync_queues--; | ||
1401 | } | 1405 | } |
1402 | 1406 | ||
1403 | /* | 1407 | /* |
@@ -2405,6 +2409,7 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
2405 | * Does this cfqq already have too much IO in flight? | 2409 | * Does this cfqq already have too much IO in flight? |
2406 | */ | 2410 | */ |
2407 | if (cfqq->dispatched >= max_dispatch) { | 2411 | if (cfqq->dispatched >= max_dispatch) { |
2412 | bool promote_sync = false; | ||
2408 | /* | 2413 | /* |
2409 | * idle queue must always only have a single IO in flight | 2414 | * idle queue must always only have a single IO in flight |
2410 | */ | 2415 | */ |
@@ -2412,15 +2417,26 @@ static bool cfq_may_dispatch(struct cfq_data *cfqd, struct cfq_queue *cfqq) | |||
2412 | return false; | 2417 | return false; |
2413 | 2418 | ||
2414 | /* | 2419 | /* |
2420 | * If there is only one sync queue | ||
2421 | * we can ignore async queue here and give the sync | ||
2422 | * queue no dispatch limit. The reason is a sync queue can | ||
2423 | * preempt async queue, limiting the sync queue doesn't make | ||
2424 | * sense. This is useful for aiostress test. | ||
2425 | */ | ||
2426 | if (cfq_cfqq_sync(cfqq) && cfqd->busy_sync_queues == 1) | ||
2427 | promote_sync = true; | ||
2428 | |||
2429 | /* | ||
2415 | * We have other queues, don't allow more IO from this one | 2430 | * We have other queues, don't allow more IO from this one |
2416 | */ | 2431 | */ |
2417 | if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq)) | 2432 | if (cfqd->busy_queues > 1 && cfq_slice_used_soon(cfqd, cfqq) && |
2433 | !promote_sync) | ||
2418 | return false; | 2434 | return false; |
2419 | 2435 | ||
2420 | /* | 2436 | /* |
2421 | * Sole queue user, no limit | 2437 | * Sole queue user, no limit |
2422 | */ | 2438 | */ |
2423 | if (cfqd->busy_queues == 1) | 2439 | if (cfqd->busy_queues == 1 || promote_sync) |
2424 | max_dispatch = -1; | 2440 | max_dispatch = -1; |
2425 | else | 2441 | else |
2426 | /* | 2442 | /* |
@@ -2542,7 +2558,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) | |||
2542 | static void cfq_put_queue(struct cfq_queue *cfqq) | 2558 | static void cfq_put_queue(struct cfq_queue *cfqq) |
2543 | { | 2559 | { |
2544 | struct cfq_data *cfqd = cfqq->cfqd; | 2560 | struct cfq_data *cfqd = cfqq->cfqd; |
2545 | struct cfq_group *cfqg, *orig_cfqg; | 2561 | struct cfq_group *cfqg; |
2546 | 2562 | ||
2547 | BUG_ON(cfqq->ref <= 0); | 2563 | BUG_ON(cfqq->ref <= 0); |
2548 | 2564 | ||
@@ -2554,7 +2570,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq) | |||
2554 | BUG_ON(rb_first(&cfqq->sort_list)); | 2570 | BUG_ON(rb_first(&cfqq->sort_list)); |
2555 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); | 2571 | BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]); |
2556 | cfqg = cfqq->cfqg; | 2572 | cfqg = cfqq->cfqg; |
2557 | orig_cfqg = cfqq->orig_cfqg; | ||
2558 | 2573 | ||
2559 | if (unlikely(cfqd->active_queue == cfqq)) { | 2574 | if (unlikely(cfqd->active_queue == cfqq)) { |
2560 | __cfq_slice_expired(cfqd, cfqq, 0); | 2575 | __cfq_slice_expired(cfqd, cfqq, 0); |
@@ -2564,8 +2579,6 @@ static void cfq_put_queue(struct cfq_queue *cfqq) | |||
2564 | BUG_ON(cfq_cfqq_on_rr(cfqq)); | 2579 | BUG_ON(cfq_cfqq_on_rr(cfqq)); |
2565 | kmem_cache_free(cfq_pool, cfqq); | 2580 | kmem_cache_free(cfq_pool, cfqq); |
2566 | cfq_put_cfqg(cfqg); | 2581 | cfq_put_cfqg(cfqg); |
2567 | if (orig_cfqg) | ||
2568 | cfq_put_cfqg(orig_cfqg); | ||
2569 | } | 2582 | } |
2570 | 2583 | ||
2571 | /* | 2584 | /* |
@@ -3613,12 +3626,12 @@ static void cfq_put_request(struct request *rq) | |||
3613 | 3626 | ||
3614 | put_io_context(RQ_CIC(rq)->ioc); | 3627 | put_io_context(RQ_CIC(rq)->ioc); |
3615 | 3628 | ||
3616 | rq->elevator_private = NULL; | 3629 | rq->elevator_private[0] = NULL; |
3617 | rq->elevator_private2 = NULL; | 3630 | rq->elevator_private[1] = NULL; |
3618 | 3631 | ||
3619 | /* Put down rq reference on cfqg */ | 3632 | /* Put down rq reference on cfqg */ |
3620 | cfq_put_cfqg(RQ_CFQG(rq)); | 3633 | cfq_put_cfqg(RQ_CFQG(rq)); |
3621 | rq->elevator_private3 = NULL; | 3634 | rq->elevator_private[2] = NULL; |
3622 | 3635 | ||
3623 | cfq_put_queue(cfqq); | 3636 | cfq_put_queue(cfqq); |
3624 | } | 3637 | } |
@@ -3705,13 +3718,12 @@ new_queue: | |||
3705 | } | 3718 | } |
3706 | 3719 | ||
3707 | cfqq->allocated[rw]++; | 3720 | cfqq->allocated[rw]++; |
3708 | cfqq->ref++; | ||
3709 | rq->elevator_private = cic; | ||
3710 | rq->elevator_private2 = cfqq; | ||
3711 | rq->elevator_private3 = cfq_ref_get_cfqg(cfqq->cfqg); | ||
3712 | 3721 | ||
3722 | cfqq->ref++; | ||
3723 | rq->elevator_private[0] = cic; | ||
3724 | rq->elevator_private[1] = cfqq; | ||
3725 | rq->elevator_private[2] = cfq_ref_get_cfqg(cfqq->cfqg); | ||
3713 | spin_unlock_irqrestore(q->queue_lock, flags); | 3726 | spin_unlock_irqrestore(q->queue_lock, flags); |
3714 | |||
3715 | return 0; | 3727 | return 0; |
3716 | 3728 | ||
3717 | queue_fail: | 3729 | queue_fail: |
@@ -3953,7 +3965,6 @@ static void *cfq_init_queue(struct request_queue *q) | |||
3953 | cfqd->cfq_slice_idle = cfq_slice_idle; | 3965 | cfqd->cfq_slice_idle = cfq_slice_idle; |
3954 | cfqd->cfq_group_idle = cfq_group_idle; | 3966 | cfqd->cfq_group_idle = cfq_group_idle; |
3955 | cfqd->cfq_latency = 1; | 3967 | cfqd->cfq_latency = 1; |
3956 | cfqd->cfq_group_isolation = 0; | ||
3957 | cfqd->hw_tag = -1; | 3968 | cfqd->hw_tag = -1; |
3958 | /* | 3969 | /* |
3959 | * we optimistically start assuming sync ops weren't delayed in last | 3970 | * we optimistically start assuming sync ops weren't delayed in last |
@@ -4029,7 +4040,6 @@ SHOW_FUNCTION(cfq_slice_sync_show, cfqd->cfq_slice[1], 1); | |||
4029 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); | 4040 | SHOW_FUNCTION(cfq_slice_async_show, cfqd->cfq_slice[0], 1); |
4030 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); | 4041 | SHOW_FUNCTION(cfq_slice_async_rq_show, cfqd->cfq_slice_async_rq, 0); |
4031 | SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); | 4042 | SHOW_FUNCTION(cfq_low_latency_show, cfqd->cfq_latency, 0); |
4032 | SHOW_FUNCTION(cfq_group_isolation_show, cfqd->cfq_group_isolation, 0); | ||
4033 | #undef SHOW_FUNCTION | 4043 | #undef SHOW_FUNCTION |
4034 | 4044 | ||
4035 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ | 4045 | #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV) \ |
@@ -4063,7 +4073,6 @@ STORE_FUNCTION(cfq_slice_async_store, &cfqd->cfq_slice[0], 1, UINT_MAX, 1); | |||
4063 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, | 4073 | STORE_FUNCTION(cfq_slice_async_rq_store, &cfqd->cfq_slice_async_rq, 1, |
4064 | UINT_MAX, 0); | 4074 | UINT_MAX, 0); |
4065 | STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); | 4075 | STORE_FUNCTION(cfq_low_latency_store, &cfqd->cfq_latency, 0, 1, 0); |
4066 | STORE_FUNCTION(cfq_group_isolation_store, &cfqd->cfq_group_isolation, 0, 1, 0); | ||
4067 | #undef STORE_FUNCTION | 4076 | #undef STORE_FUNCTION |
4068 | 4077 | ||
4069 | #define CFQ_ATTR(name) \ | 4078 | #define CFQ_ATTR(name) \ |
@@ -4081,7 +4090,6 @@ static struct elv_fs_entry cfq_attrs[] = { | |||
4081 | CFQ_ATTR(slice_idle), | 4090 | CFQ_ATTR(slice_idle), |
4082 | CFQ_ATTR(group_idle), | 4091 | CFQ_ATTR(group_idle), |
4083 | CFQ_ATTR(low_latency), | 4092 | CFQ_ATTR(low_latency), |
4084 | CFQ_ATTR(group_isolation), | ||
4085 | __ATTR_NULL | 4093 | __ATTR_NULL |
4086 | }; | 4094 | }; |
4087 | 4095 | ||
@@ -4096,7 +4104,6 @@ static struct elevator_type iosched_cfq = { | |||
4096 | .elevator_add_req_fn = cfq_insert_request, | 4104 | .elevator_add_req_fn = cfq_insert_request, |
4097 | .elevator_activate_req_fn = cfq_activate_request, | 4105 | .elevator_activate_req_fn = cfq_activate_request, |
4098 | .elevator_deactivate_req_fn = cfq_deactivate_request, | 4106 | .elevator_deactivate_req_fn = cfq_deactivate_request, |
4099 | .elevator_queue_empty_fn = cfq_queue_empty, | ||
4100 | .elevator_completed_req_fn = cfq_completed_request, | 4107 | .elevator_completed_req_fn = cfq_completed_request, |
4101 | .elevator_former_req_fn = elv_rb_former_request, | 4108 | .elevator_former_req_fn = elv_rb_former_request, |
4102 | .elevator_latter_req_fn = elv_rb_latter_request, | 4109 | .elevator_latter_req_fn = elv_rb_latter_request, |
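The cfq changes defer group weight updates: cfq_update_blkio_group_weight() only records new_weight and sets needs_update, and the value is applied by cfq_update_group_weight() while the group is off the service tree, so the rbtree is never reordered underneath itself. A minimal sketch of that deferral, with the service tree reduced to an on_tree flag and a running total_weight; the toy_* helpers are illustrative, not the cfq functions.

#include <stdbool.h>
#include <stdio.h>

/* toy cfq_group: the weight may only change while the group is off the tree */
struct toy_group {
	unsigned int weight;
	unsigned int new_weight;
	bool needs_update;
	bool on_tree;
};

/* cf. cfq_update_blkio_group_weight(): only record the request */
static void toy_request_weight(struct toy_group *g, unsigned int weight)
{
	g->new_weight = weight;
	g->needs_update = true;
}

/* cf. cfq_update_group_weight(): apply only while the group is not queued */
static void toy_apply_weight(struct toy_group *g)
{
	if (g->on_tree)			/* the kernel BUG_ON()s here instead */
		return;
	if (g->needs_update) {
		g->weight = g->new_weight;
		g->needs_update = false;
	}
}

/* cf. cfq_group_service_tree_add(): apply any pending weight, then enqueue */
static void toy_tree_add(struct toy_group *g, unsigned int *total_weight)
{
	toy_apply_weight(g);
	g->on_tree = true;
	*total_weight += g->weight;
}

/* cf. cfq_group_service_tree_del(): the total always drops by the live weight */
static void toy_tree_del(struct toy_group *g, unsigned int *total_weight)
{
	*total_weight -= g->weight;
	g->on_tree = false;
}

int main(void)
{
	struct toy_group g = { .weight = 500 };
	unsigned int total = 0;

	toy_tree_add(&g, &total);
	toy_request_weight(&g, 1000);	/* change arrives while queued */
	toy_tree_del(&g, &total);	/* drops the old weight from the total */
	toy_tree_add(&g, &total);	/* new weight applied off tree */
	printf("weight=%u total=%u\n", g.weight, total);
	return 0;
}

Applying the weight only between del and add is also why cfq_group_served() now re-adds the group through cfq_group_service_tree_add() instead of the bare __cfq_group_service_tree_add().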
diff --git a/block/cfq.h b/block/cfq.h
index 54a6d90f8e8c..2a155927e37c 100644
--- a/block/cfq.h
+++ b/block/cfq.h
@@ -16,9 +16,9 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, | |||
16 | } | 16 | } |
17 | 17 | ||
18 | static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, | 18 | static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, |
19 | unsigned long time) | 19 | unsigned long time, unsigned long unaccounted_time) |
20 | { | 20 | { |
21 | blkiocg_update_timeslice_used(blkg, time); | 21 | blkiocg_update_timeslice_used(blkg, time, unaccounted_time); |
22 | } | 22 | } |
23 | 23 | ||
24 | static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) | 24 | static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) |
@@ -85,7 +85,7 @@ static inline void cfq_blkiocg_update_dequeue_stats(struct blkio_group *blkg, | |||
85 | unsigned long dequeue) {} | 85 | unsigned long dequeue) {} |
86 | 86 | ||
87 | static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, | 87 | static inline void cfq_blkiocg_update_timeslice_used(struct blkio_group *blkg, |
88 | unsigned long time) {} | 88 | unsigned long time, unsigned long unaccounted_time) {} |
89 | static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) {} | 89 | static inline void cfq_blkiocg_set_start_empty_time(struct blkio_group *blkg) {} |
90 | static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg, | 90 | static inline void cfq_blkiocg_update_io_remove_stats(struct blkio_group *blkg, |
91 | bool direction, bool sync) {} | 91 | bool direction, bool sync) {} |
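The extra unaccounted_time argument carries the part of a queue's wall-clock slice that should not be charged to the group: whatever ran beyond allocated_slice, plus any gap between dispatch_start and slice_start, as computed in cfq_cfqq_slice_usage() above. A small arithmetic sketch of that split, with jiffies replaced by plain integers and toy_slice_usage() as a hypothetical stand-in name:

#include <stdio.h>

/* toy version of the used/unaccounted split in cfq_cfqq_slice_usage() */
static unsigned int toy_slice_usage(unsigned int dispatch_start,
				    unsigned int slice_start,
				    unsigned int allocated_slice,
				    unsigned int now,
				    unsigned int *unaccounted)
{
	unsigned int used = now - slice_start;

	*unaccounted = 0;
	if (used > allocated_slice) {
		*unaccounted = used - allocated_slice;	/* overran the slice */
		used = allocated_slice;
	}
	if (slice_start > dispatch_start)	/* waited before the slice began */
		*unaccounted += slice_start - dispatch_start;
	return used;
}

int main(void)
{
	unsigned int unaccounted;
	/* dispatched at 100, slice ran 120..180 with a 40-tick allocation */
	unsigned int used = toy_slice_usage(100, 120, 40, 180, &unaccounted);

	printf("used=%u unaccounted=%u\n", used, unaccounted);
	return 0;
}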
diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c
index b547cbca7b23..5139c0ea1864 100644
--- a/block/deadline-iosched.c
+++ b/block/deadline-iosched.c
@@ -326,14 +326,6 @@ dispatch_request: | |||
326 | return 1; | 326 | return 1; |
327 | } | 327 | } |
328 | 328 | ||
329 | static int deadline_queue_empty(struct request_queue *q) | ||
330 | { | ||
331 | struct deadline_data *dd = q->elevator->elevator_data; | ||
332 | |||
333 | return list_empty(&dd->fifo_list[WRITE]) | ||
334 | && list_empty(&dd->fifo_list[READ]); | ||
335 | } | ||
336 | |||
337 | static void deadline_exit_queue(struct elevator_queue *e) | 329 | static void deadline_exit_queue(struct elevator_queue *e) |
338 | { | 330 | { |
339 | struct deadline_data *dd = e->elevator_data; | 331 | struct deadline_data *dd = e->elevator_data; |
@@ -445,7 +437,6 @@ static struct elevator_type iosched_deadline = { | |||
445 | .elevator_merge_req_fn = deadline_merged_requests, | 437 | .elevator_merge_req_fn = deadline_merged_requests, |
446 | .elevator_dispatch_fn = deadline_dispatch_requests, | 438 | .elevator_dispatch_fn = deadline_dispatch_requests, |
447 | .elevator_add_req_fn = deadline_add_request, | 439 | .elevator_add_req_fn = deadline_add_request, |
448 | .elevator_queue_empty_fn = deadline_queue_empty, | ||
449 | .elevator_former_req_fn = elv_rb_former_request, | 440 | .elevator_former_req_fn = elv_rb_former_request, |
450 | .elevator_latter_req_fn = elv_rb_latter_request, | 441 | .elevator_latter_req_fn = elv_rb_latter_request, |
451 | .elevator_init_fn = deadline_init_queue, | 442 | .elevator_init_fn = deadline_init_queue, |
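The deleted `deadline_queue_empty()` was a one-liner over the scheduler's two FIFO lists; with the move to explicit plugging the block core stops polling elevators for emptiness, so the hook and each per-scheduler implementation can go. For reference, a rough userspace rendering of what the removed check computed — the `fifo_count` array below is a stand-in for `list_empty(&dd->fifo_list[dir])`:

```c
/* What deadline_queue_empty() amounted to, modelled in plain C. */
#include <stdbool.h>
#include <stdio.h>

enum { DD_READ = 0, DD_WRITE = 1 };

struct deadline_data {
	int fifo_count[2];   /* stand-in for the READ/WRITE FIFO list heads */
};

static bool deadline_queue_empty(const struct deadline_data *dd)
{
	/* Empty only when both the READ and the WRITE FIFO are empty. */
	return dd->fifo_count[DD_READ] == 0 && dd->fifo_count[DD_WRITE] == 0;
}

int main(void)
{
	struct deadline_data dd = { { 0, 3 } };

	printf("empty? %s\n", deadline_queue_empty(&dd) ? "yes" : "no");
	return 0;
}
```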
diff --git a/block/elevator.c b/block/elevator.c index 236e93c1f46c..c387d3168734 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -113,7 +113,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) | |||
113 | } | 113 | } |
114 | EXPORT_SYMBOL(elv_rq_merge_ok); | 114 | EXPORT_SYMBOL(elv_rq_merge_ok); |
115 | 115 | ||
116 | static inline int elv_try_merge(struct request *__rq, struct bio *bio) | 116 | int elv_try_merge(struct request *__rq, struct bio *bio) |
117 | { | 117 | { |
118 | int ret = ELEVATOR_NO_MERGE; | 118 | int ret = ELEVATOR_NO_MERGE; |
119 | 119 | ||
@@ -421,6 +421,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) | |||
421 | struct list_head *entry; | 421 | struct list_head *entry; |
422 | int stop_flags; | 422 | int stop_flags; |
423 | 423 | ||
424 | BUG_ON(rq->cmd_flags & REQ_ON_PLUG); | ||
425 | |||
424 | if (q->last_merge == rq) | 426 | if (q->last_merge == rq) |
425 | q->last_merge = NULL; | 427 | q->last_merge = NULL; |
426 | 428 | ||
@@ -519,6 +521,40 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio) | |||
519 | return ELEVATOR_NO_MERGE; | 521 | return ELEVATOR_NO_MERGE; |
520 | } | 522 | } |
521 | 523 | ||
524 | /* | ||
525 | * Attempt to do an insertion back merge. Only check for the case where | ||
526 | * we can append 'rq' to an existing request, so we can throw 'rq' away | ||
527 | * afterwards. | ||
528 | * | ||
529 | * Returns true if we merged, false otherwise | ||
530 | */ | ||
531 | static bool elv_attempt_insert_merge(struct request_queue *q, | ||
532 | struct request *rq) | ||
533 | { | ||
534 | struct request *__rq; | ||
535 | |||
536 | if (blk_queue_nomerges(q)) | ||
537 | return false; | ||
538 | |||
539 | /* | ||
540 | * First try one-hit cache. | ||
541 | */ | ||
542 | if (q->last_merge && blk_attempt_req_merge(q, q->last_merge, rq)) | ||
543 | return true; | ||
544 | |||
545 | if (blk_queue_noxmerges(q)) | ||
546 | return false; | ||
547 | |||
548 | /* | ||
549 | * See if our hash lookup can find a potential backmerge. | ||
550 | */ | ||
551 | __rq = elv_rqhash_find(q, blk_rq_pos(rq)); | ||
552 | if (__rq && blk_attempt_req_merge(q, __rq, rq)) | ||
553 | return true; | ||
554 | |||
555 | return false; | ||
556 | } | ||
557 | |||
522 | void elv_merged_request(struct request_queue *q, struct request *rq, int type) | 558 | void elv_merged_request(struct request_queue *q, struct request *rq, int type) |
523 | { | 559 | { |
524 | struct elevator_queue *e = q->elevator; | 560 | struct elevator_queue *e = q->elevator; |
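The added helper is the heart of the "attempt to merge with existing requests on plug flush" change listed in the merge summary: before sorting a request into the queue, try to append it to something already queued, first via the one-hit `last_merge` cache and then via a hash lookup keyed on where pending requests end. The sketch below is a simplified userspace model of that two-step probe; the toy `struct queue`, single-slot hash and `attempt_back_merge()` are inventions standing in for `q->last_merge`, `elv_rqhash_find()` and `blk_attempt_req_merge()`, and the real helper additionally bails out early when the queue's nomerges/noxmerges flags are set:

```c
/* Toy model of elv_attempt_insert_merge(): one-hit cache, then hash lookup. */
#include <stdbool.h>
#include <stdio.h>

struct request {
	unsigned long pos;      /* first sector */
	unsigned long sectors;  /* length in sectors */
};

#define HASH_SIZE 64

struct queue {
	struct request *last_merge;          /* one-hit merge cache */
	struct request *hash[HASH_SIZE];     /* keyed by end sector of queued rq */
};

static unsigned long rq_end(const struct request *rq)
{
	return rq->pos + rq->sectors;
}

static void hash_add(struct queue *q, struct request *rq)
{
	q->hash[rq_end(rq) % HASH_SIZE] = rq;   /* toy: one entry per bucket */
}

/* Append 'rq' to 'found' if 'found' ends exactly where 'rq' starts. */
static bool attempt_back_merge(struct queue *q, struct request *found,
			       struct request *rq)
{
	if (rq_end(found) != rq->pos)
		return false;
	found->sectors += rq->sectors;
	q->last_merge = found;
	/* the real code also repositions the merged request in the hash */
	return true;
}

static bool attempt_insert_merge(struct queue *q, struct request *rq)
{
	struct request *found;

	/* 1) one-hit cache */
	if (q->last_merge && attempt_back_merge(q, q->last_merge, rq))
		return true;

	/* 2) hash lookup: a queued request that ends at rq->pos */
	found = q->hash[rq->pos % HASH_SIZE];
	if (found && attempt_back_merge(q, found, rq))
		return true;

	return false;
}

int main(void)
{
	static struct request queued = { 0, 8 };   /* sectors 0..7 */
	struct request incoming = { 8, 4 };        /* starts where 'queued' ends */
	struct queue q = { 0 };

	hash_add(&q, &queued);
	printf("merged? %s (queued now %lu sectors)\n",
	       attempt_insert_merge(&q, &incoming) ? "yes" : "no",
	       queued.sectors);
	return 0;
}
```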
@@ -536,14 +572,18 @@ void elv_merge_requests(struct request_queue *q, struct request *rq, | |||
536 | struct request *next) | 572 | struct request *next) |
537 | { | 573 | { |
538 | struct elevator_queue *e = q->elevator; | 574 | struct elevator_queue *e = q->elevator; |
575 | const int next_sorted = next->cmd_flags & REQ_SORTED; | ||
539 | 576 | ||
540 | if (e->ops->elevator_merge_req_fn) | 577 | if (next_sorted && e->ops->elevator_merge_req_fn) |
541 | e->ops->elevator_merge_req_fn(q, rq, next); | 578 | e->ops->elevator_merge_req_fn(q, rq, next); |
542 | 579 | ||
543 | elv_rqhash_reposition(q, rq); | 580 | elv_rqhash_reposition(q, rq); |
544 | elv_rqhash_del(q, next); | ||
545 | 581 | ||
546 | q->nr_sorted--; | 582 | if (next_sorted) { |
583 | elv_rqhash_del(q, next); | ||
584 | q->nr_sorted--; | ||
585 | } | ||
586 | |||
547 | q->last_merge = rq; | 587 | q->last_merge = rq; |
548 | } | 588 | } |
549 | 589 | ||
@@ -617,21 +657,12 @@ void elv_quiesce_end(struct request_queue *q) | |||
617 | 657 | ||
618 | void elv_insert(struct request_queue *q, struct request *rq, int where) | 658 | void elv_insert(struct request_queue *q, struct request *rq, int where) |
619 | { | 659 | { |
620 | int unplug_it = 1; | ||
621 | |||
622 | trace_block_rq_insert(q, rq); | 660 | trace_block_rq_insert(q, rq); |
623 | 661 | ||
624 | rq->q = q; | 662 | rq->q = q; |
625 | 663 | ||
626 | switch (where) { | 664 | switch (where) { |
627 | case ELEVATOR_INSERT_REQUEUE: | 665 | case ELEVATOR_INSERT_REQUEUE: |
628 | /* | ||
629 | * Most requeues happen because of a busy condition, | ||
630 | * don't force unplug of the queue for that case. | ||
631 | * Clear unplug_it and fall through. | ||
632 | */ | ||
633 | unplug_it = 0; | ||
634 | |||
635 | case ELEVATOR_INSERT_FRONT: | 666 | case ELEVATOR_INSERT_FRONT: |
636 | rq->cmd_flags |= REQ_SOFTBARRIER; | 667 | rq->cmd_flags |= REQ_SOFTBARRIER; |
637 | list_add(&rq->queuelist, &q->queue_head); | 668 | list_add(&rq->queuelist, &q->queue_head); |
@@ -654,6 +685,14 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) | |||
654 | __blk_run_queue(q, false); | 685 | __blk_run_queue(q, false); |
655 | break; | 686 | break; |
656 | 687 | ||
688 | case ELEVATOR_INSERT_SORT_MERGE: | ||
689 | /* | ||
690 | * If we succeed in merging this request with one in the | ||
691 | * queue already, we are done - rq has now been freed, | ||
692 | * so no need to do anything further. | ||
693 | */ | ||
694 | if (elv_attempt_insert_merge(q, rq)) | ||
695 | break; | ||
657 | case ELEVATOR_INSERT_SORT: | 696 | case ELEVATOR_INSERT_SORT: |
658 | BUG_ON(rq->cmd_type != REQ_TYPE_FS && | 697 | BUG_ON(rq->cmd_type != REQ_TYPE_FS && |
659 | !(rq->cmd_flags & REQ_DISCARD)); | 698 | !(rq->cmd_flags & REQ_DISCARD)); |
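The new `ELEVATOR_INSERT_SORT_MERGE` case relies on a deliberate switch fall-through: it breaks out only when the merge succeeded (at which point `rq` has already been freed), and otherwise drops straight into the ordinary `ELEVATOR_INSERT_SORT` handling. A stripped-down sketch of that control flow, with made-up names:

```c
/* Fall-through pattern used by the ELEVATOR_INSERT_SORT_MERGE case. */
#include <stdbool.h>
#include <stdio.h>

enum insert_where { INSERT_SORT_MERGE, INSERT_SORT };

static bool try_merge(int rq)
{
	return rq % 2 == 0;          /* pretend even-numbered requests merge */
}

static void insert(int rq, enum insert_where where)
{
	switch (where) {
	case INSERT_SORT_MERGE:
		if (try_merge(rq)) {
			printf("rq %d merged, nothing left to insert\n", rq);
			break;       /* the request is gone, stop here */
		}
		/* merge failed: fall through to the ordinary sorted insert */
	case INSERT_SORT:
		printf("rq %d sorted into the queue\n", rq);
		break;
	}
}

int main(void)
{
	insert(3, INSERT_SORT_MERGE);    /* odd: falls through to the sort */
	insert(4, INSERT_SORT_MERGE);    /* even: merged, early break      */
	return 0;
}
```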
@@ -673,24 +712,21 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) | |||
673 | q->elevator->ops->elevator_add_req_fn(q, rq); | 712 | q->elevator->ops->elevator_add_req_fn(q, rq); |
674 | break; | 713 | break; |
675 | 714 | ||
715 | case ELEVATOR_INSERT_FLUSH: | ||
716 | rq->cmd_flags |= REQ_SOFTBARRIER; | ||
717 | blk_insert_flush(rq); | ||
718 | break; | ||
676 | default: | 719 | default: |
677 | printk(KERN_ERR "%s: bad insertion point %d\n", | 720 | printk(KERN_ERR "%s: bad insertion point %d\n", |
678 | __func__, where); | 721 | __func__, where); |
679 | BUG(); | 722 | BUG(); |
680 | } | 723 | } |
681 | |||
682 | if (unplug_it && blk_queue_plugged(q)) { | ||
683 | int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC] | ||
684 | - queue_in_flight(q); | ||
685 | |||
686 | if (nrq >= q->unplug_thresh) | ||
687 | __generic_unplug_device(q); | ||
688 | } | ||
689 | } | 724 | } |
690 | 725 | ||
691 | void __elv_add_request(struct request_queue *q, struct request *rq, int where, | 726 | void __elv_add_request(struct request_queue *q, struct request *rq, int where) |
692 | int plug) | ||
693 | { | 727 | { |
728 | BUG_ON(rq->cmd_flags & REQ_ON_PLUG); | ||
729 | |||
694 | if (rq->cmd_flags & REQ_SOFTBARRIER) { | 730 | if (rq->cmd_flags & REQ_SOFTBARRIER) { |
695 | /* barriers are scheduling boundary, update end_sector */ | 731 | /* barriers are scheduling boundary, update end_sector */ |
696 | if (rq->cmd_type == REQ_TYPE_FS || | 732 | if (rq->cmd_type == REQ_TYPE_FS || |
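The block removed from the tail of `elv_insert()` was the last remnant of threshold-driven auto-unplugging: each insert counted the requests queued but not yet in flight and kicked the device once a watermark was crossed. With on-stack plugging the submitter flushes its own batch, so neither `elv_insert()` nor `__elv_add_request()` needs to make a plug decision any more. For reference, a toy rendering of the deleted heuristic, with invented field names standing in for `q->rq.count[]`, `queue_in_flight()` and `q->unplug_thresh`:

```c
/* Model of the auto-unplug heuristic removed from elv_insert(). */
#include <stdbool.h>
#include <stdio.h>

struct toy_queue {
	int queued_sync;     /* ~ q->rq.count[BLK_RW_SYNC]  */
	int queued_async;    /* ~ q->rq.count[BLK_RW_ASYNC] */
	int in_flight;       /* ~ queue_in_flight(q)        */
	int unplug_thresh;   /* ~ q->unplug_thresh          */
};

/* Old behaviour: kick the device once enough requests pile up behind the plug. */
static bool should_auto_unplug(const struct toy_queue *q)
{
	int nrq = q->queued_sync + q->queued_async - q->in_flight;

	return nrq >= q->unplug_thresh;
}

int main(void)
{
	struct toy_queue q = { .queued_sync = 3, .queued_async = 2,
			       .in_flight = 1, .unplug_thresh = 4 };

	printf("auto-unplug? %s\n", should_auto_unplug(&q) ? "yes" : "no");
	return 0;
}
```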
@@ -702,38 +738,20 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where, | |||
702 | where == ELEVATOR_INSERT_SORT) | 738 | where == ELEVATOR_INSERT_SORT) |
703 | where = ELEVATOR_INSERT_BACK; | 739 | where = ELEVATOR_INSERT_BACK; |
704 | 740 | ||
705 | if (plug) | ||
706 | blk_plug_device(q); | ||
707 | |||
708 | elv_insert(q, rq, where); | 741 | elv_insert(q, rq, where); |
709 | } | 742 | } |
710 | EXPORT_SYMBOL(__elv_add_request); | 743 | EXPORT_SYMBOL(__elv_add_request); |
711 | 744 | ||
712 | void elv_add_request(struct request_queue *q, struct request *rq, int where, | 745 | void elv_add_request(struct request_queue *q, struct request *rq, int where) |
713 | int plug) | ||
714 | { | 746 | { |
715 | unsigned long flags; | 747 | unsigned long flags; |
716 | 748 | ||
717 | spin_lock_irqsave(q->queue_lock, flags); | 749 | spin_lock_irqsave(q->queue_lock, flags); |
718 | __elv_add_request(q, rq, where, plug); | 750 | __elv_add_request(q, rq, where); |
719 | spin_unlock_irqrestore(q->queue_lock, flags); | 751 | spin_unlock_irqrestore(q->queue_lock, flags); |
720 | } | 752 | } |
721 | EXPORT_SYMBOL(elv_add_request); | 753 | EXPORT_SYMBOL(elv_add_request); |
722 | 754 | ||
723 | int elv_queue_empty(struct request_queue *q) | ||
724 | { | ||
725 | struct elevator_queue *e = q->elevator; | ||
726 | |||
727 | if (!list_empty(&q->queue_head)) | ||
728 | return 0; | ||
729 | |||
730 | if (e->ops->elevator_queue_empty_fn) | ||
731 | return e->ops->elevator_queue_empty_fn(q); | ||
732 | |||
733 | return 1; | ||
734 | } | ||
735 | EXPORT_SYMBOL(elv_queue_empty); | ||
736 | |||
737 | struct request *elv_latter_request(struct request_queue *q, struct request *rq) | 755 | struct request *elv_latter_request(struct request_queue *q, struct request *rq) |
738 | { | 756 | { |
739 | struct elevator_queue *e = q->elevator; | 757 | struct elevator_queue *e = q->elevator; |
@@ -759,7 +777,7 @@ int elv_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask) | |||
759 | if (e->ops->elevator_set_req_fn) | 777 | if (e->ops->elevator_set_req_fn) |
760 | return e->ops->elevator_set_req_fn(q, rq, gfp_mask); | 778 | return e->ops->elevator_set_req_fn(q, rq, gfp_mask); |
761 | 779 | ||
762 | rq->elevator_private = NULL; | 780 | rq->elevator_private[0] = NULL; |
763 | return 0; | 781 | return 0; |
764 | } | 782 | } |
765 | 783 | ||
@@ -785,6 +803,8 @@ void elv_abort_queue(struct request_queue *q) | |||
785 | { | 803 | { |
786 | struct request *rq; | 804 | struct request *rq; |
787 | 805 | ||
806 | blk_abort_flushes(q); | ||
807 | |||
788 | while (!list_empty(&q->queue_head)) { | 808 | while (!list_empty(&q->queue_head)) { |
789 | rq = list_entry_rq(q->queue_head.next); | 809 | rq = list_entry_rq(q->queue_head.next); |
790 | rq->cmd_flags |= REQ_QUIET; | 810 | rq->cmd_flags |= REQ_QUIET; |
diff --git a/block/genhd.c b/block/genhd.c index cbf1112a885c..c91a2dac6b6b 100644 --- a/block/genhd.c +++ b/block/genhd.c | |||
@@ -1158,14 +1158,14 @@ static int diskstats_show(struct seq_file *seqf, void *v) | |||
1158 | "%u %lu %lu %llu %u %u %u %u\n", | 1158 | "%u %lu %lu %llu %u %u %u %u\n", |
1159 | MAJOR(part_devt(hd)), MINOR(part_devt(hd)), | 1159 | MAJOR(part_devt(hd)), MINOR(part_devt(hd)), |
1160 | disk_name(gp, hd->partno, buf), | 1160 | disk_name(gp, hd->partno, buf), |
1161 | part_stat_read(hd, ios[0]), | 1161 | part_stat_read(hd, ios[READ]), |
1162 | part_stat_read(hd, merges[0]), | 1162 | part_stat_read(hd, merges[READ]), |
1163 | (unsigned long long)part_stat_read(hd, sectors[0]), | 1163 | (unsigned long long)part_stat_read(hd, sectors[READ]), |
1164 | jiffies_to_msecs(part_stat_read(hd, ticks[0])), | 1164 | jiffies_to_msecs(part_stat_read(hd, ticks[READ])), |
1165 | part_stat_read(hd, ios[1]), | 1165 | part_stat_read(hd, ios[WRITE]), |
1166 | part_stat_read(hd, merges[1]), | 1166 | part_stat_read(hd, merges[WRITE]), |
1167 | (unsigned long long)part_stat_read(hd, sectors[1]), | 1167 | (unsigned long long)part_stat_read(hd, sectors[WRITE]), |
1168 | jiffies_to_msecs(part_stat_read(hd, ticks[1])), | 1168 | jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), |
1169 | part_in_flight(hd), | 1169 | part_in_flight(hd), |
1170 | jiffies_to_msecs(part_stat_read(hd, io_ticks)), | 1170 | jiffies_to_msecs(part_stat_read(hd, io_ticks)), |
1171 | jiffies_to_msecs(part_stat_read(hd, time_in_queue)) | 1171 | jiffies_to_msecs(part_stat_read(hd, time_in_queue)) |
@@ -1494,7 +1494,7 @@ void disk_block_events(struct gendisk *disk) | |||
1494 | void disk_unblock_events(struct gendisk *disk) | 1494 | void disk_unblock_events(struct gendisk *disk) |
1495 | { | 1495 | { |
1496 | if (disk->ev) | 1496 | if (disk->ev) |
1497 | __disk_unblock_events(disk, true); | 1497 | __disk_unblock_events(disk, false); |
1498 | } | 1498 | } |
1499 | 1499 | ||
1500 | /** | 1500 | /** |
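The diskstats hunk is cosmetic: the per-partition counters are two-element arrays indexed by direction, and spelling the indices as `READ`/`WRITE` rather than `0`/`1` makes the format string's field order self-documenting. The `disk_unblock_events()` hunk, by contrast, changes behaviour: passing `false` appears to stop an unblock from requesting an immediate event check. A tiny illustration of the named-index idiom, using a made-up struct rather than the kernel's `part_stat_read()` accessors:

```c
/* Named direction indices instead of bare 0/1 (illustrative only). */
#include <stdio.h>

enum { TOY_READ = 0, TOY_WRITE = 1 };   /* the kernel uses READ/WRITE from fs.h */

struct part_stats {
	unsigned long ios[2];
	unsigned long sectors[2];
};

int main(void)
{
	struct part_stats st = { { 120, 30 }, { 960, 240 } };

	/* Reads first, then writes, mirroring the /proc/diskstats field order. */
	printf("%lu %lu %lu %lu\n",
	       st.ios[TOY_READ], st.sectors[TOY_READ],
	       st.ios[TOY_WRITE], st.sectors[TOY_WRITE]);
	return 0;
}
```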
diff --git a/block/noop-iosched.c b/block/noop-iosched.c index 232c4b38cd37..06389e9ef96d 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c | |||
@@ -39,13 +39,6 @@ static void noop_add_request(struct request_queue *q, struct request *rq) | |||
39 | list_add_tail(&rq->queuelist, &nd->queue); | 39 | list_add_tail(&rq->queuelist, &nd->queue); |
40 | } | 40 | } |
41 | 41 | ||
42 | static int noop_queue_empty(struct request_queue *q) | ||
43 | { | ||
44 | struct noop_data *nd = q->elevator->elevator_data; | ||
45 | |||
46 | return list_empty(&nd->queue); | ||
47 | } | ||
48 | |||
49 | static struct request * | 42 | static struct request * |
50 | noop_former_request(struct request_queue *q, struct request *rq) | 43 | noop_former_request(struct request_queue *q, struct request *rq) |
51 | { | 44 | { |
@@ -90,7 +83,6 @@ static struct elevator_type elevator_noop = { | |||
90 | .elevator_merge_req_fn = noop_merged_requests, | 83 | .elevator_merge_req_fn = noop_merged_requests, |
91 | .elevator_dispatch_fn = noop_dispatch, | 84 | .elevator_dispatch_fn = noop_dispatch, |
92 | .elevator_add_req_fn = noop_add_request, | 85 | .elevator_add_req_fn = noop_add_request, |
93 | .elevator_queue_empty_fn = noop_queue_empty, | ||
94 | .elevator_former_req_fn = noop_former_request, | 86 | .elevator_former_req_fn = noop_former_request, |
95 | .elevator_latter_req_fn = noop_latter_request, | 87 | .elevator_latter_req_fn = noop_latter_request, |
96 | .elevator_init_fn = noop_init_queue, | 88 | .elevator_init_fn = noop_init_queue, |
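The noop changes mirror the deadline ones and show how small an elevator's contract has become now that `elevator_queue_empty_fn` is gone from the ops table: essentially add-to-tail plus dispatch-from-head. A compact userspace sketch of that FIFO core, using a hand-rolled singly linked list in place of the kernel's `struct list_head`:

```c
/* FIFO scheduling core of a noop-style elevator, modelled in userspace. */
#include <stdio.h>

struct toy_rq {
	int id;
	struct toy_rq *next;
};

struct noop_data {
	struct toy_rq *head, *tail;    /* stand-in for the nd->queue list */
};

static void noop_add_request(struct noop_data *nd, struct toy_rq *rq)
{
	rq->next = NULL;
	if (nd->tail)
		nd->tail->next = rq;
	else
		nd->head = rq;
	nd->tail = rq;
}

static struct toy_rq *noop_dispatch(struct noop_data *nd)
{
	struct toy_rq *rq = nd->head;

	if (rq) {
		nd->head = rq->next;
		if (!nd->head)
			nd->tail = NULL;
	}
	return rq;
}

int main(void)
{
	struct noop_data nd = { NULL, NULL };
	struct toy_rq a = { 1, NULL }, b = { 2, NULL };
	struct toy_rq *rq;

	noop_add_request(&nd, &a);
	noop_add_request(&nd, &b);
	while ((rq = noop_dispatch(&nd)))
		printf("dispatched request %d\n", rq->id);
	return 0;
}
```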