aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 20:55:15 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2009-09-14 20:55:15 -0400
commit355bbd8cb82e60a592f6cd86ce6dbe5677615cf4 (patch)
tree23678e50ad4687f1656edc972388ee8014e7b89d /block
parent39695224bd84dc4be29abad93a0ec232a16fc519 (diff)
parent746cd1e7e4a555ddaee53b19a46e05c9c61eaf09 (diff)
Merge branch 'for-2.6.32' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.32' of git://git.kernel.dk/linux-2.6-block: (29 commits) block: use blkdev_issue_discard in blk_ioctl_discard Make DISCARD_BARRIER and DISCARD_NOBARRIER writes instead of reads block: don't assume device has a request list backing in nr_requests store block: Optimal I/O limit wrapper cfq: choose a new next_req when a request is dispatched Seperate read and write statistics of in_flight requests aoe: end barrier bios with EOPNOTSUPP block: trace bio queueing trial only when it occurs block: enable rq CPU completion affinity by default cfq: fix the log message after dispatched a request block: use printk_once cciss: memory leak in cciss_init_one() splice: update mtime and atime on files block: make blk_iopoll_prep_sched() follow normal 0/1 return convention cfq-iosched: get rid of must_alloc flag block: use interrupts disabled version of raise_softirq_irqoff() block: fix comment in blk-iopoll.c block: adjust default budget for blk-iopoll block: fix long lines in block/blk-iopoll.c block: add blk-iopoll, a NAPI like approach for block devices ...
Diffstat (limited to 'block')
-rw-r--r--block/Makefile2
-rw-r--r--block/blk-barrier.c31
-rw-r--r--block/blk-core.c166
-rw-r--r--block/blk-iopoll.c227
-rw-r--r--block/blk-merge.c51
-rw-r--r--block/blk-settings.c21
-rw-r--r--block/blk-sysfs.c7
-rw-r--r--block/blk.h1
-rw-r--r--block/cfq-iosched.c72
-rw-r--r--block/elevator.c16
-rw-r--r--block/genhd.c22
-rw-r--r--block/ioctl.c49
12 files changed, 504 insertions, 161 deletions
diff --git a/block/Makefile b/block/Makefile
index 6c54ed0ff755..ba74ca6bfa14 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
5obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ 5obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
6 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ 6 blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ 7 blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
8 ioctl.o genhd.o scsi_ioctl.o 8 blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
9 9
10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o 10obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
11obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o 11obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 30022b4e2f63..6593ab39cfe9 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -348,6 +348,9 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
348 clear_bit(BIO_UPTODATE, &bio->bi_flags); 348 clear_bit(BIO_UPTODATE, &bio->bi_flags);
349 } 349 }
350 350
351 if (bio->bi_private)
352 complete(bio->bi_private);
353
351 bio_put(bio); 354 bio_put(bio);
352} 355}
353 356
@@ -357,21 +360,20 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
357 * @sector: start sector 360 * @sector: start sector
358 * @nr_sects: number of sectors to discard 361 * @nr_sects: number of sectors to discard
359 * @gfp_mask: memory allocation flags (for bio_alloc) 362 * @gfp_mask: memory allocation flags (for bio_alloc)
363 * @flags: DISCARD_FL_* flags to control behaviour
360 * 364 *
361 * Description: 365 * Description:
362 * Issue a discard request for the sectors in question. Does not wait. 366 * Issue a discard request for the sectors in question.
363 */ 367 */
364int blkdev_issue_discard(struct block_device *bdev, 368int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
365 sector_t sector, sector_t nr_sects, gfp_t gfp_mask) 369 sector_t nr_sects, gfp_t gfp_mask, int flags)
366{ 370{
367 struct request_queue *q; 371 DECLARE_COMPLETION_ONSTACK(wait);
368 struct bio *bio; 372 struct request_queue *q = bdev_get_queue(bdev);
373 int type = flags & DISCARD_FL_BARRIER ?
374 DISCARD_BARRIER : DISCARD_NOBARRIER;
369 int ret = 0; 375 int ret = 0;
370 376
371 if (bdev->bd_disk == NULL)
372 return -ENXIO;
373
374 q = bdev_get_queue(bdev);
375 if (!q) 377 if (!q)
376 return -ENXIO; 378 return -ENXIO;
377 379
@@ -379,12 +381,14 @@ int blkdev_issue_discard(struct block_device *bdev,
379 return -EOPNOTSUPP; 381 return -EOPNOTSUPP;
380 382
381 while (nr_sects && !ret) { 383 while (nr_sects && !ret) {
382 bio = bio_alloc(gfp_mask, 0); 384 struct bio *bio = bio_alloc(gfp_mask, 0);
383 if (!bio) 385 if (!bio)
384 return -ENOMEM; 386 return -ENOMEM;
385 387
386 bio->bi_end_io = blkdev_discard_end_io; 388 bio->bi_end_io = blkdev_discard_end_io;
387 bio->bi_bdev = bdev; 389 bio->bi_bdev = bdev;
390 if (flags & DISCARD_FL_WAIT)
391 bio->bi_private = &wait;
388 392
389 bio->bi_sector = sector; 393 bio->bi_sector = sector;
390 394
@@ -396,10 +400,13 @@ int blkdev_issue_discard(struct block_device *bdev,
396 bio->bi_size = nr_sects << 9; 400 bio->bi_size = nr_sects << 9;
397 nr_sects = 0; 401 nr_sects = 0;
398 } 402 }
403
399 bio_get(bio); 404 bio_get(bio);
400 submit_bio(DISCARD_BARRIER, bio); 405 submit_bio(type, bio);
406
407 if (flags & DISCARD_FL_WAIT)
408 wait_for_completion(&wait);
401 409
402 /* Check if it failed immediately */
403 if (bio_flagged(bio, BIO_EOPNOTSUPP)) 410 if (bio_flagged(bio, BIO_EOPNOTSUPP))
404 ret = -EOPNOTSUPP; 411 ret = -EOPNOTSUPP;
405 else if (!bio_flagged(bio, BIO_UPTODATE)) 412 else if (!bio_flagged(bio, BIO_UPTODATE))
diff --git a/block/blk-core.c b/block/blk-core.c
index e695634882a6..8135228e4b29 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -69,7 +69,7 @@ static void drive_stat_acct(struct request *rq, int new_io)
69 part_stat_inc(cpu, part, merges[rw]); 69 part_stat_inc(cpu, part, merges[rw]);
70 else { 70 else {
71 part_round_stats(cpu, part); 71 part_round_stats(cpu, part);
72 part_inc_in_flight(part); 72 part_inc_in_flight(part, rw);
73 } 73 }
74 74
75 part_stat_unlock(); 75 part_stat_unlock();
@@ -1031,7 +1031,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
1031 1031
1032 if (part->in_flight) { 1032 if (part->in_flight) {
1033 __part_stat_add(cpu, part, time_in_queue, 1033 __part_stat_add(cpu, part, time_in_queue,
1034 part->in_flight * (now - part->stamp)); 1034 part_in_flight(part) * (now - part->stamp));
1035 __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); 1035 __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
1036 } 1036 }
1037 part->stamp = now; 1037 part->stamp = now;
@@ -1112,31 +1112,27 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1112 req->cmd_type = REQ_TYPE_FS; 1112 req->cmd_type = REQ_TYPE_FS;
1113 1113
1114 /* 1114 /*
1115 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST) 1115 * Inherit FAILFAST from bio (for read-ahead, and explicit
1116 * FAILFAST). FAILFAST flags are identical for req and bio.
1116 */ 1117 */
1117 if (bio_rw_ahead(bio)) 1118 if (bio_rw_flagged(bio, BIO_RW_AHEAD))
1118 req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 1119 req->cmd_flags |= REQ_FAILFAST_MASK;
1119 REQ_FAILFAST_DRIVER); 1120 else
1120 if (bio_failfast_dev(bio)) 1121 req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
1121 req->cmd_flags |= REQ_FAILFAST_DEV; 1122
1122 if (bio_failfast_transport(bio)) 1123 if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
1123 req->cmd_flags |= REQ_FAILFAST_TRANSPORT;
1124 if (bio_failfast_driver(bio))
1125 req->cmd_flags |= REQ_FAILFAST_DRIVER;
1126
1127 if (unlikely(bio_discard(bio))) {
1128 req->cmd_flags |= REQ_DISCARD; 1124 req->cmd_flags |= REQ_DISCARD;
1129 if (bio_barrier(bio)) 1125 if (bio_rw_flagged(bio, BIO_RW_BARRIER))
1130 req->cmd_flags |= REQ_SOFTBARRIER; 1126 req->cmd_flags |= REQ_SOFTBARRIER;
1131 req->q->prepare_discard_fn(req->q, req); 1127 req->q->prepare_discard_fn(req->q, req);
1132 } else if (unlikely(bio_barrier(bio))) 1128 } else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
1133 req->cmd_flags |= REQ_HARDBARRIER; 1129 req->cmd_flags |= REQ_HARDBARRIER;
1134 1130
1135 if (bio_sync(bio)) 1131 if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
1136 req->cmd_flags |= REQ_RW_SYNC; 1132 req->cmd_flags |= REQ_RW_SYNC;
1137 if (bio_rw_meta(bio)) 1133 if (bio_rw_flagged(bio, BIO_RW_META))
1138 req->cmd_flags |= REQ_RW_META; 1134 req->cmd_flags |= REQ_RW_META;
1139 if (bio_noidle(bio)) 1135 if (bio_rw_flagged(bio, BIO_RW_NOIDLE))
1140 req->cmd_flags |= REQ_NOIDLE; 1136 req->cmd_flags |= REQ_NOIDLE;
1141 1137
1142 req->errors = 0; 1138 req->errors = 0;
@@ -1151,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
1151 */ 1147 */
1152static inline bool queue_should_plug(struct request_queue *q) 1148static inline bool queue_should_plug(struct request_queue *q)
1153{ 1149{
1154 return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); 1150 return !(blk_queue_nonrot(q) && blk_queue_queuing(q));
1155} 1151}
1156 1152
1157static int __make_request(struct request_queue *q, struct bio *bio) 1153static int __make_request(struct request_queue *q, struct bio *bio)
@@ -1160,11 +1156,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1160 int el_ret; 1156 int el_ret;
1161 unsigned int bytes = bio->bi_size; 1157 unsigned int bytes = bio->bi_size;
1162 const unsigned short prio = bio_prio(bio); 1158 const unsigned short prio = bio_prio(bio);
1163 const int sync = bio_sync(bio); 1159 const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
1164 const int unplug = bio_unplug(bio); 1160 const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
1161 const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
1165 int rw_flags; 1162 int rw_flags;
1166 1163
1167 if (bio_barrier(bio) && bio_has_data(bio) && 1164 if (bio_rw_flagged(bio, BIO_RW_BARRIER) && bio_has_data(bio) &&
1168 (q->next_ordered == QUEUE_ORDERED_NONE)) { 1165 (q->next_ordered == QUEUE_ORDERED_NONE)) {
1169 bio_endio(bio, -EOPNOTSUPP); 1166 bio_endio(bio, -EOPNOTSUPP);
1170 return 0; 1167 return 0;
@@ -1178,7 +1175,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1178 1175
1179 spin_lock_irq(q->queue_lock); 1176 spin_lock_irq(q->queue_lock);
1180 1177
1181 if (unlikely(bio_barrier(bio)) || elv_queue_empty(q)) 1178 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
1182 goto get_rq; 1179 goto get_rq;
1183 1180
1184 el_ret = elv_merge(q, &req, bio); 1181 el_ret = elv_merge(q, &req, bio);
@@ -1191,6 +1188,9 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1191 1188
1192 trace_block_bio_backmerge(q, bio); 1189 trace_block_bio_backmerge(q, bio);
1193 1190
1191 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1192 blk_rq_set_mixed_merge(req);
1193
1194 req->biotail->bi_next = bio; 1194 req->biotail->bi_next = bio;
1195 req->biotail = bio; 1195 req->biotail = bio;
1196 req->__data_len += bytes; 1196 req->__data_len += bytes;
@@ -1210,6 +1210,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
1210 1210
1211 trace_block_bio_frontmerge(q, bio); 1211 trace_block_bio_frontmerge(q, bio);
1212 1212
1213 if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
1214 blk_rq_set_mixed_merge(req);
1215 req->cmd_flags &= ~REQ_FAILFAST_MASK;
1216 req->cmd_flags |= ff;
1217 }
1218
1213 bio->bi_next = req->bio; 1219 bio->bi_next = req->bio;
1214 req->bio = bio; 1220 req->bio = bio;
1215 1221
@@ -1457,19 +1463,20 @@ static inline void __generic_make_request(struct bio *bio)
1457 if (old_sector != -1) 1463 if (old_sector != -1)
1458 trace_block_remap(q, bio, old_dev, old_sector); 1464 trace_block_remap(q, bio, old_dev, old_sector);
1459 1465
1460 trace_block_bio_queue(q, bio);
1461
1462 old_sector = bio->bi_sector; 1466 old_sector = bio->bi_sector;
1463 old_dev = bio->bi_bdev->bd_dev; 1467 old_dev = bio->bi_bdev->bd_dev;
1464 1468
1465 if (bio_check_eod(bio, nr_sectors)) 1469 if (bio_check_eod(bio, nr_sectors))
1466 goto end_io; 1470 goto end_io;
1467 1471
1468 if (bio_discard(bio) && !q->prepare_discard_fn) { 1472 if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
1473 !q->prepare_discard_fn) {
1469 err = -EOPNOTSUPP; 1474 err = -EOPNOTSUPP;
1470 goto end_io; 1475 goto end_io;
1471 } 1476 }
1472 1477
1478 trace_block_bio_queue(q, bio);
1479
1473 ret = q->make_request_fn(q, bio); 1480 ret = q->make_request_fn(q, bio);
1474 } while (ret); 1481 } while (ret);
1475 1482
@@ -1654,6 +1661,50 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
1654} 1661}
1655EXPORT_SYMBOL_GPL(blk_insert_cloned_request); 1662EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
1656 1663
1664/**
1665 * blk_rq_err_bytes - determine number of bytes till the next failure boundary
1666 * @rq: request to examine
1667 *
1668 * Description:
1669 * A request could be a merge of IOs which require different failure
1670 * handling. This function determines the number of bytes which
1671 * can be failed from the beginning of the request without
1672 * crossing into an area which needs to be retried further.
1673 *
1674 * Return:
1675 * The number of bytes to fail.
1676 *
1677 * Context:
1678 * queue_lock must be held.
1679 */
1680unsigned int blk_rq_err_bytes(const struct request *rq)
1681{
1682 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
1683 unsigned int bytes = 0;
1684 struct bio *bio;
1685
1686 if (!(rq->cmd_flags & REQ_MIXED_MERGE))
1687 return blk_rq_bytes(rq);
1688
1689 /*
1690 * Currently the only 'mixing' which can happen is between
1691 * different failfast types. We can safely fail portions
1692 * which have all the failfast bits that the first one has -
1693 * the ones which are at least as eager to fail as the first
1694 * one.
1695 */
1696 for (bio = rq->bio; bio; bio = bio->bi_next) {
1697 if ((bio->bi_rw & ff) != ff)
1698 break;
1699 bytes += bio->bi_size;
1700 }
1701
1702 /* this could lead to infinite loop */
1703 BUG_ON(blk_rq_bytes(rq) && !bytes);
1704 return bytes;
1705}
1706EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
1707
1657static void blk_account_io_completion(struct request *req, unsigned int bytes) 1708static void blk_account_io_completion(struct request *req, unsigned int bytes)
1658{ 1709{
1659 if (blk_do_io_stat(req)) { 1710 if (blk_do_io_stat(req)) {
@@ -1687,7 +1738,7 @@ static void blk_account_io_done(struct request *req)
1687 part_stat_inc(cpu, part, ios[rw]); 1738 part_stat_inc(cpu, part, ios[rw]);
1688 part_stat_add(cpu, part, ticks[rw], duration); 1739 part_stat_add(cpu, part, ticks[rw], duration);
1689 part_round_stats(cpu, part); 1740 part_round_stats(cpu, part);
1690 part_dec_in_flight(part); 1741 part_dec_in_flight(part, rw);
1691 1742
1692 part_stat_unlock(); 1743 part_stat_unlock();
1693 } 1744 }
@@ -1807,8 +1858,15 @@ void blk_dequeue_request(struct request *rq)
1807 * and to it is freed is accounted as io that is in progress at 1858 * and to it is freed is accounted as io that is in progress at
1808 * the driver side. 1859 * the driver side.
1809 */ 1860 */
1810 if (blk_account_rq(rq)) 1861 if (blk_account_rq(rq)) {
1811 q->in_flight[rq_is_sync(rq)]++; 1862 q->in_flight[rq_is_sync(rq)]++;
1863 /*
1864 * Mark this device as supporting hardware queuing, if
1865 * we have more IOs in flight than 4.
1866 */
1867 if (!blk_queue_queuing(q) && queue_in_flight(q) > 4)
1868 set_bit(QUEUE_FLAG_CQ, &q->queue_flags);
1869 }
1812} 1870}
1813 1871
1814/** 1872/**
@@ -2000,6 +2058,12 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2000 if (blk_fs_request(req) || blk_discard_rq(req)) 2058 if (blk_fs_request(req) || blk_discard_rq(req))
2001 req->__sector += total_bytes >> 9; 2059 req->__sector += total_bytes >> 9;
2002 2060
2061 /* mixed attributes always follow the first bio */
2062 if (req->cmd_flags & REQ_MIXED_MERGE) {
2063 req->cmd_flags &= ~REQ_FAILFAST_MASK;
2064 req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
2065 }
2066
2003 /* 2067 /*
2004 * If total number of sectors is less than the first segment 2068 * If total number of sectors is less than the first segment
2005 * size, something has gone terribly wrong. 2069 * size, something has gone terribly wrong.
@@ -2179,6 +2243,25 @@ bool blk_end_request_cur(struct request *rq, int error)
2179EXPORT_SYMBOL(blk_end_request_cur); 2243EXPORT_SYMBOL(blk_end_request_cur);
2180 2244
2181/** 2245/**
2246 * blk_end_request_err - Finish a request till the next failure boundary.
2247 * @rq: the request to finish till the next failure boundary for
2248 * @error: must be negative errno
2249 *
2250 * Description:
2251 * Complete @rq till the next failure boundary.
2252 *
2253 * Return:
2254 * %false - we are done with this request
2255 * %true - still buffers pending for this request
2256 */
2257bool blk_end_request_err(struct request *rq, int error)
2258{
2259 WARN_ON(error >= 0);
2260 return blk_end_request(rq, error, blk_rq_err_bytes(rq));
2261}
2262EXPORT_SYMBOL_GPL(blk_end_request_err);
2263
2264/**
2182 * __blk_end_request - Helper function for drivers to complete the request. 2265 * __blk_end_request - Helper function for drivers to complete the request.
2183 * @rq: the request being processed 2266 * @rq: the request being processed
2184 * @error: %0 for success, < %0 for error 2267 * @error: %0 for success, < %0 for error
@@ -2237,12 +2320,31 @@ bool __blk_end_request_cur(struct request *rq, int error)
2237} 2320}
2238EXPORT_SYMBOL(__blk_end_request_cur); 2321EXPORT_SYMBOL(__blk_end_request_cur);
2239 2322
2323/**
2324 * __blk_end_request_err - Finish a request till the next failure boundary.
2325 * @rq: the request to finish till the next failure boundary for
2326 * @error: must be negative errno
2327 *
2328 * Description:
2329 * Complete @rq till the next failure boundary. Must be called
2330 * with queue lock held.
2331 *
2332 * Return:
2333 * %false - we are done with this request
2334 * %true - still buffers pending for this request
2335 */
2336bool __blk_end_request_err(struct request *rq, int error)
2337{
2338 WARN_ON(error >= 0);
2339 return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
2340}
2341EXPORT_SYMBOL_GPL(__blk_end_request_err);
2342
2240void blk_rq_bio_prep(struct request_queue *q, struct request *rq, 2343void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
2241 struct bio *bio) 2344 struct bio *bio)
2242{ 2345{
2243 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and 2346 /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
2244 we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */ 2347 rq->cmd_flags |= bio->bi_rw & REQ_RW;
2245 rq->cmd_flags |= (bio->bi_rw & 3);
2246 2348
2247 if (bio_has_data(bio)) { 2349 if (bio_has_data(bio)) {
2248 rq->nr_phys_segments = bio_phys_segments(q, bio); 2350 rq->nr_phys_segments = bio_phys_segments(q, bio);
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
new file mode 100644
index 000000000000..ca564202ed7a
--- /dev/null
+++ b/block/blk-iopoll.c
@@ -0,0 +1,227 @@
1/*
2 * Functions related to interrupt-poll handling in the block layer. This
3 * is similar to NAPI for network devices.
4 */
5#include <linux/kernel.h>
6#include <linux/module.h>
7#include <linux/init.h>
8#include <linux/bio.h>
9#include <linux/blkdev.h>
10#include <linux/interrupt.h>
11#include <linux/cpu.h>
12#include <linux/blk-iopoll.h>
13#include <linux/delay.h>
14
15#include "blk.h"
16
17int blk_iopoll_enabled = 1;
18EXPORT_SYMBOL(blk_iopoll_enabled);
19
20static unsigned int blk_iopoll_budget __read_mostly = 256;
21
22static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
23
24/**
25 * blk_iopoll_sched - Schedule a run of the iopoll handler
26 * @iop: The parent iopoll structure
27 *
28 * Description:
29 * Add this blk_iopoll structure to the pending poll list and trigger the
30 * raise of the blk iopoll softirq. The driver must already have gotten a
31 * successful return from blk_iopoll_sched_prep() before calling this.
32 **/
33void blk_iopoll_sched(struct blk_iopoll *iop)
34{
35 unsigned long flags;
36
37 local_irq_save(flags);
38 list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll));
39 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
40 local_irq_restore(flags);
41}
42EXPORT_SYMBOL(blk_iopoll_sched);
43
44/**
45 * __blk_iopoll_complete - Mark this @iop as un-polled again
46 * @iop: The parent iopoll structure
47 *
48 * Description:
49 * See blk_iopoll_complete(). This function must be called with interrupts
50 * disabled.
51 **/
52void __blk_iopoll_complete(struct blk_iopoll *iop)
53{
54 list_del(&iop->list);
55 smp_mb__before_clear_bit();
56 clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
57}
58EXPORT_SYMBOL(__blk_iopoll_complete);
59
60/**
61 * blk_iopoll_complete - Mark this @iopoll as un-polled again
62 * @iopoll: The parent iopoll structure
63 *
64 * Description:
65 * If a driver consumes less than the assigned budget in its run of the
66 * iopoll handler, it'll end the polled mode by calling this function. The
67 * iopoll handler will not be invoked again before blk_iopoll_sched_prep()
68 * is called.
69 **/
70void blk_iopoll_complete(struct blk_iopoll *iopoll)
71{
72 unsigned long flags;
73
74 local_irq_save(flags);
75 __blk_iopoll_complete(iopoll);
76 local_irq_restore(flags);
77}
78EXPORT_SYMBOL(blk_iopoll_complete);
79
80static void blk_iopoll_softirq(struct softirq_action *h)
81{
82 struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
83 int rearm = 0, budget = blk_iopoll_budget;
84 unsigned long start_time = jiffies;
85
86 local_irq_disable();
87
88 while (!list_empty(list)) {
89 struct blk_iopoll *iop;
90 int work, weight;
91
92 /*
93 * If softirq window is exhausted then punt.
94 */
95 if (budget <= 0 || time_after(jiffies, start_time)) {
96 rearm = 1;
97 break;
98 }
99
100 local_irq_enable();
101
102 /* Even though interrupts have been re-enabled, this
103 * access is safe because interrupts can only add new
104 * entries to the tail of this list, and only ->poll()
105 * calls can remove this head entry from the list.
106 */
107 iop = list_entry(list->next, struct blk_iopoll, list);
108
109 weight = iop->weight;
110 work = 0;
111 if (test_bit(IOPOLL_F_SCHED, &iop->state))
112 work = iop->poll(iop, weight);
113
114 budget -= work;
115
116 local_irq_disable();
117
118 /*
119 * Drivers must not modify the iopoll state, if they
120 * consume their assigned weight (or more, some drivers can't
121 * easily just stop processing, they have to complete an
122 * entire mask of commands). In such cases this code
123 * still "owns" the iopoll instance and therefore can
124 * move the instance around on the list at-will.
125 */
126 if (work >= weight) {
127 if (blk_iopoll_disable_pending(iop))
128 __blk_iopoll_complete(iop);
129 else
130 list_move_tail(&iop->list, list);
131 }
132 }
133
134 if (rearm)
135 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
136
137 local_irq_enable();
138}
139
140/**
141 * blk_iopoll_disable - Disable iopoll on this @iop
142 * @iop: The parent iopoll structure
143 *
144 * Description:
145 * Disable io polling and wait for any pending callbacks to have completed.
146 **/
147void blk_iopoll_disable(struct blk_iopoll *iop)
148{
149 set_bit(IOPOLL_F_DISABLE, &iop->state);
150 while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
151 msleep(1);
152 clear_bit(IOPOLL_F_DISABLE, &iop->state);
153}
154EXPORT_SYMBOL(blk_iopoll_disable);
155
156/**
157 * blk_iopoll_enable - Enable iopoll on this @iop
158 * @iop: The parent iopoll structure
159 *
160 * Description:
161 * Enable iopoll on this @iop. Note that the handler run will not be
162 * scheduled, it will only mark it as active.
163 **/
164void blk_iopoll_enable(struct blk_iopoll *iop)
165{
166 BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
167 smp_mb__before_clear_bit();
168 clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
169}
170EXPORT_SYMBOL(blk_iopoll_enable);
171
172/**
173 * blk_iopoll_init - Initialize this @iop
174 * @iop: The parent iopoll structure
175 * @weight: The default weight (or command completion budget)
176 * @poll_fn: The handler to invoke
177 *
178 * Description:
179 * Initialize this blk_iopoll structure. Before being actively used, the
180 * driver must call blk_iopoll_enable().
181 **/
182void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
183{
184 memset(iop, 0, sizeof(*iop));
185 INIT_LIST_HEAD(&iop->list);
186 iop->weight = weight;
187 iop->poll = poll_fn;
188 set_bit(IOPOLL_F_SCHED, &iop->state);
189}
190EXPORT_SYMBOL(blk_iopoll_init);
191
192static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self,
193 unsigned long action, void *hcpu)
194{
195 /*
196 * If a CPU goes away, splice its entries to the current CPU
197 * and trigger a run of the softirq
198 */
199 if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
200 int cpu = (unsigned long) hcpu;
201
202 local_irq_disable();
203 list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
204 &__get_cpu_var(blk_cpu_iopoll));
205 __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
206 local_irq_enable();
207 }
208
209 return NOTIFY_OK;
210}
211
212static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = {
213 .notifier_call = blk_iopoll_cpu_notify,
214};
215
216static __init int blk_iopoll_setup(void)
217{
218 int i;
219
220 for_each_possible_cpu(i)
221 INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
222
223 open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
224 register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
225 return 0;
226}
227subsys_initcall(blk_iopoll_setup);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e1999679a4d5..99cb5cf1f447 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -311,6 +311,36 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
311 return 1; 311 return 1;
312} 312}
313 313
314/**
315 * blk_rq_set_mixed_merge - mark a request as mixed merge
316 * @rq: request to mark as mixed merge
317 *
318 * Description:
319 * @rq is about to be mixed merged. Make sure the attributes
320 * which can be mixed are set in each bio and mark @rq as mixed
321 * merged.
322 */
323void blk_rq_set_mixed_merge(struct request *rq)
324{
325 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
326 struct bio *bio;
327
328 if (rq->cmd_flags & REQ_MIXED_MERGE)
329 return;
330
331 /*
332 * @rq will no longer represent mixable attributes for all the
333 * contained bios. It will just track those of the first one.
334 * Distribute the attributes to each bio.
335 */
336 for (bio = rq->bio; bio; bio = bio->bi_next) {
337 WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
338 (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
339 bio->bi_rw |= ff;
340 }
341 rq->cmd_flags |= REQ_MIXED_MERGE;
342}
343
314static void blk_account_io_merge(struct request *req) 344static void blk_account_io_merge(struct request *req)
315{ 345{
316 if (blk_do_io_stat(req)) { 346 if (blk_do_io_stat(req)) {
@@ -321,7 +351,7 @@ static void blk_account_io_merge(struct request *req)
321 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); 351 part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
322 352
323 part_round_stats(cpu, part); 353 part_round_stats(cpu, part);
324 part_dec_in_flight(part); 354 part_dec_in_flight(part, rq_data_dir(req));
325 355
326 part_stat_unlock(); 356 part_stat_unlock();
327 } 357 }
@@ -350,12 +380,6 @@ static int attempt_merge(struct request_queue *q, struct request *req,
350 if (blk_integrity_rq(req) != blk_integrity_rq(next)) 380 if (blk_integrity_rq(req) != blk_integrity_rq(next))
351 return 0; 381 return 0;
352 382
353 /* don't merge requests of different failfast settings */
354 if (blk_failfast_dev(req) != blk_failfast_dev(next) ||
355 blk_failfast_transport(req) != blk_failfast_transport(next) ||
356 blk_failfast_driver(req) != blk_failfast_driver(next))
357 return 0;
358
359 /* 383 /*
360 * If we are allowed to merge, then append bio list 384 * If we are allowed to merge, then append bio list
361 * from next to rq and release next. merge_requests_fn 385 * from next to rq and release next. merge_requests_fn
@@ -366,6 +390,19 @@ static int attempt_merge(struct request_queue *q, struct request *req,
366 return 0; 390 return 0;
367 391
368 /* 392 /*
393 * If failfast settings disagree or any of the two is already
394 * a mixed merge, mark both as mixed before proceeding. This
395 * makes sure that all involved bios have mixable attributes
396 * set properly.
397 */
398 if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
399 (req->cmd_flags & REQ_FAILFAST_MASK) !=
400 (next->cmd_flags & REQ_FAILFAST_MASK)) {
401 blk_rq_set_mixed_merge(req);
402 blk_rq_set_mixed_merge(next);
403 }
404
405 /*
369 * At this point we have either done a back merge 406 * At this point we have either done a back merge
370 * or front merge. We need the smaller start_time of 407 * or front merge. We need the smaller start_time of
371 * the merged requests to be the current request 408 * the merged requests to be the current request
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 476d87065073..83413ff83739 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -428,6 +428,25 @@ void blk_queue_io_min(struct request_queue *q, unsigned int min)
428EXPORT_SYMBOL(blk_queue_io_min); 428EXPORT_SYMBOL(blk_queue_io_min);
429 429
430/** 430/**
431 * blk_limits_io_opt - set optimal request size for a device
432 * @limits: the queue limits
433 * @opt: optimal request size in bytes
434 *
435 * Description:
436 * Storage devices may report an optimal I/O size, which is the
437 * device's preferred unit for sustained I/O. This is rarely reported
438 * for disk drives. For RAID arrays it is usually the stripe width or
439 * the internal track size. A properly aligned multiple of
440 * optimal_io_size is the preferred request size for workloads where
441 * sustained throughput is desired.
442 */
443void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt)
444{
445 limits->io_opt = opt;
446}
447EXPORT_SYMBOL(blk_limits_io_opt);
448
449/**
431 * blk_queue_io_opt - set optimal request size for the queue 450 * blk_queue_io_opt - set optimal request size for the queue
432 * @q: the request queue for the device 451 * @q: the request queue for the device
433 * @opt: optimal request size in bytes 452 * @opt: optimal request size in bytes
@@ -442,7 +461,7 @@ EXPORT_SYMBOL(blk_queue_io_min);
442 */ 461 */
443void blk_queue_io_opt(struct request_queue *q, unsigned int opt) 462void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
444{ 463{
445 q->limits.io_opt = opt; 464 blk_limits_io_opt(&q->limits, opt);
446} 465}
447EXPORT_SYMBOL(blk_queue_io_opt); 466EXPORT_SYMBOL(blk_queue_io_opt);
448 467
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d3aa2aadb3e0..b78c9c3e2670 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -40,7 +40,12 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
40{ 40{
41 struct request_list *rl = &q->rq; 41 struct request_list *rl = &q->rq;
42 unsigned long nr; 42 unsigned long nr;
43 int ret = queue_var_store(&nr, page, count); 43 int ret;
44
45 if (!q->request_fn)
46 return -EINVAL;
47
48 ret = queue_var_store(&nr, page, count);
44 if (nr < BLKDEV_MIN_RQ) 49 if (nr < BLKDEV_MIN_RQ)
45 nr = BLKDEV_MIN_RQ; 50 nr = BLKDEV_MIN_RQ;
46 51
diff --git a/block/blk.h b/block/blk.h
index 3fae6add5430..5ee3d7e72feb 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -104,6 +104,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
104int attempt_back_merge(struct request_queue *q, struct request *rq); 104int attempt_back_merge(struct request_queue *q, struct request *rq);
105int attempt_front_merge(struct request_queue *q, struct request *rq); 105int attempt_front_merge(struct request_queue *q, struct request *rq);
106void blk_recalc_rq_segments(struct request *rq); 106void blk_recalc_rq_segments(struct request *rq);
107void blk_rq_set_mixed_merge(struct request *rq);
107 108
108void blk_queue_congestion_threshold(struct request_queue *q); 109void blk_queue_congestion_threshold(struct request_queue *q);
109 110
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index fd7080ed7935..0e3814b662af 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -134,13 +134,8 @@ struct cfq_data {
134 struct rb_root prio_trees[CFQ_PRIO_LISTS]; 134 struct rb_root prio_trees[CFQ_PRIO_LISTS];
135 135
136 unsigned int busy_queues; 136 unsigned int busy_queues;
137 /*
138 * Used to track any pending rt requests so we can pre-empt current
139 * non-RT cfqq in service when this value is non-zero.
140 */
141 unsigned int busy_rt_queues;
142 137
143 int rq_in_driver; 138 int rq_in_driver[2];
144 int sync_flight; 139 int sync_flight;
145 140
146 /* 141 /*
@@ -191,7 +186,6 @@ enum cfqq_state_flags {
191 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ 186 CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */
192 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ 187 CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */
193 CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */ 188 CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */
194 CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */
195 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ 189 CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */
196 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ 190 CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */
197 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ 191 CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */
@@ -218,7 +212,6 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
218CFQ_CFQQ_FNS(on_rr); 212CFQ_CFQQ_FNS(on_rr);
219CFQ_CFQQ_FNS(wait_request); 213CFQ_CFQQ_FNS(wait_request);
220CFQ_CFQQ_FNS(must_dispatch); 214CFQ_CFQQ_FNS(must_dispatch);
221CFQ_CFQQ_FNS(must_alloc);
222CFQ_CFQQ_FNS(must_alloc_slice); 215CFQ_CFQQ_FNS(must_alloc_slice);
223CFQ_CFQQ_FNS(fifo_expire); 216CFQ_CFQQ_FNS(fifo_expire);
224CFQ_CFQQ_FNS(idle_window); 217CFQ_CFQQ_FNS(idle_window);
@@ -239,6 +232,11 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
239static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *, 232static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
240 struct io_context *); 233 struct io_context *);
241 234
235static inline int rq_in_driver(struct cfq_data *cfqd)
236{
237 return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1];
238}
239
242static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic, 240static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
243 int is_sync) 241 int is_sync)
244{ 242{
@@ -257,7 +255,7 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
257 */ 255 */
258static inline int cfq_bio_sync(struct bio *bio) 256static inline int cfq_bio_sync(struct bio *bio)
259{ 257{
260 if (bio_data_dir(bio) == READ || bio_sync(bio)) 258 if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO))
261 return 1; 259 return 1;
262 260
263 return 0; 261 return 0;
@@ -648,8 +646,6 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
648 BUG_ON(cfq_cfqq_on_rr(cfqq)); 646 BUG_ON(cfq_cfqq_on_rr(cfqq));
649 cfq_mark_cfqq_on_rr(cfqq); 647 cfq_mark_cfqq_on_rr(cfqq);
650 cfqd->busy_queues++; 648 cfqd->busy_queues++;
651 if (cfq_class_rt(cfqq))
652 cfqd->busy_rt_queues++;
653 649
654 cfq_resort_rr_list(cfqd, cfqq); 650 cfq_resort_rr_list(cfqd, cfqq);
655} 651}
@@ -673,8 +669,6 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
673 669
674 BUG_ON(!cfqd->busy_queues); 670 BUG_ON(!cfqd->busy_queues);
675 cfqd->busy_queues--; 671 cfqd->busy_queues--;
676 if (cfq_class_rt(cfqq))
677 cfqd->busy_rt_queues--;
678} 672}
679 673
680/* 674/*
@@ -760,9 +754,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
760{ 754{
761 struct cfq_data *cfqd = q->elevator->elevator_data; 755 struct cfq_data *cfqd = q->elevator->elevator_data;
762 756
763 cfqd->rq_in_driver++; 757 cfqd->rq_in_driver[rq_is_sync(rq)]++;
764 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d", 758 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
765 cfqd->rq_in_driver); 759 rq_in_driver(cfqd));
766 760
767 cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq); 761 cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
768} 762}
@@ -770,11 +764,12 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
770static void cfq_deactivate_request(struct request_queue *q, struct request *rq) 764static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
771{ 765{
772 struct cfq_data *cfqd = q->elevator->elevator_data; 766 struct cfq_data *cfqd = q->elevator->elevator_data;
767 const int sync = rq_is_sync(rq);
773 768
774 WARN_ON(!cfqd->rq_in_driver); 769 WARN_ON(!cfqd->rq_in_driver[sync]);
775 cfqd->rq_in_driver--; 770 cfqd->rq_in_driver[sync]--;
776 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d", 771 cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
777 cfqd->rq_in_driver); 772 rq_in_driver(cfqd));
778} 773}
779 774
780static void cfq_remove_request(struct request *rq) 775static void cfq_remove_request(struct request *rq)
@@ -1080,7 +1075,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
1080 /* 1075 /*
1081 * still requests with the driver, don't idle 1076 * still requests with the driver, don't idle
1082 */ 1077 */
1083 if (cfqd->rq_in_driver) 1078 if (rq_in_driver(cfqd))
1084 return; 1079 return;
1085 1080
1086 /* 1081 /*
@@ -1115,6 +1110,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
1115 1110
1116 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert"); 1111 cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
1117 1112
1113 cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
1118 cfq_remove_request(rq); 1114 cfq_remove_request(rq);
1119 cfqq->dispatched++; 1115 cfqq->dispatched++;
1120 elv_dispatch_sort(q, rq); 1116 elv_dispatch_sort(q, rq);
@@ -1179,20 +1175,6 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
1179 goto expire; 1175 goto expire;
1180 1176
1181 /* 1177 /*
1182 * If we have a RT cfqq waiting, then we pre-empt the current non-rt
1183 * cfqq.
1184 */
1185 if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) {
1186 /*
1187 * We simulate this as cfqq timed out so that it gets to bank
1188 * the remaining of its time slice.
1189 */
1190 cfq_log_cfqq(cfqd, cfqq, "preempt");
1191 cfq_slice_expired(cfqd, 1);
1192 goto new_queue;
1193 }
1194
1195 /*
1196 * The active queue has requests and isn't expired, allow it to 1178 * The active queue has requests and isn't expired, allow it to
1197 * dispatch. 1179 * dispatch.
1198 */ 1180 */
@@ -1312,6 +1294,12 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
1312 return 0; 1294 return 0;
1313 1295
1314 /* 1296 /*
1297 * Drain async requests before we start sync IO
1298 */
1299 if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
1300 return 0;
1301
1302 /*
1315 * If this is an async queue and we have sync IO in flight, let it wait 1303 * If this is an async queue and we have sync IO in flight, let it wait
1316 */ 1304 */
1317 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) 1305 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
@@ -1362,7 +1350,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
1362 cfq_slice_expired(cfqd, 0); 1350 cfq_slice_expired(cfqd, 0);
1363 } 1351 }
1364 1352
1365 cfq_log(cfqd, "dispatched a request"); 1353 cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
1366 return 1; 1354 return 1;
1367} 1355}
1368 1356
@@ -2130,11 +2118,11 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
2130 */ 2118 */
2131static void cfq_update_hw_tag(struct cfq_data *cfqd) 2119static void cfq_update_hw_tag(struct cfq_data *cfqd)
2132{ 2120{
2133 if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak) 2121 if (rq_in_driver(cfqd) > cfqd->rq_in_driver_peak)
2134 cfqd->rq_in_driver_peak = cfqd->rq_in_driver; 2122 cfqd->rq_in_driver_peak = rq_in_driver(cfqd);
2135 2123
2136 if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN && 2124 if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
2137 cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN) 2125 rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN)
2138 return; 2126 return;
2139 2127
2140 if (cfqd->hw_tag_samples++ < 50) 2128 if (cfqd->hw_tag_samples++ < 50)
@@ -2161,9 +2149,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
2161 2149
2162 cfq_update_hw_tag(cfqd); 2150 cfq_update_hw_tag(cfqd);
2163 2151
2164 WARN_ON(!cfqd->rq_in_driver); 2152 WARN_ON(!cfqd->rq_in_driver[sync]);
2165 WARN_ON(!cfqq->dispatched); 2153 WARN_ON(!cfqq->dispatched);
2166 cfqd->rq_in_driver--; 2154 cfqd->rq_in_driver[sync]--;
2167 cfqq->dispatched--; 2155 cfqq->dispatched--;
2168 2156
2169 if (cfq_cfqq_sync(cfqq)) 2157 if (cfq_cfqq_sync(cfqq))
@@ -2197,7 +2185,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
2197 cfq_arm_slice_timer(cfqd); 2185 cfq_arm_slice_timer(cfqd);
2198 } 2186 }
2199 2187
2200 if (!cfqd->rq_in_driver) 2188 if (!rq_in_driver(cfqd))
2201 cfq_schedule_dispatch(cfqd); 2189 cfq_schedule_dispatch(cfqd);
2202} 2190}
2203 2191
@@ -2229,8 +2217,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
2229 2217
2230static inline int __cfq_may_queue(struct cfq_queue *cfqq) 2218static inline int __cfq_may_queue(struct cfq_queue *cfqq)
2231{ 2219{
2232 if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) && 2220 if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
2233 !cfq_cfqq_must_alloc_slice(cfqq)) {
2234 cfq_mark_cfqq_must_alloc_slice(cfqq); 2221 cfq_mark_cfqq_must_alloc_slice(cfqq);
2235 return ELV_MQUEUE_MUST; 2222 return ELV_MQUEUE_MUST;
2236 } 2223 }
@@ -2317,7 +2304,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
2317 } 2304 }
2318 2305
2319 cfqq->allocated[rw]++; 2306 cfqq->allocated[rw]++;
2320 cfq_clear_cfqq_must_alloc(cfqq);
2321 atomic_inc(&cfqq->ref); 2307 atomic_inc(&cfqq->ref);
2322 2308
2323 spin_unlock_irqrestore(q->queue_lock, flags); 2309 spin_unlock_irqrestore(q->queue_lock, flags);
diff --git a/block/elevator.c b/block/elevator.c
index 2d511f9105e1..1975b619c86d 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -79,7 +79,8 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
79 /* 79 /*
80 * Don't merge file system requests and discard requests 80 * Don't merge file system requests and discard requests
81 */ 81 */
82 if (bio_discard(bio) != bio_discard(rq->bio)) 82 if (bio_rw_flagged(bio, BIO_RW_DISCARD) !=
83 bio_rw_flagged(rq->bio, BIO_RW_DISCARD))
83 return 0; 84 return 0;
84 85
85 /* 86 /*
@@ -100,19 +101,6 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
100 if (bio_integrity(bio) != blk_integrity_rq(rq)) 101 if (bio_integrity(bio) != blk_integrity_rq(rq))
101 return 0; 102 return 0;
102 103
103 /*
104 * Don't merge if failfast settings don't match.
105 *
106 * FIXME: The negation in front of each condition is necessary
107 * because bio and request flags use different bit positions
108 * and the accessors return those bits directly. This
109 * ugliness will soon go away.
110 */
111 if (!bio_failfast_dev(bio) != !blk_failfast_dev(rq) ||
112 !bio_failfast_transport(bio) != !blk_failfast_transport(rq) ||
113 !bio_failfast_driver(bio) != !blk_failfast_driver(rq))
114 return 0;
115
116 if (!elv_iosched_allow_merge(rq, bio)) 104 if (!elv_iosched_allow_merge(rq, bio))
117 return 0; 105 return 0;
118 106
diff --git a/block/genhd.c b/block/genhd.c
index f4c64c2b303a..5b76bf55d05c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -869,6 +869,7 @@ static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
869static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL); 869static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
870static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL); 870static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
871static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 871static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
872static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
872#ifdef CONFIG_FAIL_MAKE_REQUEST 873#ifdef CONFIG_FAIL_MAKE_REQUEST
873static struct device_attribute dev_attr_fail = 874static struct device_attribute dev_attr_fail =
874 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 875 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -888,6 +889,7 @@ static struct attribute *disk_attrs[] = {
888 &dev_attr_alignment_offset.attr, 889 &dev_attr_alignment_offset.attr,
889 &dev_attr_capability.attr, 890 &dev_attr_capability.attr,
890 &dev_attr_stat.attr, 891 &dev_attr_stat.attr,
892 &dev_attr_inflight.attr,
891#ifdef CONFIG_FAIL_MAKE_REQUEST 893#ifdef CONFIG_FAIL_MAKE_REQUEST
892 &dev_attr_fail.attr, 894 &dev_attr_fail.attr,
893#endif 895#endif
@@ -1053,7 +1055,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
1053 part_stat_read(hd, merges[1]), 1055 part_stat_read(hd, merges[1]),
1054 (unsigned long long)part_stat_read(hd, sectors[1]), 1056 (unsigned long long)part_stat_read(hd, sectors[1]),
1055 jiffies_to_msecs(part_stat_read(hd, ticks[1])), 1057 jiffies_to_msecs(part_stat_read(hd, ticks[1])),
1056 hd->in_flight, 1058 part_in_flight(hd),
1057 jiffies_to_msecs(part_stat_read(hd, io_ticks)), 1059 jiffies_to_msecs(part_stat_read(hd, io_ticks)),
1058 jiffies_to_msecs(part_stat_read(hd, time_in_queue)) 1060 jiffies_to_msecs(part_stat_read(hd, time_in_queue))
1059 ); 1061 );
@@ -1215,6 +1217,16 @@ void put_disk(struct gendisk *disk)
1215 1217
1216EXPORT_SYMBOL(put_disk); 1218EXPORT_SYMBOL(put_disk);
1217 1219
1220static void set_disk_ro_uevent(struct gendisk *gd, int ro)
1221{
1222 char event[] = "DISK_RO=1";
1223 char *envp[] = { event, NULL };
1224
1225 if (!ro)
1226 event[8] = '0';
1227 kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
1228}
1229
1218void set_device_ro(struct block_device *bdev, int flag) 1230void set_device_ro(struct block_device *bdev, int flag)
1219{ 1231{
1220 bdev->bd_part->policy = flag; 1232 bdev->bd_part->policy = flag;
@@ -1227,8 +1239,12 @@ void set_disk_ro(struct gendisk *disk, int flag)
1227 struct disk_part_iter piter; 1239 struct disk_part_iter piter;
1228 struct hd_struct *part; 1240 struct hd_struct *part;
1229 1241
1230 disk_part_iter_init(&piter, disk, 1242 if (disk->part0.policy != flag) {
1231 DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0); 1243 set_disk_ro_uevent(disk, flag);
1244 disk->part0.policy = flag;
1245 }
1246
1247 disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
1232 while ((part = disk_part_iter_next(&piter))) 1248 while ((part = disk_part_iter_next(&piter)))
1233 part->policy = flag; 1249 part->policy = flag;
1234 disk_part_iter_exit(&piter); 1250 disk_part_iter_exit(&piter);
diff --git a/block/ioctl.c b/block/ioctl.c
index 500e4c73cc52..d3e6b5827a34 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -112,22 +112,9 @@ static int blkdev_reread_part(struct block_device *bdev)
112 return res; 112 return res;
113} 113}
114 114
115static void blk_ioc_discard_endio(struct bio *bio, int err)
116{
117 if (err) {
118 if (err == -EOPNOTSUPP)
119 set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
120 clear_bit(BIO_UPTODATE, &bio->bi_flags);
121 }
122 complete(bio->bi_private);
123}
124
125static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, 115static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
126 uint64_t len) 116 uint64_t len)
127{ 117{
128 struct request_queue *q = bdev_get_queue(bdev);
129 int ret = 0;
130
131 if (start & 511) 118 if (start & 511)
132 return -EINVAL; 119 return -EINVAL;
133 if (len & 511) 120 if (len & 511)
@@ -137,40 +124,8 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
137 124
138 if (start + len > (bdev->bd_inode->i_size >> 9)) 125 if (start + len > (bdev->bd_inode->i_size >> 9))
139 return -EINVAL; 126 return -EINVAL;
140 127 return blkdev_issue_discard(bdev, start, len, GFP_KERNEL,
141 if (!q->prepare_discard_fn) 128 DISCARD_FL_WAIT);
142 return -EOPNOTSUPP;
143
144 while (len && !ret) {
145 DECLARE_COMPLETION_ONSTACK(wait);
146 struct bio *bio;
147
148 bio = bio_alloc(GFP_KERNEL, 0);
149
150 bio->bi_end_io = blk_ioc_discard_endio;
151 bio->bi_bdev = bdev;
152 bio->bi_private = &wait;
153 bio->bi_sector = start;
154
155 if (len > queue_max_hw_sectors(q)) {
156 bio->bi_size = queue_max_hw_sectors(q) << 9;
157 len -= queue_max_hw_sectors(q);
158 start += queue_max_hw_sectors(q);
159 } else {
160 bio->bi_size = len << 9;
161 len = 0;
162 }
163 submit_bio(DISCARD_NOBARRIER, bio);
164
165 wait_for_completion(&wait);
166
167 if (bio_flagged(bio, BIO_EOPNOTSUPP))
168 ret = -EOPNOTSUPP;
169 else if (!bio_flagged(bio, BIO_UPTODATE))
170 ret = -EIO;
171 bio_put(bio);
172 }
173 return ret;
174} 129}
175 130
176static int put_ushort(unsigned long arg, unsigned short val) 131static int put_ushort(unsigned long arg, unsigned short val)