author	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-14 20:55:15 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2009-09-14 20:55:15 -0400
commit	355bbd8cb82e60a592f6cd86ce6dbe5677615cf4 (patch)
tree	23678e50ad4687f1656edc972388ee8014e7b89d
parent	39695224bd84dc4be29abad93a0ec232a16fc519 (diff)
parent	746cd1e7e4a555ddaee53b19a46e05c9c61eaf09 (diff)
Merge branch 'for-2.6.32' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.32' of git://git.kernel.dk/linux-2.6-block: (29 commits)
  block: use blkdev_issue_discard in blk_ioctl_discard
  Make DISCARD_BARRIER and DISCARD_NOBARRIER writes instead of reads
  block: don't assume device has a request list backing in nr_requests store
  block: Optimal I/O limit wrapper
  cfq: choose a new next_req when a request is dispatched
  Seperate read and write statistics of in_flight requests
  aoe: end barrier bios with EOPNOTSUPP
  block: trace bio queueing trial only when it occurs
  block: enable rq CPU completion affinity by default
  cfq: fix the log message after dispatched a request
  block: use printk_once
  cciss: memory leak in cciss_init_one()
  splice: update mtime and atime on files
  block: make blk_iopoll_prep_sched() follow normal 0/1 return convention
  cfq-iosched: get rid of must_alloc flag
  block: use interrupts disabled version of raise_softirq_irqoff()
  block: fix comment in blk-iopoll.c
  block: adjust default budget for blk-iopoll
  block: fix long lines in block/blk-iopoll.c
  block: add blk-iopoll, a NAPI like approach for block devices
  ...
-rw-r--r--	block/Makefile	2
-rw-r--r--	block/blk-barrier.c	31
-rw-r--r--	block/blk-core.c	166
-rw-r--r--	block/blk-iopoll.c	227
-rw-r--r--	block/blk-merge.c	51
-rw-r--r--	block/blk-settings.c	21
-rw-r--r--	block/blk-sysfs.c	7
-rw-r--r--	block/blk.h	1
-rw-r--r--	block/cfq-iosched.c	72
-rw-r--r--	block/elevator.c	16
-rw-r--r--	block/genhd.c	22
-rw-r--r--	block/ioctl.c	49
-rw-r--r--	drivers/block/aoe/aoeblk.c	3
-rw-r--r--	drivers/block/cciss.c	4
-rw-r--r--	drivers/block/loop.c	2
-rw-r--r--	drivers/block/paride/pcd.c	12
-rw-r--r--	drivers/block/sx8.c	4
-rw-r--r--	drivers/block/viodasd.c	12
-rw-r--r--	drivers/md/dm-raid1.c	2
-rw-r--r--	drivers/md/dm-stripe.c	4
-rw-r--r--	drivers/md/dm.c	28
-rw-r--r--	drivers/md/linear.c	2
-rw-r--r--	drivers/md/multipath.c	4
-rw-r--r--	drivers/md/raid0.c	2
-rw-r--r--	drivers/md/raid1.c	14
-rw-r--r--	drivers/md/raid10.c	6
-rw-r--r--	drivers/md/raid5.c	2
-rw-r--r--	drivers/scsi/scsi_lib.c	6
-rw-r--r--	drivers/staging/dst/dcore.c	5
-rw-r--r--	fs/btrfs/extent-tree.c	3
-rw-r--r--	fs/btrfs/volumes.c	4
-rw-r--r--	fs/gfs2/rgrp.c	6
-rw-r--r--	fs/partitions/check.c	12
-rw-r--r--	fs/splice.c	8
-rw-r--r--	include/linux/bio.h	69
-rw-r--r--	include/linux/blk-iopoll.h	48
-rw-r--r--	include/linux/blkdev.h	44
-rw-r--r--	include/linux/fs.h	4
-rw-r--r--	include/linux/genhd.h	21
-rw-r--r--	include/linux/interrupt.h	1
-rw-r--r--	kernel/sysctl.c	10
-rw-r--r--	mm/swapfile.c	6
42 files changed, 729 insertions(+), 284 deletions(-)
diff --git a/block/Makefile b/block/Makefile
index 6c54ed0ff75..ba74ca6bfa1 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
 			blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
 			blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
-			ioctl.o genhd.o scsi_ioctl.o
+			blk-iopoll.o ioctl.o genhd.o scsi_ioctl.o
 
 obj-$(CONFIG_BLK_DEV_BSG)	+= bsg.o
 obj-$(CONFIG_IOSCHED_NOOP)	+= noop-iosched.o
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 30022b4e2f6..6593ab39cfe 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -348,6 +348,9 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
 		clear_bit(BIO_UPTODATE, &bio->bi_flags);
 	}
 
+	if (bio->bi_private)
+		complete(bio->bi_private);
+
 	bio_put(bio);
 }
 
@@ -357,21 +360,20 @@ static void blkdev_discard_end_io(struct bio *bio, int err)
  * @sector:	start sector
  * @nr_sects:	number of sectors to discard
  * @gfp_mask:	memory allocation flags (for bio_alloc)
+ * @flags:	DISCARD_FL_* flags to control behaviour
  *
  * Description:
- *    Issue a discard request for the sectors in question. Does not wait.
+ *    Issue a discard request for the sectors in question.
  */
-int blkdev_issue_discard(struct block_device *bdev,
-			 sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
+int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
+			 sector_t nr_sects, gfp_t gfp_mask, int flags)
 {
-	struct request_queue *q;
-	struct bio *bio;
+	DECLARE_COMPLETION_ONSTACK(wait);
+	struct request_queue *q = bdev_get_queue(bdev);
+	int type = flags & DISCARD_FL_BARRIER ?
+		DISCARD_BARRIER : DISCARD_NOBARRIER;
 	int ret = 0;
 
-	if (bdev->bd_disk == NULL)
-		return -ENXIO;
-
-	q = bdev_get_queue(bdev);
 	if (!q)
 		return -ENXIO;
 
@@ -379,12 +381,14 @@ int blkdev_issue_discard(struct block_device *bdev,
 		return -EOPNOTSUPP;
 
 	while (nr_sects && !ret) {
-		bio = bio_alloc(gfp_mask, 0);
+		struct bio *bio = bio_alloc(gfp_mask, 0);
 		if (!bio)
 			return -ENOMEM;
 
 		bio->bi_end_io = blkdev_discard_end_io;
 		bio->bi_bdev = bdev;
+		if (flags & DISCARD_FL_WAIT)
+			bio->bi_private = &wait;
 
 		bio->bi_sector = sector;
 
@@ -396,10 +400,13 @@ int blkdev_issue_discard(struct block_device *bdev,
 			bio->bi_size = nr_sects << 9;
 			nr_sects = 0;
 		}
+
 		bio_get(bio);
-		submit_bio(DISCARD_BARRIER, bio);
+		submit_bio(type, bio);
+
+		if (flags & DISCARD_FL_WAIT)
+			wait_for_completion(&wait);
 
-		/* Check if it failed immediately */
 		if (bio_flagged(bio, BIO_EOPNOTSUPP))
 			ret = -EOPNOTSUPP;
 		else if (!bio_flagged(bio, BIO_UPTODATE))
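
With the DISCARD_FL_* flags, the open-coded wait loop in block/ioctl.c (removed further down) collapses into a single call. A minimal caller sketch, assuming a device whose queue provides a prepare_discard_fn; the sector range is purely illustrative:

	/* discard the first 2048 sectors of bdev and wait for completion */
	int err = blkdev_issue_discard(bdev, 0, 2048, GFP_KERNEL,
				       DISCARD_FL_WAIT);
	if (err == -EOPNOTSUPP)
		printk(KERN_INFO "device does not support discard\n");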
diff --git a/block/blk-core.c b/block/blk-core.c
index e695634882a..8135228e4b2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -69,7 +69,7 @@ static void drive_stat_acct(struct request *rq, int new_io)
 		part_stat_inc(cpu, part, merges[rw]);
 	else {
 		part_round_stats(cpu, part);
-		part_inc_in_flight(part);
+		part_inc_in_flight(part, rw);
 	}
 
 	part_stat_unlock();
@@ -1031,7 +1031,7 @@ static void part_round_stats_single(int cpu, struct hd_struct *part,
 
 	if (part->in_flight) {
 		__part_stat_add(cpu, part, time_in_queue,
-				part->in_flight * (now - part->stamp));
+				part_in_flight(part) * (now - part->stamp));
 		__part_stat_add(cpu, part, io_ticks, (now - part->stamp));
 	}
 	part->stamp = now;
@@ -1112,31 +1112,27 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 	req->cmd_type = REQ_TYPE_FS;
 
 	/*
-	 * inherit FAILFAST from bio (for read-ahead, and explicit FAILFAST)
+	 * Inherit FAILFAST from bio (for read-ahead, and explicit
+	 * FAILFAST).  FAILFAST flags are identical for req and bio.
 	 */
-	if (bio_rw_ahead(bio))
-		req->cmd_flags |= (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT |
-				   REQ_FAILFAST_DRIVER);
-	if (bio_failfast_dev(bio))
-		req->cmd_flags |= REQ_FAILFAST_DEV;
-	if (bio_failfast_transport(bio))
-		req->cmd_flags |= REQ_FAILFAST_TRANSPORT;
-	if (bio_failfast_driver(bio))
-		req->cmd_flags |= REQ_FAILFAST_DRIVER;
-
-	if (unlikely(bio_discard(bio))) {
+	if (bio_rw_flagged(bio, BIO_RW_AHEAD))
+		req->cmd_flags |= REQ_FAILFAST_MASK;
+	else
+		req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK;
+
+	if (unlikely(bio_rw_flagged(bio, BIO_RW_DISCARD))) {
 		req->cmd_flags |= REQ_DISCARD;
-		if (bio_barrier(bio))
+		if (bio_rw_flagged(bio, BIO_RW_BARRIER))
 			req->cmd_flags |= REQ_SOFTBARRIER;
 		req->q->prepare_discard_fn(req->q, req);
-	} else if (unlikely(bio_barrier(bio)))
+	} else if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)))
 		req->cmd_flags |= REQ_HARDBARRIER;
 
-	if (bio_sync(bio))
+	if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
 		req->cmd_flags |= REQ_RW_SYNC;
-	if (bio_rw_meta(bio))
+	if (bio_rw_flagged(bio, BIO_RW_META))
 		req->cmd_flags |= REQ_RW_META;
-	if (bio_noidle(bio))
+	if (bio_rw_flagged(bio, BIO_RW_NOIDLE))
 		req->cmd_flags |= REQ_NOIDLE;
 
 	req->errors = 0;
@@ -1151,7 +1147,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
  */
 static inline bool queue_should_plug(struct request_queue *q)
 {
-	return !(blk_queue_nonrot(q) && blk_queue_tagged(q));
+	return !(blk_queue_nonrot(q) && blk_queue_queuing(q));
 }
 
 static int __make_request(struct request_queue *q, struct bio *bio)
@@ -1160,11 +1156,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 	int el_ret;
 	unsigned int bytes = bio->bi_size;
 	const unsigned short prio = bio_prio(bio);
-	const int sync = bio_sync(bio);
-	const int unplug = bio_unplug(bio);
+	const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
+	const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG);
+	const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK;
 	int rw_flags;
 
-	if (bio_barrier(bio) && bio_has_data(bio) &&
+	if (bio_rw_flagged(bio, BIO_RW_BARRIER) && bio_has_data(bio) &&
 	    (q->next_ordered == QUEUE_ORDERED_NONE)) {
 		bio_endio(bio, -EOPNOTSUPP);
 		return 0;
@@ -1178,7 +1175,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 	spin_lock_irq(q->queue_lock);
 
-	if (unlikely(bio_barrier(bio)) || elv_queue_empty(q))
+	if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q))
 		goto get_rq;
 
 	el_ret = elv_merge(q, &req, bio);
@@ -1191,6 +1188,9 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 		trace_block_bio_backmerge(q, bio);
 
+		if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
+			blk_rq_set_mixed_merge(req);
+
 		req->biotail->bi_next = bio;
 		req->biotail = bio;
 		req->__data_len += bytes;
@@ -1210,6 +1210,12 @@ static int __make_request(struct request_queue *q, struct bio *bio)
 
 		trace_block_bio_frontmerge(q, bio);
 
+		if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff) {
+			blk_rq_set_mixed_merge(req);
+			req->cmd_flags &= ~REQ_FAILFAST_MASK;
+			req->cmd_flags |= ff;
+		}
+
 		bio->bi_next = req->bio;
 		req->bio = bio;
 
@@ -1457,19 +1463,20 @@ static inline void __generic_make_request(struct bio *bio)
 		if (old_sector != -1)
 			trace_block_remap(q, bio, old_dev, old_sector);
 
-		trace_block_bio_queue(q, bio);
-
 		old_sector = bio->bi_sector;
 		old_dev = bio->bi_bdev->bd_dev;
 
 		if (bio_check_eod(bio, nr_sectors))
 			goto end_io;
 
-		if (bio_discard(bio) && !q->prepare_discard_fn) {
+		if (bio_rw_flagged(bio, BIO_RW_DISCARD) &&
+		    !q->prepare_discard_fn) {
 			err = -EOPNOTSUPP;
 			goto end_io;
 		}
 
+		trace_block_bio_queue(q, bio);
+
 		ret = q->make_request_fn(q, bio);
 	} while (ret);
 
@@ -1654,6 +1661,50 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
 
+/**
+ * blk_rq_err_bytes - determine number of bytes till the next failure boundary
+ * @rq: request to examine
+ *
+ * Description:
+ *     A request could be merge of IOs which require different failure
+ *     handling.  This function determines the number of bytes which
+ *     can be failed from the beginning of the request without
+ *     crossing into area which need to be retried further.
+ *
+ * Return:
+ *     The number of bytes to fail.
+ *
+ * Context:
+ *     queue_lock must be held.
+ */
+unsigned int blk_rq_err_bytes(const struct request *rq)
+{
+	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
+	unsigned int bytes = 0;
+	struct bio *bio;
+
+	if (!(rq->cmd_flags & REQ_MIXED_MERGE))
+		return blk_rq_bytes(rq);
+
+	/*
+	 * Currently the only 'mixing' which can happen is between
+	 * different fastfail types.  We can safely fail portions
+	 * which have all the failfast bits that the first one has -
+	 * the ones which are at least as eager to fail as the first
+	 * one.
+	 */
+	for (bio = rq->bio; bio; bio = bio->bi_next) {
+		if ((bio->bi_rw & ff) != ff)
+			break;
+		bytes += bio->bi_size;
+	}
+
+	/* this could lead to infinite loop */
+	BUG_ON(blk_rq_bytes(rq) && !bytes);
+	return bytes;
+}
+EXPORT_SYMBOL_GPL(blk_rq_err_bytes);
+
 static void blk_account_io_completion(struct request *req, unsigned int bytes)
 {
 	if (blk_do_io_stat(req)) {
@@ -1687,7 +1738,7 @@ static void blk_account_io_done(struct request *req)
 		part_stat_inc(cpu, part, ios[rw]);
 		part_stat_add(cpu, part, ticks[rw], duration);
 		part_round_stats(cpu, part);
-		part_dec_in_flight(part);
+		part_dec_in_flight(part, rw);
 
 		part_stat_unlock();
 	}
@@ -1807,8 +1858,15 @@ void blk_dequeue_request(struct request *rq)
 	 * and to it is freed is accounted as io that is in progress at
 	 * the driver side.
 	 */
-	if (blk_account_rq(rq))
+	if (blk_account_rq(rq)) {
 		q->in_flight[rq_is_sync(rq)]++;
+		/*
+		 * Mark this device as supporting hardware queuing, if
+		 * we have more IOs in flight than 4.
+		 */
+		if (!blk_queue_queuing(q) && queue_in_flight(q) > 4)
+			set_bit(QUEUE_FLAG_CQ, &q->queue_flags);
+	}
 }
 
 /**
@@ -2000,6 +2058,12 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
 	if (blk_fs_request(req) || blk_discard_rq(req))
 		req->__sector += total_bytes >> 9;
 
+	/* mixed attributes always follow the first bio */
+	if (req->cmd_flags & REQ_MIXED_MERGE) {
+		req->cmd_flags &= ~REQ_FAILFAST_MASK;
+		req->cmd_flags |= req->bio->bi_rw & REQ_FAILFAST_MASK;
+	}
+
 	/*
 	 * If total number of sectors is less than the first segment
 	 * size, something has gone terribly wrong.
@@ -2179,6 +2243,25 @@ bool blk_end_request_cur(struct request *rq, int error)
 EXPORT_SYMBOL(blk_end_request_cur);
 
 /**
+ * blk_end_request_err - Finish a request till the next failure boundary.
+ * @rq: the request to finish till the next failure boundary for
+ * @error: must be negative errno
+ *
+ * Description:
+ *     Complete @rq till the next failure boundary.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ */
+bool blk_end_request_err(struct request *rq, int error)
+{
+	WARN_ON(error >= 0);
+	return blk_end_request(rq, error, blk_rq_err_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(blk_end_request_err);
+
+/**
  * __blk_end_request - Helper function for drivers to complete the request.
  * @rq: the request being processed
  * @error: %0 for success, < %0 for error
@@ -2237,12 +2320,31 @@ bool __blk_end_request_cur(struct request *rq, int error)
 }
 EXPORT_SYMBOL(__blk_end_request_cur);
 
+/**
+ * __blk_end_request_err - Finish a request till the next failure boundary.
+ * @rq: the request to finish till the next failure boundary for
+ * @error: must be negative errno
+ *
+ * Description:
+ *     Complete @rq till the next failure boundary.  Must be called
+ *     with queue lock held.
+ *
+ * Return:
+ *     %false - we are done with this request
+ *     %true  - still buffers pending for this request
+ */
+bool __blk_end_request_err(struct request *rq, int error)
+{
+	WARN_ON(error >= 0);
+	return __blk_end_request(rq, error, blk_rq_err_bytes(rq));
+}
+EXPORT_SYMBOL_GPL(__blk_end_request_err);
+
 void blk_rq_bio_prep(struct request_queue *q, struct request *rq,
 		     struct bio *bio)
 {
-	/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw, and
-	   we want BIO_RW_AHEAD (bit 1) to imply REQ_FAILFAST (bit 1). */
-	rq->cmd_flags |= (bio->bi_rw & 3);
+	/* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */
+	rq->cmd_flags |= bio->bi_rw & REQ_RW;
 
 	if (bio_has_data(bio)) {
 		rq->nr_phys_segments = bio_phys_segments(q, bio);
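
The failure-boundary helpers above pair up: a driver that hits a hard error can fail just the eagerly-failing head of a mixed-merged request and leave the remainder pending for retry. A hypothetical completion-path sketch (surrounding error handling elided):

	/* with queue_lock held: fail only up to the failure boundary */
	if (unlikely(error)) {
		if (__blk_end_request_err(rq, error))
			return;		/* bytes past the boundary still pending */
		/* entire request completed */
	}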
diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c
new file mode 100644
index 00000000000..ca564202ed7
--- /dev/null
+++ b/block/blk-iopoll.c
@@ -0,0 +1,227 @@
+/*
+ * Functions related to interrupt-poll handling in the block layer. This
+ * is similar to NAPI for network devices.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/interrupt.h>
+#include <linux/cpu.h>
+#include <linux/blk-iopoll.h>
+#include <linux/delay.h>
+
+#include "blk.h"
+
+int blk_iopoll_enabled = 1;
+EXPORT_SYMBOL(blk_iopoll_enabled);
+
+static unsigned int blk_iopoll_budget __read_mostly = 256;
+
+static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll);
+
+/**
+ * blk_iopoll_sched - Schedule a run of the iopoll handler
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Add this blk_iopoll structure to the pending poll list and trigger the
+ *     raise of the blk iopoll softirq. The driver must already have gotten a
+ *     successful return from blk_iopoll_sched_prep() before calling this.
+ **/
+void blk_iopoll_sched(struct blk_iopoll *iop)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll));
+	__raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(blk_iopoll_sched);
+
+/**
+ * __blk_iopoll_complete - Mark this @iop as un-polled again
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     See blk_iopoll_complete(). This function must be called with interrupts
+ *     disabled.
+ **/
+void __blk_iopoll_complete(struct blk_iopoll *iop)
+{
+	list_del(&iop->list);
+	smp_mb__before_clear_bit();
+	clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(__blk_iopoll_complete);
+
+/**
+ * blk_iopoll_complete - Mark this @iop as un-polled again
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     If a driver consumes less than the assigned budget in its run of the
+ *     iopoll handler, it'll end the polled mode by calling this function. The
+ *     iopoll handler will not be invoked again before blk_iopoll_sched_prep()
+ *     is called.
+ **/
+void blk_iopoll_complete(struct blk_iopoll *iopoll)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__blk_iopoll_complete(iopoll);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(blk_iopoll_complete);
+
+static void blk_iopoll_softirq(struct softirq_action *h)
+{
+	struct list_head *list = &__get_cpu_var(blk_cpu_iopoll);
+	int rearm = 0, budget = blk_iopoll_budget;
+	unsigned long start_time = jiffies;
+
+	local_irq_disable();
+
+	while (!list_empty(list)) {
+		struct blk_iopoll *iop;
+		int work, weight;
+
+		/*
+		 * If softirq window is exhausted then punt.
+		 */
+		if (budget <= 0 || time_after(jiffies, start_time)) {
+			rearm = 1;
+			break;
+		}
+
+		local_irq_enable();
+
+		/* Even though interrupts have been re-enabled, this
+		 * access is safe because interrupts can only add new
+		 * entries to the tail of this list, and only ->poll()
+		 * calls can remove this head entry from the list.
+		 */
+		iop = list_entry(list->next, struct blk_iopoll, list);
+
+		weight = iop->weight;
+		work = 0;
+		if (test_bit(IOPOLL_F_SCHED, &iop->state))
+			work = iop->poll(iop, weight);
+
+		budget -= work;
+
+		local_irq_disable();
+
+		/*
+		 * Drivers must not modify the iopoll state, if they
+		 * consume their assigned weight (or more, some drivers can't
+		 * easily just stop processing, they have to complete an
+		 * entire mask of commands). In such cases this code
+		 * still "owns" the iopoll instance and therefore can
+		 * move the instance around on the list at-will.
+		 */
+		if (work >= weight) {
+			if (blk_iopoll_disable_pending(iop))
+				__blk_iopoll_complete(iop);
+			else
+				list_move_tail(&iop->list, list);
+		}
+	}
+
+	if (rearm)
+		__raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+
+	local_irq_enable();
+}
+
+/**
+ * blk_iopoll_disable - Disable iopoll on this @iop
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Disable io polling and wait for any pending callbacks to have completed.
+ **/
+void blk_iopoll_disable(struct blk_iopoll *iop)
+{
+	set_bit(IOPOLL_F_DISABLE, &iop->state);
+	while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state))
+		msleep(1);
+	clear_bit(IOPOLL_F_DISABLE, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_disable);
+
+/**
+ * blk_iopoll_enable - Enable iopoll on this @iop
+ * @iop:      The parent iopoll structure
+ *
+ * Description:
+ *     Enable iopoll on this @iop. Note that the handler run will not be
+ *     scheduled, it will only mark it as active.
+ **/
+void blk_iopoll_enable(struct blk_iopoll *iop)
+{
+	BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state));
+	smp_mb__before_clear_bit();
+	clear_bit_unlock(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_enable);
+
+/**
+ * blk_iopoll_init - Initialize this @iop
+ * @iop:      The parent iopoll structure
+ * @weight:   The default weight (or command completion budget)
+ * @poll_fn:  The handler to invoke
+ *
+ * Description:
+ *     Initialize this blk_iopoll structure. Before being actively used, the
+ *     driver must call blk_iopoll_enable().
+ **/
+void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn)
+{
+	memset(iop, 0, sizeof(*iop));
+	INIT_LIST_HEAD(&iop->list);
+	iop->weight = weight;
+	iop->poll = poll_fn;
+	set_bit(IOPOLL_F_SCHED, &iop->state);
+}
+EXPORT_SYMBOL(blk_iopoll_init);
+
+static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self,
+					   unsigned long action, void *hcpu)
+{
+	/*
+	 * If a CPU goes away, splice its entries to the current CPU
+	 * and trigger a run of the softirq
+	 */
+	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+		int cpu = (unsigned long) hcpu;
+
+		local_irq_disable();
+		list_splice_init(&per_cpu(blk_cpu_iopoll, cpu),
+				 &__get_cpu_var(blk_cpu_iopoll));
+		__raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ);
+		local_irq_enable();
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = {
+	.notifier_call	= blk_iopoll_cpu_notify,
+};
+
+static __init int blk_iopoll_setup(void)
+{
+	int i;
+
+	for_each_possible_cpu(i)
+		INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i));
+
+	open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq);
+	register_hotcpu_notifier(&blk_iopoll_cpu_notifier);
+	return 0;
+}
+subsys_initcall(blk_iopoll_setup);
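
Taken together, the intended driver flow mirrors NAPI: the interrupt handler masks further completion interrupts and hands the work to the softirq poller. A hypothetical driver sketch; the my_hw_* names are placeholders, and blk_iopoll_sched_prep() lives in include/linux/blk-iopoll.h, whose hunks are not shown here:

	static irqreturn_t my_irq_handler(int irq, void *data)
	{
		struct my_hw *hw = data;

		/* only schedule if a successful prep claims the instance */
		if (blk_iopoll_sched_prep(&hw->iopoll)) {
			my_hw_irq_disable(hw);		/* mask completion irqs */
			blk_iopoll_sched(&hw->iopoll);	/* poll from softirq */
		}
		return IRQ_HANDLED;
	}

	static int my_iopoll_fn(struct blk_iopoll *iop, int budget)
	{
		struct my_hw *hw = container_of(iop, struct my_hw, iopoll);
		int done = my_hw_reap_completions(hw, budget);

		if (done < budget) {
			blk_iopoll_complete(iop);	/* leave polled mode */
			my_hw_irq_enable(hw);
		}
		return done;
	}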
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e1999679a4d..99cb5cf1f44 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -311,6 +311,36 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 	return 1;
 }
 
+/**
+ * blk_rq_set_mixed_merge - mark a request as mixed merge
+ * @rq: request to mark as mixed merge
+ *
+ * Description:
+ *     @rq is about to be mixed merged.  Make sure the attributes
+ *     which can be mixed are set in each bio and mark @rq as mixed
+ *     merged.
+ */
+void blk_rq_set_mixed_merge(struct request *rq)
+{
+	unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
+	struct bio *bio;
+
+	if (rq->cmd_flags & REQ_MIXED_MERGE)
+		return;
+
+	/*
+	 * @rq will no longer represent mixable attributes for all the
+	 * contained bios.  It will just track those of the first one.
+	 * Distributes the attributes to each bio.
+	 */
+	for (bio = rq->bio; bio; bio = bio->bi_next) {
+		WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
+			     (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
+		bio->bi_rw |= ff;
+	}
+	rq->cmd_flags |= REQ_MIXED_MERGE;
+}
+
 static void blk_account_io_merge(struct request *req)
 {
 	if (blk_do_io_stat(req)) {
@@ -321,7 +351,7 @@ static void blk_account_io_merge(struct request *req)
 		part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req));
 
 		part_round_stats(cpu, part);
-		part_dec_in_flight(part);
+		part_dec_in_flight(part, rq_data_dir(req));
 
 		part_stat_unlock();
 	}
@@ -350,12 +380,6 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 	if (blk_integrity_rq(req) != blk_integrity_rq(next))
 		return 0;
 
-	/* don't merge requests of different failfast settings */
-	if (blk_failfast_dev(req)	!= blk_failfast_dev(next) ||
-	    blk_failfast_transport(req)	!= blk_failfast_transport(next) ||
-	    blk_failfast_driver(req)	!= blk_failfast_driver(next))
-		return 0;
-
 	/*
 	 * If we are allowed to merge, then append bio list
 	 * from next to rq and release next. merge_requests_fn
@@ -366,6 +390,19 @@ static int attempt_merge(struct request_queue *q, struct request *req,
 		return 0;
 
 	/*
+	 * If failfast settings disagree or any of the two is already
+	 * a mixed merge, mark both as mixed before proceeding.  This
+	 * makes sure that all involved bios have mixable attributes
+	 * set properly.
+	 */
+	if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
+	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
+	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
+		blk_rq_set_mixed_merge(req);
+		blk_rq_set_mixed_merge(next);
+	}
+
+	/*
 	 * At this point we have either done a back merge
 	 * or front merge. We need the smaller start_time of
 	 * the merged requests to be the current request
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 476d8706507..83413ff8373 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -428,6 +428,25 @@ void blk_queue_io_min(struct request_queue *q, unsigned int min)
 EXPORT_SYMBOL(blk_queue_io_min);
 
 /**
+ * blk_limits_io_opt - set optimal request size for a device
+ * @limits: the queue limits
+ * @opt:  smallest I/O size in bytes
+ *
+ * Description:
+ *   Storage devices may report an optimal I/O size, which is the
+ *   device's preferred unit for sustained I/O.  This is rarely reported
+ *   for disk drives.  For RAID arrays it is usually the stripe width or
+ *   the internal track size.  A properly aligned multiple of
+ *   optimal_io_size is the preferred request size for workloads where
+ *   sustained throughput is desired.
+ */
+void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt)
+{
+	limits->io_opt = opt;
+}
+EXPORT_SYMBOL(blk_limits_io_opt);
+
+/**
  * blk_queue_io_opt - set optimal request size for the queue
  * @q:	the request queue for the device
  * @opt:  optimal request size in bytes
@@ -442,7 +461,7 @@ EXPORT_SYMBOL(blk_queue_io_min);
  */
 void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 {
-	q->limits.io_opt = opt;
+	blk_limits_io_opt(&q->limits, opt);
 }
 EXPORT_SYMBOL(blk_queue_io_opt);
 
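
Factoring the assignment out of blk_queue_io_opt() gives stacking drivers, which own a struct queue_limits but no request queue of their own, a proper setter; dm-stripe is converted to it further down in this merge. A sketch for a hypothetical striped target (the geometry names are illustrative):

	/* hint full-stripe-width I/O to the layers above */
	blk_limits_io_min(limits, chunk_bytes);
	blk_limits_io_opt(limits, chunk_bytes * nr_data_disks);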
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d3aa2aadb3e..b78c9c3e267 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -40,7 +40,12 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
 {
 	struct request_list *rl = &q->rq;
 	unsigned long nr;
-	int ret = queue_var_store(&nr, page, count);
+	int ret;
+
+	if (!q->request_fn)
+		return -EINVAL;
+
+	ret = queue_var_store(&nr, page, count);
 	if (nr < BLKDEV_MIN_RQ)
 		nr = BLKDEV_MIN_RQ;
 
diff --git a/block/blk.h b/block/blk.h
index 3fae6add543..5ee3d7e72fe 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -104,6 +104,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
 int attempt_back_merge(struct request_queue *q, struct request *rq);
 int attempt_front_merge(struct request_queue *q, struct request *rq);
 void blk_recalc_rq_segments(struct request *rq);
+void blk_rq_set_mixed_merge(struct request *rq);
 
 void blk_queue_congestion_threshold(struct request_queue *q);
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index fd7080ed793..0e3814b662a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -134,13 +134,8 @@ struct cfq_data {
 	struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
 	unsigned int busy_queues;
-	/*
-	 * Used to track any pending rt requests so we can pre-empt current
-	 * non-RT cfqq in service when this value is non-zero.
-	 */
-	unsigned int busy_rt_queues;
 
-	int rq_in_driver;
+	int rq_in_driver[2];
 	int sync_flight;
 
 	/*
@@ -191,7 +186,6 @@ enum cfqq_state_flags {
 	CFQ_CFQQ_FLAG_on_rr = 0,	/* on round-robin busy list */
 	CFQ_CFQQ_FLAG_wait_request,	/* waiting for a request */
 	CFQ_CFQQ_FLAG_must_dispatch,	/* must be allowed a dispatch */
-	CFQ_CFQQ_FLAG_must_alloc,	/* must be allowed rq alloc */
 	CFQ_CFQQ_FLAG_must_alloc_slice,	/* per-slice must_alloc flag */
 	CFQ_CFQQ_FLAG_fifo_expire,	/* FIFO checked in this slice */
 	CFQ_CFQQ_FLAG_idle_window,	/* slice idling enabled */
@@ -218,7 +212,6 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \
 CFQ_CFQQ_FNS(on_rr);
 CFQ_CFQQ_FNS(wait_request);
 CFQ_CFQQ_FNS(must_dispatch);
-CFQ_CFQQ_FNS(must_alloc);
 CFQ_CFQQ_FNS(must_alloc_slice);
 CFQ_CFQQ_FNS(fifo_expire);
 CFQ_CFQQ_FNS(idle_window);
@@ -239,6 +232,11 @@ static struct cfq_queue *cfq_get_queue(struct cfq_data *, int,
 static struct cfq_io_context *cfq_cic_lookup(struct cfq_data *,
 					     struct io_context *);
 
+static inline int rq_in_driver(struct cfq_data *cfqd)
+{
+	return cfqd->rq_in_driver[0] + cfqd->rq_in_driver[1];
+}
+
 static inline struct cfq_queue *cic_to_cfqq(struct cfq_io_context *cic,
 					    int is_sync)
 {
@@ -257,7 +255,7 @@ static inline void cic_set_cfqq(struct cfq_io_context *cic,
  */
 static inline int cfq_bio_sync(struct bio *bio)
 {
-	if (bio_data_dir(bio) == READ || bio_sync(bio))
+	if (bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO))
 		return 1;
 
 	return 0;
@@ -648,8 +646,6 @@ static void cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	cfq_mark_cfqq_on_rr(cfqq);
 	cfqd->busy_queues++;
-	if (cfq_class_rt(cfqq))
-		cfqd->busy_rt_queues++;
 
 	cfq_resort_rr_list(cfqd, cfqq);
 }
@@ -673,8 +669,6 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 
 	BUG_ON(!cfqd->busy_queues);
 	cfqd->busy_queues--;
-	if (cfq_class_rt(cfqq))
-		cfqd->busy_rt_queues--;
 }
 
 /*
@@ -760,9 +754,9 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
 
-	cfqd->rq_in_driver++;
+	cfqd->rq_in_driver[rq_is_sync(rq)]++;
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "activate rq, drv=%d",
-						cfqd->rq_in_driver);
+						rq_in_driver(cfqd));
 
 	cfqd->last_position = blk_rq_pos(rq) + blk_rq_sectors(rq);
 }
@@ -770,11 +764,12 @@ static void cfq_activate_request(struct request_queue *q, struct request *rq)
 static void cfq_deactivate_request(struct request_queue *q, struct request *rq)
 {
 	struct cfq_data *cfqd = q->elevator->elevator_data;
+	const int sync = rq_is_sync(rq);
 
-	WARN_ON(!cfqd->rq_in_driver);
-	cfqd->rq_in_driver--;
+	WARN_ON(!cfqd->rq_in_driver[sync]);
+	cfqd->rq_in_driver[sync]--;
 	cfq_log_cfqq(cfqd, RQ_CFQQ(rq), "deactivate rq, drv=%d",
-						cfqd->rq_in_driver);
+						rq_in_driver(cfqd));
 }
 
 static void cfq_remove_request(struct request *rq)
@@ -1080,7 +1075,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
 	/*
 	 * still requests with the driver, don't idle
 	 */
-	if (cfqd->rq_in_driver)
+	if (rq_in_driver(cfqd))
 		return;
 
 	/*
@@ -1115,6 +1110,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
 
 	cfq_log_cfqq(cfqd, cfqq, "dispatch_insert");
 
+	cfqq->next_rq = cfq_find_next_rq(cfqd, cfqq, rq);
 	cfq_remove_request(rq);
 	cfqq->dispatched++;
 	elv_dispatch_sort(q, rq);
@@ -1179,20 +1175,6 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
 		goto expire;
 
 	/*
-	 * If we have a RT cfqq waiting, then we pre-empt the current non-rt
-	 * cfqq.
-	 */
-	if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) {
-		/*
-		 * We simulate this as cfqq timed out so that it gets to bank
-		 * the remaining of its time slice.
-		 */
-		cfq_log_cfqq(cfqd, cfqq, "preempt");
-		cfq_slice_expired(cfqd, 1);
-		goto new_queue;
-	}
-
-	/*
 	 * The active queue has requests and isn't expired, allow it to
 	 * dispatch.
 	 */
@@ -1312,6 +1294,12 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
 		return 0;
 
 	/*
+	 * Drain async requests before we start sync IO
+	 */
+	if (cfq_cfqq_idle_window(cfqq) && cfqd->rq_in_driver[BLK_RW_ASYNC])
+		return 0;
+
+	/*
 	 * If this is an async queue and we have sync IO in flight, let it wait
 	 */
 	if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
@@ -1362,7 +1350,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
 		cfq_slice_expired(cfqd, 0);
 	}
 
-	cfq_log(cfqd, "dispatched a request");
+	cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
 	return 1;
 }
 
@@ -2130,11 +2118,11 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
  */
 static void cfq_update_hw_tag(struct cfq_data *cfqd)
 {
-	if (cfqd->rq_in_driver > cfqd->rq_in_driver_peak)
-		cfqd->rq_in_driver_peak = cfqd->rq_in_driver;
+	if (rq_in_driver(cfqd) > cfqd->rq_in_driver_peak)
+		cfqd->rq_in_driver_peak = rq_in_driver(cfqd);
 
 	if (cfqd->rq_queued <= CFQ_HW_QUEUE_MIN &&
-	    cfqd->rq_in_driver <= CFQ_HW_QUEUE_MIN)
+	    rq_in_driver(cfqd) <= CFQ_HW_QUEUE_MIN)
 		return;
 
 	if (cfqd->hw_tag_samples++ < 50)
@@ -2161,9 +2149,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 
 	cfq_update_hw_tag(cfqd);
 
-	WARN_ON(!cfqd->rq_in_driver);
+	WARN_ON(!cfqd->rq_in_driver[sync]);
 	WARN_ON(!cfqq->dispatched);
-	cfqd->rq_in_driver--;
+	cfqd->rq_in_driver[sync]--;
 	cfqq->dispatched--;
 
 	if (cfq_cfqq_sync(cfqq))
@@ -2197,7 +2185,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
 		cfq_arm_slice_timer(cfqd);
 	}
 
-	if (!cfqd->rq_in_driver)
+	if (!rq_in_driver(cfqd))
 		cfq_schedule_dispatch(cfqd);
 }
 
@@ -2229,8 +2217,7 @@ static void cfq_prio_boost(struct cfq_queue *cfqq)
 
 static inline int __cfq_may_queue(struct cfq_queue *cfqq)
 {
-	if ((cfq_cfqq_wait_request(cfqq) || cfq_cfqq_must_alloc(cfqq)) &&
-	    !cfq_cfqq_must_alloc_slice(cfqq)) {
+	if (cfq_cfqq_wait_request(cfqq) && !cfq_cfqq_must_alloc_slice(cfqq)) {
 		cfq_mark_cfqq_must_alloc_slice(cfqq);
 		return ELV_MQUEUE_MUST;
 	}
@@ -2317,7 +2304,6 @@ cfq_set_request(struct request_queue *q, struct request *rq, gfp_t gfp_mask)
 	}
 
 	cfqq->allocated[rw]++;
-	cfq_clear_cfqq_must_alloc(cfqq);
 	atomic_inc(&cfqq->ref);
 
 	spin_unlock_irqrestore(q->queue_lock, flags);
diff --git a/block/elevator.c b/block/elevator.c
index 2d511f9105e..1975b619c86 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -79,7 +79,8 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 	/*
 	 * Don't merge file system requests and discard requests
 	 */
-	if (bio_discard(bio) != bio_discard(rq->bio))
+	if (bio_rw_flagged(bio, BIO_RW_DISCARD) !=
+	    bio_rw_flagged(rq->bio, BIO_RW_DISCARD))
 		return 0;
 
 	/*
@@ -100,19 +101,6 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
 	if (bio_integrity(bio) != blk_integrity_rq(rq))
 		return 0;
 
-	/*
-	 * Don't merge if failfast settings don't match.
-	 *
-	 * FIXME: The negation in front of each condition is necessary
-	 * because bio and request flags use different bit positions
-	 * and the accessors return those bits directly.  This
-	 * ugliness will soon go away.
-	 */
-	if (!bio_failfast_dev(bio)	 != !blk_failfast_dev(rq)	||
-	    !bio_failfast_transport(bio) != !blk_failfast_transport(rq)	||
-	    !bio_failfast_driver(bio)	 != !blk_failfast_driver(rq))
-		return 0;
-
 	if (!elv_iosched_allow_merge(rq, bio))
 		return 0;
 
diff --git a/block/genhd.c b/block/genhd.c
index f4c64c2b303..5b76bf55d05 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -869,6 +869,7 @@ static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
 static DEVICE_ATTR(alignment_offset, S_IRUGO, disk_alignment_offset_show, NULL);
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
+static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -888,6 +889,7 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_alignment_offset.attr,
 	&dev_attr_capability.attr,
 	&dev_attr_stat.attr,
+	&dev_attr_inflight.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
 #endif
@@ -1053,7 +1055,7 @@ static int diskstats_show(struct seq_file *seqf, void *v)
 			   part_stat_read(hd, merges[1]),
 			   (unsigned long long)part_stat_read(hd, sectors[1]),
 			   jiffies_to_msecs(part_stat_read(hd, ticks[1])),
-			   hd->in_flight,
+			   part_in_flight(hd),
 			   jiffies_to_msecs(part_stat_read(hd, io_ticks)),
 			   jiffies_to_msecs(part_stat_read(hd, time_in_queue))
 			);
@@ -1215,6 +1217,16 @@ void put_disk(struct gendisk *disk)
 
 EXPORT_SYMBOL(put_disk);
 
+static void set_disk_ro_uevent(struct gendisk *gd, int ro)
+{
+	char event[] = "DISK_RO=1";
+	char *envp[] = { event, NULL };
+
+	if (!ro)
+		event[8] = '0';
+	kobject_uevent_env(&disk_to_dev(gd)->kobj, KOBJ_CHANGE, envp);
+}
+
 void set_device_ro(struct block_device *bdev, int flag)
 {
 	bdev->bd_part->policy = flag;
@@ -1227,8 +1239,12 @@ void set_disk_ro(struct gendisk *disk, int flag)
 	struct disk_part_iter piter;
 	struct hd_struct *part;
 
-	disk_part_iter_init(&piter, disk,
-			DISK_PITER_INCL_EMPTY | DISK_PITER_INCL_PART0);
+	if (disk->part0.policy != flag) {
+		set_disk_ro_uevent(disk, flag);
+		disk->part0.policy = flag;
+	}
+
+	disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY);
 	while ((part = disk_part_iter_next(&piter)))
 		part->policy = flag;
 	disk_part_iter_exit(&piter);
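
The new inflight attribute and the part_in_flight() calls above rely on the in_flight counter having been split per direction; the include/linux/genhd.h hunks are not shown in this view, but the summing accessor is presumably along these lines:

	/* sketch: sum both directions for the legacy single total */
	static inline int part_in_flight(struct hd_struct *part)
	{
		return part->in_flight[0] + part->in_flight[1];
	}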
diff --git a/block/ioctl.c b/block/ioctl.c
index 500e4c73cc5..d3e6b5827a3 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -112,22 +112,9 @@ static int blkdev_reread_part(struct block_device *bdev)
 	return res;
 }
 
-static void blk_ioc_discard_endio(struct bio *bio, int err)
-{
-	if (err) {
-		if (err == -EOPNOTSUPP)
-			set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	}
-	complete(bio->bi_private);
-}
-
 static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 			     uint64_t len)
 {
-	struct request_queue *q = bdev_get_queue(bdev);
-	int ret = 0;
-
 	if (start & 511)
 		return -EINVAL;
 	if (len & 511)
@@ -137,40 +124,8 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 
 	if (start + len > (bdev->bd_inode->i_size >> 9))
 		return -EINVAL;
-
-	if (!q->prepare_discard_fn)
-		return -EOPNOTSUPP;
-
-	while (len && !ret) {
-		DECLARE_COMPLETION_ONSTACK(wait);
-		struct bio *bio;
-
-		bio = bio_alloc(GFP_KERNEL, 0);
-
-		bio->bi_end_io = blk_ioc_discard_endio;
-		bio->bi_bdev = bdev;
-		bio->bi_private = &wait;
-		bio->bi_sector = start;
-
-		if (len > queue_max_hw_sectors(q)) {
-			bio->bi_size = queue_max_hw_sectors(q) << 9;
-			len -= queue_max_hw_sectors(q);
-			start += queue_max_hw_sectors(q);
-		} else {
-			bio->bi_size = len << 9;
-			len = 0;
-		}
-		submit_bio(DISCARD_NOBARRIER, bio);
-
-		wait_for_completion(&wait);
-
-		if (bio_flagged(bio, BIO_EOPNOTSUPP))
-			ret = -EOPNOTSUPP;
-		else if (!bio_flagged(bio, BIO_UPTODATE))
-			ret = -EIO;
-		bio_put(bio);
-	}
-	return ret;
+	return blkdev_issue_discard(bdev, start, len, GFP_KERNEL,
+				    DISCARD_FL_WAIT);
 }
 
 static int put_ushort(unsigned long arg, unsigned short val)
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 95d344971ed..b6cd571adbf 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -172,6 +172,9 @@ aoeblk_make_request(struct request_queue *q, struct bio *bio)
 		BUG();
 		bio_endio(bio, -ENXIO);
 		return 0;
+	} else if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
+		bio_endio(bio, -EOPNOTSUPP);
+		return 0;
 	} else if (bio->bi_io_vec == NULL) {
 		printk(KERN_ERR "aoe: bi_io_vec is NULL\n");
 		BUG();
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index a52cc7fe45e..0589dfbbd7d 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -3889,7 +3889,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	int j = 0;
 	int rc;
 	int dac, return_code;
-	InquiryData_struct *inq_buff = NULL;
+	InquiryData_struct *inq_buff;
 
 	if (reset_devices) {
 		/* Reset the controller with a PCI power-cycle */
@@ -4029,6 +4029,7 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 		printk(KERN_WARNING "cciss: unable to determine firmware"
 			" version of controller\n");
 	}
+	kfree(inq_buff);
 
 	cciss_procinit(i);
 
@@ -4045,7 +4046,6 @@ static int __devinit cciss_init_one(struct pci_dev *pdev,
 	return 1;
 
 clean4:
-	kfree(inq_buff);
 	kfree(hba[i]->cmd_pool_bits);
 	if (hba[i]->cmd_pool)
 		pci_free_consistent(hba[i]->pdev,
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 5757188cd1f..bbb79441d89 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -475,7 +475,7 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio)
 	pos = ((loff_t) bio->bi_sector << 9) + lo->lo_offset;
 
 	if (bio_rw(bio) == WRITE) {
-		int barrier = bio_barrier(bio);
+		bool barrier = bio_rw_flagged(bio, BIO_RW_BARRIER);
 		struct file *file = lo->lo_backing_file;
 
 		if (barrier) {
diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c
index 911dfd98d81..9f3518c515a 100644
--- a/drivers/block/paride/pcd.c
+++ b/drivers/block/paride/pcd.c
@@ -219,8 +219,6 @@ static int pcd_sector; /* address of next requested sector */
 static int pcd_count;		/* number of blocks still to do */
 static char *pcd_buf;		/* buffer for request in progress */
 
-static int pcd_warned;		/* Have we logged a phase warning ? */
-
 /* kernel glue structures */
 
 static int pcd_block_open(struct block_device *bdev, fmode_t mode)
@@ -417,12 +415,10 @@ static int pcd_completion(struct pcd_unit *cd, char *buf, char *fun)
 			printk
 			    ("%s: %s: Unexpected phase %d, d=%d, k=%d\n",
 			     cd->name, fun, p, d, k);
-			if ((verbose < 2) && !pcd_warned) {
-				pcd_warned = 1;
-				printk
-				    ("%s: WARNING: ATAPI phase errors\n",
-				     cd->name);
-			}
+			if (verbose < 2)
+				printk_once(
+				    "%s: WARNING: ATAPI phase errors\n",
+				    cd->name);
 			mdelay(1);
 		}
 		if (k++ > PCD_TMO) {
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index da403b6a7f4..f5cd2e83ebc 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -1564,15 +1564,13 @@ static int carm_init_shm(struct carm_host *host)
 
 static int carm_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	static unsigned int printed_version;
 	struct carm_host *host;
 	unsigned int pci_dac;
 	int rc;
 	struct request_queue *q;
 	unsigned int i;
 
-	if (!printed_version++)
-		printk(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
+	printk_once(KERN_DEBUG DRV_NAME " version " DRV_VERSION "\n");
 
 	rc = pci_enable_device(pdev);
 	if (rc)
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index 390d69bb7c4..b441ce3832e 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -416,15 +416,9 @@ retry:
 		goto retry;
 	}
 	if (we.max_disk > (MAX_DISKNO - 1)) {
-		static int warned;
-
-		if (warned == 0) {
-			warned++;
-			printk(VIOD_KERN_INFO
-			       "Only examining the first %d "
-			       "of %d disks connected\n",
-			       MAX_DISKNO, we.max_disk + 1);
-		}
+		printk_once(VIOD_KERN_INFO
+			"Only examining the first %d of %d disks connected\n",
+			MAX_DISKNO, we.max_disk + 1);
 	}
 
 	/* Send the close event to OS/400.  We DON'T expect a response */
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 33f179e66bf..cc9dc79b078 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1129,7 +1129,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
 	if (error == -EOPNOTSUPP)
 		goto out;
 
-	if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
+	if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD))
 		goto out;
 
 	if (unlikely(error)) {
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 3e563d25173..e0efc1adcaf 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -285,7 +285,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
 	if (!error)
 		return 0; /* I/O complete */
 
-	if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
+	if ((error == -EWOULDBLOCK) && bio_rw_flagged(bio, BIO_RW_AHEAD))
 		return error;
 
 	if (error == -EOPNOTSUPP)
@@ -336,7 +336,7 @@ static void stripe_io_hints(struct dm_target *ti,
336 unsigned chunk_size = (sc->chunk_mask + 1) << 9; 336 unsigned chunk_size = (sc->chunk_mask + 1) << 9;
337 337
338 blk_limits_io_min(limits, chunk_size); 338 blk_limits_io_min(limits, chunk_size);
339 limits->io_opt = chunk_size * sc->stripes; 339 blk_limits_io_opt(limits, chunk_size * sc->stripes);
340} 340}
341 341
342static struct target_type stripe_target = { 342static struct target_type stripe_target = {
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b4845b14740..eee28fac210 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -130,7 +130,7 @@ struct mapped_device {
130 /* 130 /*
131 * A list of ios that arrived while we were suspended. 131 * A list of ios that arrived while we were suspended.
132 */ 132 */
133 atomic_t pending; 133 atomic_t pending[2];
134 wait_queue_head_t wait; 134 wait_queue_head_t wait;
135 struct work_struct work; 135 struct work_struct work;
136 struct bio_list deferred; 136 struct bio_list deferred;
@@ -453,13 +453,14 @@ static void start_io_acct(struct dm_io *io)
453{ 453{
454 struct mapped_device *md = io->md; 454 struct mapped_device *md = io->md;
455 int cpu; 455 int cpu;
456 int rw = bio_data_dir(io->bio);
456 457
457 io->start_time = jiffies; 458 io->start_time = jiffies;
458 459
459 cpu = part_stat_lock(); 460 cpu = part_stat_lock();
460 part_round_stats(cpu, &dm_disk(md)->part0); 461 part_round_stats(cpu, &dm_disk(md)->part0);
461 part_stat_unlock(); 462 part_stat_unlock();
462 dm_disk(md)->part0.in_flight = atomic_inc_return(&md->pending); 463 dm_disk(md)->part0.in_flight[rw] = atomic_inc_return(&md->pending[rw]);
463} 464}
464 465
465static void end_io_acct(struct dm_io *io) 466static void end_io_acct(struct dm_io *io)
@@ -479,8 +480,9 @@ static void end_io_acct(struct dm_io *io)
479 * After this is decremented the bio must not be touched if it is 480 * After this is decremented the bio must not be touched if it is
480 * a barrier. 481 * a barrier.
481 */ 482 */
482 dm_disk(md)->part0.in_flight = pending = 483 dm_disk(md)->part0.in_flight[rw] = pending =
483 atomic_dec_return(&md->pending); 484 atomic_dec_return(&md->pending[rw]);
485 pending += atomic_read(&md->pending[rw^0x1]);
484 486
485 /* nudge anyone waiting on suspend queue */ 487 /* nudge anyone waiting on suspend queue */
486 if (!pending) 488 if (!pending)
@@ -586,7 +588,7 @@ static void dec_pending(struct dm_io *io, int error)
586 */ 588 */
587 spin_lock_irqsave(&md->deferred_lock, flags); 589 spin_lock_irqsave(&md->deferred_lock, flags);
588 if (__noflush_suspending(md)) { 590 if (__noflush_suspending(md)) {
589 if (!bio_barrier(io->bio)) 591 if (!bio_rw_flagged(io->bio, BIO_RW_BARRIER))
590 bio_list_add_head(&md->deferred, 592 bio_list_add_head(&md->deferred,
591 io->bio); 593 io->bio);
592 } else 594 } else
@@ -598,7 +600,7 @@ static void dec_pending(struct dm_io *io, int error)
598 io_error = io->error; 600 io_error = io->error;
599 bio = io->bio; 601 bio = io->bio;
600 602
601 if (bio_barrier(bio)) { 603 if (bio_rw_flagged(bio, BIO_RW_BARRIER)) {
602 /* 604 /*
603 * There can be just one barrier request so we use 605 * There can be just one barrier request so we use
604 * a per-device variable for error reporting. 606 * a per-device variable for error reporting.
@@ -1209,7 +1211,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio)
1209 1211
1210 ci.map = dm_get_table(md); 1212 ci.map = dm_get_table(md);
1211 if (unlikely(!ci.map)) { 1213 if (unlikely(!ci.map)) {
1212 if (!bio_barrier(bio)) 1214 if (!bio_rw_flagged(bio, BIO_RW_BARRIER))
1213 bio_io_error(bio); 1215 bio_io_error(bio);
1214 else 1216 else
1215 if (!md->barrier_error) 1217 if (!md->barrier_error)
@@ -1321,7 +1323,7 @@ static int _dm_request(struct request_queue *q, struct bio *bio)
1321 * we have to queue this io for later. 1323 * we have to queue this io for later.
1322 */ 1324 */
1323 if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || 1325 if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) ||
1324 unlikely(bio_barrier(bio))) { 1326 unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
1325 up_read(&md->io_lock); 1327 up_read(&md->io_lock);
1326 1328
1327 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && 1329 if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) &&
@@ -1344,7 +1346,7 @@ static int dm_make_request(struct request_queue *q, struct bio *bio)
1344{ 1346{
1345 struct mapped_device *md = q->queuedata; 1347 struct mapped_device *md = q->queuedata;
1346 1348
1347 if (unlikely(bio_barrier(bio))) { 1349 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
1348 bio_endio(bio, -EOPNOTSUPP); 1350 bio_endio(bio, -EOPNOTSUPP);
1349 return 0; 1351 return 0;
1350 } 1352 }
@@ -1785,7 +1787,8 @@ static struct mapped_device *alloc_dev(int minor)
1785 if (!md->disk) 1787 if (!md->disk)
1786 goto bad_disk; 1788 goto bad_disk;
1787 1789
1788 atomic_set(&md->pending, 0); 1790 atomic_set(&md->pending[0], 0);
1791 atomic_set(&md->pending[1], 0);
1789 init_waitqueue_head(&md->wait); 1792 init_waitqueue_head(&md->wait);
1790 INIT_WORK(&md->work, dm_wq_work); 1793 INIT_WORK(&md->work, dm_wq_work);
1791 init_waitqueue_head(&md->eventq); 1794 init_waitqueue_head(&md->eventq);
@@ -2088,7 +2091,8 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible)
2088 break; 2091 break;
2089 } 2092 }
2090 spin_unlock_irqrestore(q->queue_lock, flags); 2093 spin_unlock_irqrestore(q->queue_lock, flags);
2091 } else if (!atomic_read(&md->pending)) 2094 } else if (!atomic_read(&md->pending[0]) &&
2095 !atomic_read(&md->pending[1]))
2092 break; 2096 break;
2093 2097
2094 if (interruptible == TASK_INTERRUPTIBLE && 2098 if (interruptible == TASK_INTERRUPTIBLE &&
@@ -2164,7 +2168,7 @@ static void dm_wq_work(struct work_struct *work)
2164 if (dm_request_based(md)) 2168 if (dm_request_based(md))
2165 generic_make_request(c); 2169 generic_make_request(c);
2166 else { 2170 else {
2167 if (bio_barrier(c)) 2171 if (bio_rw_flagged(c, BIO_RW_BARRIER))
2168 process_barrier(md, c); 2172 process_barrier(md, c);
2169 else 2173 else
2170 __split_and_process_bio(md, c); 2174 __split_and_process_bio(md, c);
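
The dm.c changes split the single pending counter by direction so read and write in-flight counts can be reported separately. The pattern, reduced to a sketch:

#include <linux/bio.h>
#include <asm/atomic.h>

static atomic_t pending[2];	/* [0] = reads, [1] = writes, as bio_data_dir() */

static void example_start_io(struct bio *bio)
{
	atomic_inc(&pending[bio_data_dir(bio)]);
}

static int example_all_idle(void)
{
	/* suspend may only proceed once both directions have drained */
	return !atomic_read(&pending[0]) && !atomic_read(&pending[1]);
}
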
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 5fe39c2a3d2..ea484290544 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -288,7 +288,7 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
288 sector_t start_sector; 288 sector_t start_sector;
289 int cpu; 289 int cpu;
290 290
291 if (unlikely(bio_barrier(bio))) { 291 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
292 bio_endio(bio, -EOPNOTSUPP); 292 bio_endio(bio, -EOPNOTSUPP);
293 return 0; 293 return 0;
294 } 294 }
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 7140909f666..89e76819f61 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -90,7 +90,7 @@ static void multipath_end_request(struct bio *bio, int error)
90 90
91 if (uptodate) 91 if (uptodate)
92 multipath_end_bh_io(mp_bh, 0); 92 multipath_end_bh_io(mp_bh, 0);
93 else if (!bio_rw_ahead(bio)) { 93 else if (!bio_rw_flagged(bio, BIO_RW_AHEAD)) {
94 /* 94 /*
95 * oops, IO error: 95 * oops, IO error:
96 */ 96 */
@@ -144,7 +144,7 @@ static int multipath_make_request (struct request_queue *q, struct bio * bio)
144 const int rw = bio_data_dir(bio); 144 const int rw = bio_data_dir(bio);
145 int cpu; 145 int cpu;
146 146
147 if (unlikely(bio_barrier(bio))) { 147 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
148 bio_endio(bio, -EOPNOTSUPP); 148 bio_endio(bio, -EOPNOTSUPP);
149 return 0; 149 return 0;
150 } 150 }
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 898e2bdfee4..f845ed98fec 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -448,7 +448,7 @@ static int raid0_make_request(struct request_queue *q, struct bio *bio)
448 const int rw = bio_data_dir(bio); 448 const int rw = bio_data_dir(bio);
449 int cpu; 449 int cpu;
450 450
451 if (unlikely(bio_barrier(bio))) { 451 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
452 bio_endio(bio, -EOPNOTSUPP); 452 bio_endio(bio, -EOPNOTSUPP);
453 return 0; 453 return 0;
454 } 454 }
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 8726fd7ebce..ff7ed333599 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -782,8 +782,9 @@ static int make_request(struct request_queue *q, struct bio * bio)
782 struct bio_list bl; 782 struct bio_list bl;
783 struct page **behind_pages = NULL; 783 struct page **behind_pages = NULL;
784 const int rw = bio_data_dir(bio); 784 const int rw = bio_data_dir(bio);
785 const int do_sync = bio_sync(bio); 785 const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
786 int cpu, do_barriers; 786 int cpu;
787 bool do_barriers;
787 mdk_rdev_t *blocked_rdev; 788 mdk_rdev_t *blocked_rdev;
788 789
789 /* 790 /*
@@ -797,7 +798,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
797 798
798 md_write_start(mddev, bio); /* wait on superblock update early */ 799 md_write_start(mddev, bio); /* wait on superblock update early */
799 800
800 if (unlikely(!mddev->barriers_work && bio_barrier(bio))) { 801 if (unlikely(!mddev->barriers_work &&
802 bio_rw_flagged(bio, BIO_RW_BARRIER))) {
801 if (rw == WRITE) 803 if (rw == WRITE)
802 md_write_end(mddev); 804 md_write_end(mddev);
803 bio_endio(bio, -EOPNOTSUPP); 805 bio_endio(bio, -EOPNOTSUPP);
@@ -925,7 +927,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
925 atomic_set(&r1_bio->remaining, 0); 927 atomic_set(&r1_bio->remaining, 0);
926 atomic_set(&r1_bio->behind_remaining, 0); 928 atomic_set(&r1_bio->behind_remaining, 0);
927 929
928 do_barriers = bio_barrier(bio); 930 do_barriers = bio_rw_flagged(bio, BIO_RW_BARRIER);
929 if (do_barriers) 931 if (do_barriers)
930 set_bit(R1BIO_Barrier, &r1_bio->state); 932 set_bit(R1BIO_Barrier, &r1_bio->state);
931 933
@@ -1600,7 +1602,7 @@ static void raid1d(mddev_t *mddev)
1600 * We already have a nr_pending reference on these rdevs. 1602 * We already have a nr_pending reference on these rdevs.
1601 */ 1603 */
1602 int i; 1604 int i;
1603 const int do_sync = bio_sync(r1_bio->master_bio); 1605 const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO);
1604 clear_bit(R1BIO_BarrierRetry, &r1_bio->state); 1606 clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
1605 clear_bit(R1BIO_Barrier, &r1_bio->state); 1607 clear_bit(R1BIO_Barrier, &r1_bio->state);
1606 for (i=0; i < conf->raid_disks; i++) 1608 for (i=0; i < conf->raid_disks; i++)
@@ -1654,7 +1656,7 @@ static void raid1d(mddev_t *mddev)
1654 (unsigned long long)r1_bio->sector); 1656 (unsigned long long)r1_bio->sector);
1655 raid_end_bio_io(r1_bio); 1657 raid_end_bio_io(r1_bio);
1656 } else { 1658 } else {
1657 const int do_sync = bio_sync(r1_bio->master_bio); 1659 const bool do_sync = bio_rw_flagged(r1_bio->master_bio, BIO_RW_SYNCIO);
1658 r1_bio->bios[r1_bio->read_disk] = 1660 r1_bio->bios[r1_bio->read_disk] =
1659 mddev->ro ? IO_BLOCKED : NULL; 1661 mddev->ro ? IO_BLOCKED : NULL;
1660 r1_bio->read_disk = disk; 1662 r1_bio->read_disk = disk;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3d9020cf6f6..d0a2152e064 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -796,12 +796,12 @@ static int make_request(struct request_queue *q, struct bio * bio)
796 int i; 796 int i;
797 int chunk_sects = conf->chunk_mask + 1; 797 int chunk_sects = conf->chunk_mask + 1;
798 const int rw = bio_data_dir(bio); 798 const int rw = bio_data_dir(bio);
799 const int do_sync = bio_sync(bio); 799 const bool do_sync = bio_rw_flagged(bio, BIO_RW_SYNCIO);
800 struct bio_list bl; 800 struct bio_list bl;
801 unsigned long flags; 801 unsigned long flags;
802 mdk_rdev_t *blocked_rdev; 802 mdk_rdev_t *blocked_rdev;
803 803
804 if (unlikely(bio_barrier(bio))) { 804 if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) {
805 bio_endio(bio, -EOPNOTSUPP); 805 bio_endio(bio, -EOPNOTSUPP);
806 return 0; 806 return 0;
807 } 807 }
@@ -1610,7 +1610,7 @@ static void raid10d(mddev_t *mddev)
1610 raid_end_bio_io(r10_bio); 1610 raid_end_bio_io(r10_bio);
1611 bio_put(bio); 1611 bio_put(bio);
1612 } else { 1612 } else {
1613 const int do_sync = bio_sync(r10_bio->master_bio); 1613 const bool do_sync = bio_rw_flagged(r10_bio->master_bio, BIO_RW_SYNCIO);
1614 bio_put(bio); 1614 bio_put(bio);
1615 rdev = conf->mirrors[mirror].rdev; 1615 rdev = conf->mirrors[mirror].rdev;
1616 if (printk_ratelimit()) 1616 if (printk_ratelimit())
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b8a2c5dc67b..826eb346735 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -3606,7 +3606,7 @@ static int make_request(struct request_queue *q, struct bio * bi)
3606 const int rw = bio_data_dir(bi); 3606 const int rw = bio_data_dir(bi);
3607 int cpu, remaining; 3607 int cpu, remaining;
3608 3608
3609 if (unlikely(bio_barrier(bi))) { 3609 if (unlikely(bio_rw_flagged(bi, BIO_RW_BARRIER))) {
3610 bio_endio(bi, -EOPNOTSUPP); 3610 bio_endio(bi, -EOPNOTSUPP);
3611 return 0; 3611 return 0;
3612 } 3612 }
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 662024d8694..5987da85710 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -898,8 +898,10 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
898 scsi_print_sense("", cmd); 898 scsi_print_sense("", cmd);
899 scsi_print_command(cmd); 899 scsi_print_command(cmd);
900 } 900 }
901 blk_end_request_all(req, -EIO); 901 if (blk_end_request_err(req, -EIO))
902 scsi_next_command(cmd); 902 scsi_requeue_command(q, cmd);
903 else
904 scsi_next_command(cmd);
903 break; 905 break;
904 case ACTION_REPREP: 906 case ACTION_REPREP:
905 /* Unprep the request and put it back at the head of the queue. 907 /* Unprep the request and put it back at the head of the queue.
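
blk_end_request_err() (added in the blkdev.h hunk further down) fails a request only up to the next error boundary, which matters once mixed merges let failfast and normal bios share one request. A hedged sketch of the calling convention, with a hypothetical requeue helper:

#include <linux/blkdev.h>

static void example_requeue(struct request *rq);	/* hypothetical helper */

static void example_fail(struct request *rq)
{
	/* returns true while bytes remain beyond the error boundary */
	if (blk_end_request_err(rq, -EIO))
		example_requeue(rq);	/* retry the part that was not failed */
}
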
diff --git a/drivers/staging/dst/dcore.c b/drivers/staging/dst/dcore.c
index 84724187ec3..ac8577358ba 100644
--- a/drivers/staging/dst/dcore.c
+++ b/drivers/staging/dst/dcore.c
@@ -112,8 +112,9 @@ static int dst_request(struct request_queue *q, struct bio *bio)
112 * I worked with. 112 * I worked with.
113 * 113 *
114 * Empty barriers are not allowed anyway, see 51fd77bd9f512 114 * Empty barriers are not allowed anyway, see 51fd77bd9f512
115 * for example, although later it was changed to bio_discard() 115 * for example, although later it was changed to
116 * only, which does not work in this case. 116 * bio_rw_flagged(bio, BIO_RW_DISCARD) only, which does not
117 * work in this case.
117 */ 118 */
118 //err = -EOPNOTSUPP; 119 //err = -EOPNOTSUPP;
119 err = 0; 120 err = 0;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 72a2b9c28e9..535f85ba104 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1511,7 +1511,8 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
1511static void btrfs_issue_discard(struct block_device *bdev, 1511static void btrfs_issue_discard(struct block_device *bdev,
1512 u64 start, u64 len) 1512 u64 start, u64 len)
1513{ 1513{
1514 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); 1514 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL,
1515 DISCARD_FL_BARRIER);
1515} 1516}
1516#endif 1517#endif
1517 1518
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5dbefd11b4a..5cf405b0828 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -260,7 +260,7 @@ loop_lock:
260 num_run++; 260 num_run++;
261 batch_run++; 261 batch_run++;
262 262
263 if (bio_sync(cur)) 263 if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
264 num_sync_run++; 264 num_sync_run++;
265 265
266 if (need_resched()) { 266 if (need_resched()) {
@@ -2903,7 +2903,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
2903 bio->bi_rw |= rw; 2903 bio->bi_rw |= rw;
2904 2904
2905 spin_lock(&device->io_lock); 2905 spin_lock(&device->io_lock);
2906 if (bio_sync(bio)) 2906 if (bio_rw_flagged(bio, BIO_RW_SYNCIO))
2907 pending_bios = &device->pending_sync_bios; 2907 pending_bios = &device->pending_sync_bios;
2908 else 2908 else
2909 pending_bios = &device->pending_bios; 2909 pending_bios = &device->pending_bios;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 18d3a28554a..28c590b7c9d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -857,7 +857,8 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
857 goto start_new_extent; 857 goto start_new_extent;
858 if ((start + nr_sects) != blk) { 858 if ((start + nr_sects) != blk) {
859 rv = blkdev_issue_discard(bdev, start, 859 rv = blkdev_issue_discard(bdev, start,
860 nr_sects, GFP_NOFS); 860 nr_sects, GFP_NOFS,
861 DISCARD_FL_BARRIER);
861 if (rv) 862 if (rv)
862 goto fail; 863 goto fail;
863 nr_sects = 0; 864 nr_sects = 0;
@@ -871,7 +872,8 @@ start_new_extent:
871 } 872 }
872 } 873 }
873 if (nr_sects) { 874 if (nr_sects) {
874 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS); 875 rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS,
876 DISCARD_FL_BARRIER);
875 if (rv) 877 if (rv)
876 goto fail; 878 goto fail;
877 } 879 }
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index ea4e6cb29e1..619ba99dfe3 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -248,11 +248,19 @@ ssize_t part_stat_show(struct device *dev,
248 part_stat_read(p, merges[WRITE]), 248 part_stat_read(p, merges[WRITE]),
249 (unsigned long long)part_stat_read(p, sectors[WRITE]), 249 (unsigned long long)part_stat_read(p, sectors[WRITE]),
250 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), 250 jiffies_to_msecs(part_stat_read(p, ticks[WRITE])),
251 p->in_flight, 251 part_in_flight(p),
252 jiffies_to_msecs(part_stat_read(p, io_ticks)), 252 jiffies_to_msecs(part_stat_read(p, io_ticks)),
253 jiffies_to_msecs(part_stat_read(p, time_in_queue))); 253 jiffies_to_msecs(part_stat_read(p, time_in_queue)));
254} 254}
255 255
256ssize_t part_inflight_show(struct device *dev,
257 struct device_attribute *attr, char *buf)
258{
259 struct hd_struct *p = dev_to_part(dev);
260
261 return sprintf(buf, "%8u %8u\n", p->in_flight[0], p->in_flight[1]);
262}
263
256#ifdef CONFIG_FAIL_MAKE_REQUEST 264#ifdef CONFIG_FAIL_MAKE_REQUEST
257ssize_t part_fail_show(struct device *dev, 265ssize_t part_fail_show(struct device *dev,
258 struct device_attribute *attr, char *buf) 266 struct device_attribute *attr, char *buf)
@@ -281,6 +289,7 @@ static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL);
281static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); 289static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL);
282static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL); 290static DEVICE_ATTR(alignment_offset, S_IRUGO, part_alignment_offset_show, NULL);
283static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); 291static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
292static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
284#ifdef CONFIG_FAIL_MAKE_REQUEST 293#ifdef CONFIG_FAIL_MAKE_REQUEST
285static struct device_attribute dev_attr_fail = 294static struct device_attribute dev_attr_fail =
286 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store); 295 __ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -292,6 +301,7 @@ static struct attribute *part_attrs[] = {
292 &dev_attr_size.attr, 301 &dev_attr_size.attr,
293 &dev_attr_alignment_offset.attr, 302 &dev_attr_alignment_offset.attr,
294 &dev_attr_stat.attr, 303 &dev_attr_stat.attr,
304 &dev_attr_inflight.attr,
295#ifdef CONFIG_FAIL_MAKE_REQUEST 305#ifdef CONFIG_FAIL_MAKE_REQUEST
296 &dev_attr_fail.attr, 306 &dev_attr_fail.attr,
297#endif 307#endif
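
The new inflight attribute exposes the two counters as a pair of unsigned integers, matching the "%8u %8u" format above. A hedged userspace sketch that parses it; the sysfs path is an assumed example:

#include <stdio.h>

int main(void)
{
	unsigned int r, w;
	FILE *f = fopen("/sys/block/sda/sda1/inflight", "r");	/* assumed path */

	if (f && fscanf(f, "%u %u", &r, &w) == 2)
		printf("in-flight: %u reads, %u writes\n", r, w);
	if (f)
		fclose(f);
	return 0;
}
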
diff --git a/fs/splice.c b/fs/splice.c
index 819023733f8..7394e9e1753 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -502,8 +502,10 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
502 len = left; 502 len = left;
503 503
504 ret = __generic_file_splice_read(in, ppos, pipe, len, flags); 504 ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
505 if (ret > 0) 505 if (ret > 0) {
506 *ppos += ret; 506 *ppos += ret;
507 file_accessed(in);
508 }
507 509
508 return ret; 510 return ret;
509} 511}
@@ -963,8 +965,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
963 965
964 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 966 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
965 ret = file_remove_suid(out); 967 ret = file_remove_suid(out);
966 if (!ret) 968 if (!ret) {
969 file_update_time(out);
967 ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); 970 ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
971 }
968 mutex_unlock(&inode->i_mutex); 972 mutex_unlock(&inode->i_mutex);
969 } while (ret > 0); 973 } while (ret > 0);
970 splice_from_pipe_end(pipe, &sd); 974 splice_from_pipe_end(pipe, &sd);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 2892b710771..5be93f18d84 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -142,56 +142,51 @@ struct bio {
142 * 142 *
143 * bit 0 -- data direction 143 * bit 0 -- data direction
144 * If not set, bio is a read from device. If set, it's a write to device. 144 * If not set, bio is a read from device. If set, it's a write to device.
145 * bit 1 -- rw-ahead when set 145 * bit 1 -- fail fast device errors
146 * bit 2 -- barrier 146 * bit 2 -- fail fast transport errors
147 * bit 3 -- fail fast driver errors
148 * bit 4 -- rw-ahead when set
149 * bit 5 -- barrier
147 * Insert a serialization point in the IO queue, forcing previously 150 * Insert a serialization point in the IO queue, forcing previously
148 * submitted IO to be completed before this one is issued. 151 * submitted IO to be completed before this one is issued.
149 * bit 3 -- synchronous I/O hint. 152 * bit 6 -- synchronous I/O hint.
150 * bit 4 -- Unplug the device immediately after submitting this bio. 153 * bit 7 -- Unplug the device immediately after submitting this bio.
151 * bit 5 -- metadata request 154 * bit 8 -- metadata request
152 * Used for tracing to differentiate metadata and data IO. May also 155 * Used for tracing to differentiate metadata and data IO. May also
153 * get some preferential treatment in the IO scheduler 156 * get some preferential treatment in the IO scheduler
154 * bit 6 -- discard sectors 157 * bit 9 -- discard sectors
155 * Informs the lower level device that this range of sectors is no longer 158 * Informs the lower level device that this range of sectors is no longer
156 * used by the file system and may thus be freed by the device. Used 159 * used by the file system and may thus be freed by the device. Used
157 * for flash based storage. 160 * for flash based storage.
158 * bit 7 -- fail fast device errors
159 * bit 8 -- fail fast transport errors
160 * bit 9 -- fail fast driver errors
161 * Don't want driver retries for any fast fail whatever the reason. 161 * Don't want driver retries for any fast fail whatever the reason.
162 * bit 10 -- Tell the IO scheduler not to wait for more requests after this 162 * bit 10 -- Tell the IO scheduler not to wait for more requests after this
163 one has been submitted, even if it is a SYNC request. 163 one has been submitted, even if it is a SYNC request.
164 */ 164 */
165#define BIO_RW 0 /* Must match RW in req flags (blkdev.h) */ 165enum bio_rw_flags {
166#define BIO_RW_AHEAD 1 /* Must match FAILFAST in req flags */ 166 BIO_RW,
167#define BIO_RW_BARRIER 2 167 BIO_RW_FAILFAST_DEV,
168#define BIO_RW_SYNCIO 3 168 BIO_RW_FAILFAST_TRANSPORT,
169#define BIO_RW_UNPLUG 4 169 BIO_RW_FAILFAST_DRIVER,
170#define BIO_RW_META 5 170 /* above flags must match REQ_* */
171#define BIO_RW_DISCARD 6 171 BIO_RW_AHEAD,
172#define BIO_RW_FAILFAST_DEV 7 172 BIO_RW_BARRIER,
173#define BIO_RW_FAILFAST_TRANSPORT 8 173 BIO_RW_SYNCIO,
174#define BIO_RW_FAILFAST_DRIVER 9 174 BIO_RW_UNPLUG,
175#define BIO_RW_NOIDLE 10 175 BIO_RW_META,
176 176 BIO_RW_DISCARD,
177#define bio_rw_flagged(bio, flag) ((bio)->bi_rw & (1 << (flag))) 177 BIO_RW_NOIDLE,
178};
178 179
179/* 180/*
180 * Old defines, these should eventually be replaced by direct usage of 181 * First four bits must match between bio->bi_rw and rq->cmd_flags, make
181 * bio_rw_flagged() 182 * that explicit here.
182 */ 183 */
183#define bio_barrier(bio) bio_rw_flagged(bio, BIO_RW_BARRIER) 184#define BIO_RW_RQ_MASK 0xf
184#define bio_sync(bio) bio_rw_flagged(bio, BIO_RW_SYNCIO) 185
185#define bio_unplug(bio) bio_rw_flagged(bio, BIO_RW_UNPLUG) 186static inline bool bio_rw_flagged(struct bio *bio, enum bio_rw_flags flag)
186#define bio_failfast_dev(bio) bio_rw_flagged(bio, BIO_RW_FAILFAST_DEV) 187{
187#define bio_failfast_transport(bio) \ 188 return (bio->bi_rw & (1 << flag)) != 0;
188 bio_rw_flagged(bio, BIO_RW_FAILFAST_TRANSPORT) 189}
189#define bio_failfast_driver(bio) \
190 bio_rw_flagged(bio, BIO_RW_FAILFAST_DRIVER)
191#define bio_rw_ahead(bio) bio_rw_flagged(bio, BIO_RW_AHEAD)
192#define bio_rw_meta(bio) bio_rw_flagged(bio, BIO_RW_META)
193#define bio_discard(bio) bio_rw_flagged(bio, BIO_RW_DISCARD)
194#define bio_noidle(bio) bio_rw_flagged(bio, BIO_RW_NOIDLE)
195 190
196/* 191/*
197 * upper 16 bits of bi_rw define the io priority of this bio 192 * upper 16 bits of bi_rw define the io priority of this bio
@@ -216,7 +211,7 @@ struct bio {
216#define bio_offset(bio) bio_iovec((bio))->bv_offset 211#define bio_offset(bio) bio_iovec((bio))->bv_offset
217#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx) 212#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
218#define bio_sectors(bio) ((bio)->bi_size >> 9) 213#define bio_sectors(bio) ((bio)->bi_size >> 9)
219#define bio_empty_barrier(bio) (bio_barrier(bio) && !bio_has_data(bio) && !bio_discard(bio)) 214#define bio_empty_barrier(bio) (bio_rw_flagged(bio, BIO_RW_BARRIER) && !bio_has_data(bio) && !bio_rw_flagged(bio, BIO_RW_DISCARD))
220 215
221static inline unsigned int bio_cur_bytes(struct bio *bio) 216static inline unsigned int bio_cur_bytes(struct bio *bio)
222{ 217{
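
With the per-flag macros gone, callers set bits in bi_rw directly and test them through bio_rw_flagged(), as the conversions throughout this merge show. A minimal sketch:

#include <linux/bio.h>

static void example_mark_and_test(struct bio *bio)
{
	/* flags are still set by OR-ing the bit into bi_rw */
	bio->bi_rw |= (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_BARRIER);

	if (bio_rw_flagged(bio, BIO_RW_BARRIER) && !bio_has_data(bio))
		pr_debug("empty barrier bio\n");
}
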
diff --git a/include/linux/blk-iopoll.h b/include/linux/blk-iopoll.h
new file mode 100644
index 00000000000..308734d3d4a
--- /dev/null
+++ b/include/linux/blk-iopoll.h
@@ -0,0 +1,48 @@
1#ifndef BLK_IOPOLL_H
2#define BLK_IOPOLL_H
3
4struct blk_iopoll;
5typedef int (blk_iopoll_fn)(struct blk_iopoll *, int);
6
7struct blk_iopoll {
8 struct list_head list;
9 unsigned long state;
10 unsigned long data;
11 int weight;
12 int max;
13 blk_iopoll_fn *poll;
14};
15
16enum {
17 IOPOLL_F_SCHED = 0,
18 IOPOLL_F_DISABLE = 1,
19};
20
21/*
22 * Returns 0 if we successfully set the IOPOLL_F_SCHED bit, indicating
23 * that we were the first to acquire this iop for scheduling. If this iop
24 * is currently disabled, return "failure".
25 */
26static inline int blk_iopoll_sched_prep(struct blk_iopoll *iop)
27{
28 if (!test_bit(IOPOLL_F_DISABLE, &iop->state))
29 return test_and_set_bit(IOPOLL_F_SCHED, &iop->state);
30
31 return 1;
32}
33
34static inline int blk_iopoll_disable_pending(struct blk_iopoll *iop)
35{
36 return test_bit(IOPOLL_F_DISABLE, &iop->state);
37}
38
39extern void blk_iopoll_sched(struct blk_iopoll *);
40extern void blk_iopoll_init(struct blk_iopoll *, int, blk_iopoll_fn *);
41extern void blk_iopoll_complete(struct blk_iopoll *);
42extern void __blk_iopoll_complete(struct blk_iopoll *);
43extern void blk_iopoll_enable(struct blk_iopoll *);
44extern void blk_iopoll_disable(struct blk_iopoll *);
45
46extern int blk_iopoll_enabled;
47
48#endif
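
The API mirrors NAPI: the interrupt handler hands completion reaping off to a softirq-driven poll function with a budget. A minimal driver-side sketch under stated assumptions; the my_hw_* helpers are hypothetical:

#include <linux/interrupt.h>
#include <linux/blk-iopoll.h>

static int my_hw_reap(int budget);	/* hypothetical: reap up to 'budget' completions */
static void my_hw_irq_enable(void);	/* hypothetical */

static int my_poll(struct blk_iopoll *iop, int budget)
{
	int done = my_hw_reap(budget);

	if (done < budget) {
		/* queue drained: leave polled mode, re-enable interrupts */
		blk_iopoll_complete(iop);
		my_hw_irq_enable();
	}
	return done;
}

static irqreturn_t my_isr(int irq, void *data)
{
	struct blk_iopoll *iop = data;

	/* per the header comment above, 0 means we won scheduling rights */
	if (blk_iopoll_sched_prep(iop) == 0)
		blk_iopoll_sched(iop);	/* raises BLOCK_IOPOLL_SOFTIRQ */

	return IRQ_HANDLED;
}

/* during setup: blk_iopoll_init(&iop, 32, my_poll); the weight of 32 is an assumed budget */
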
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 69103e053c9..e23a86cae5a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -86,13 +86,14 @@ enum {
86}; 86};
87 87
88/* 88/*
89 * request type modified bits. first two bits match BIO_RW* bits, important 89 * request type modified bits. first four bits match BIO_RW* bits, important
90 */ 90 */
91enum rq_flag_bits { 91enum rq_flag_bits {
92 __REQ_RW, /* not set, read. set, write */ 92 __REQ_RW, /* not set, read. set, write */
93 __REQ_FAILFAST_DEV, /* no driver retries of device errors */ 93 __REQ_FAILFAST_DEV, /* no driver retries of device errors */
94 __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ 94 __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
95 __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ 95 __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */
96 /* above flags must match BIO_RW_* */
96 __REQ_DISCARD, /* request to discard sectors */ 97 __REQ_DISCARD, /* request to discard sectors */
97 __REQ_SORTED, /* elevator knows about this request */ 98 __REQ_SORTED, /* elevator knows about this request */
98 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ 99 __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */
@@ -114,6 +115,7 @@ enum rq_flag_bits {
114 __REQ_INTEGRITY, /* integrity metadata has been remapped */ 115 __REQ_INTEGRITY, /* integrity metadata has been remapped */
115 __REQ_NOIDLE, /* Don't anticipate more IO after this one */ 116 __REQ_NOIDLE, /* Don't anticipate more IO after this one */
116 __REQ_IO_STAT, /* account I/O stat */ 117 __REQ_IO_STAT, /* account I/O stat */
118 __REQ_MIXED_MERGE, /* merge of different types, fail separately */
117 __REQ_NR_BITS, /* stops here */ 119 __REQ_NR_BITS, /* stops here */
118}; 120};
119 121
@@ -142,6 +144,10 @@ enum rq_flag_bits {
142#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) 144#define REQ_INTEGRITY (1 << __REQ_INTEGRITY)
143#define REQ_NOIDLE (1 << __REQ_NOIDLE) 145#define REQ_NOIDLE (1 << __REQ_NOIDLE)
144#define REQ_IO_STAT (1 << __REQ_IO_STAT) 146#define REQ_IO_STAT (1 << __REQ_IO_STAT)
147#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE)
148
149#define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \
150 REQ_FAILFAST_DRIVER)
145 151
146#define BLK_MAX_CDB 16 152#define BLK_MAX_CDB 16
147 153
@@ -453,10 +459,12 @@ struct request_queue
453#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ 459#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */
454#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ 460#define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */
455#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ 461#define QUEUE_FLAG_IO_STAT 15 /* do IO stats */
462#define QUEUE_FLAG_CQ 16 /* hardware does queuing */
456 463
457#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ 464#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
458 (1 << QUEUE_FLAG_CLUSTER) | \ 465 (1 << QUEUE_FLAG_CLUSTER) | \
459 (1 << QUEUE_FLAG_STACKABLE)) 466 (1 << QUEUE_FLAG_STACKABLE) | \
467 (1 << QUEUE_FLAG_SAME_COMP))
460 468
461static inline int queue_is_locked(struct request_queue *q) 469static inline int queue_is_locked(struct request_queue *q)
462{ 470{
@@ -575,6 +583,7 @@ enum {
575 583
576#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) 584#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
577#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) 585#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
586#define blk_queue_queuing(q) test_bit(QUEUE_FLAG_CQ, &(q)->queue_flags)
578#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) 587#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
579#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) 588#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
580#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) 589#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
@@ -828,11 +837,13 @@ static inline void blk_run_address_space(struct address_space *mapping)
828} 837}
829 838
830/* 839/*
831 * blk_rq_pos() : the current sector 840 * blk_rq_pos() : the current sector
832 * blk_rq_bytes() : bytes left in the entire request 841 * blk_rq_bytes() : bytes left in the entire request
833 * blk_rq_cur_bytes() : bytes left in the current segment 842 * blk_rq_cur_bytes() : bytes left in the current segment
834 * blk_rq_sectors() : sectors left in the entire request 843 * blk_rq_err_bytes() : bytes left till the next error boundary
835 * blk_rq_cur_sectors() : sectors left in the current segment 844 * blk_rq_sectors() : sectors left in the entire request
845 * blk_rq_cur_sectors() : sectors left in the current segment
846 * blk_rq_err_sectors() : sectors left till the next error boundary
836 */ 847 */
837static inline sector_t blk_rq_pos(const struct request *rq) 848static inline sector_t blk_rq_pos(const struct request *rq)
838{ 849{
@@ -849,6 +860,8 @@ static inline int blk_rq_cur_bytes(const struct request *rq)
849 return rq->bio ? bio_cur_bytes(rq->bio) : 0; 860 return rq->bio ? bio_cur_bytes(rq->bio) : 0;
850} 861}
851 862
863extern unsigned int blk_rq_err_bytes(const struct request *rq);
864
852static inline unsigned int blk_rq_sectors(const struct request *rq) 865static inline unsigned int blk_rq_sectors(const struct request *rq)
853{ 866{
854 return blk_rq_bytes(rq) >> 9; 867 return blk_rq_bytes(rq) >> 9;
@@ -859,6 +872,11 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
859 return blk_rq_cur_bytes(rq) >> 9; 872 return blk_rq_cur_bytes(rq) >> 9;
860} 873}
861 874
875static inline unsigned int blk_rq_err_sectors(const struct request *rq)
876{
877 return blk_rq_err_bytes(rq) >> 9;
878}
879
862/* 880/*
863 * Request issue related functions. 881 * Request issue related functions.
864 */ 882 */
@@ -885,10 +903,12 @@ extern bool blk_end_request(struct request *rq, int error,
885 unsigned int nr_bytes); 903 unsigned int nr_bytes);
886extern void blk_end_request_all(struct request *rq, int error); 904extern void blk_end_request_all(struct request *rq, int error);
887extern bool blk_end_request_cur(struct request *rq, int error); 905extern bool blk_end_request_cur(struct request *rq, int error);
906extern bool blk_end_request_err(struct request *rq, int error);
888extern bool __blk_end_request(struct request *rq, int error, 907extern bool __blk_end_request(struct request *rq, int error,
889 unsigned int nr_bytes); 908 unsigned int nr_bytes);
890extern void __blk_end_request_all(struct request *rq, int error); 909extern void __blk_end_request_all(struct request *rq, int error);
891extern bool __blk_end_request_cur(struct request *rq, int error); 910extern bool __blk_end_request_cur(struct request *rq, int error);
911extern bool __blk_end_request_err(struct request *rq, int error);
892 912
893extern void blk_complete_request(struct request *); 913extern void blk_complete_request(struct request *);
894extern void __blk_complete_request(struct request *); 914extern void __blk_complete_request(struct request *);
@@ -915,6 +935,7 @@ extern void blk_queue_alignment_offset(struct request_queue *q,
915 unsigned int alignment); 935 unsigned int alignment);
916extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); 936extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
917extern void blk_queue_io_min(struct request_queue *q, unsigned int min); 937extern void blk_queue_io_min(struct request_queue *q, unsigned int min);
938extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
918extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); 939extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt);
919extern void blk_set_default_limits(struct queue_limits *lim); 940extern void blk_set_default_limits(struct queue_limits *lim);
920extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, 941extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
@@ -977,15 +998,18 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
977} 998}
978 999
979extern int blkdev_issue_flush(struct block_device *, sector_t *); 1000extern int blkdev_issue_flush(struct block_device *, sector_t *);
980extern int blkdev_issue_discard(struct block_device *, 1001#define DISCARD_FL_WAIT 0x01 /* wait for completion */
981 sector_t sector, sector_t nr_sects, gfp_t); 1002#define DISCARD_FL_BARRIER 0x02 /* issue DISCARD_BARRIER request */
1003extern int blkdev_issue_discard(struct block_device *, sector_t sector,
1004 sector_t nr_sects, gfp_t, int flags);
982 1005
983static inline int sb_issue_discard(struct super_block *sb, 1006static inline int sb_issue_discard(struct super_block *sb,
984 sector_t block, sector_t nr_blocks) 1007 sector_t block, sector_t nr_blocks)
985{ 1008{
986 block <<= (sb->s_blocksize_bits - 9); 1009 block <<= (sb->s_blocksize_bits - 9);
987 nr_blocks <<= (sb->s_blocksize_bits - 9); 1010 nr_blocks <<= (sb->s_blocksize_bits - 9);
988 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL); 1011 return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL,
1012 DISCARD_FL_BARRIER);
989} 1013}
990 1014
991extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); 1015extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
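
blkdev_issue_discard() now takes a flags argument; every caller converted in this merge passes DISCARD_FL_BARRIER, and DISCARD_FL_WAIT additionally waits for the discard to complete. A sketch with illustrative sector values:

#include <linux/blkdev.h>

static int example_discard(struct block_device *bdev)
{
	/* discard 2048 sectors starting at sector 0 (illustrative range) */
	return blkdev_issue_discard(bdev, 0, 2048, GFP_KERNEL,
				    DISCARD_FL_BARRIER);
}
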
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 37f53216998..b21cf6b9c80 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -161,8 +161,8 @@ struct inodes_stat_t {
161 * These aren't really reads or writes, they pass down information about 161 * These aren't really reads or writes, they pass down information about
162 * parts of device that are now unused by the file system. 162 * parts of device that are now unused by the file system.
163 */ 163 */
164#define DISCARD_NOBARRIER (1 << BIO_RW_DISCARD) 164#define DISCARD_NOBARRIER (WRITE | (1 << BIO_RW_DISCARD))
165#define DISCARD_BARRIER ((1 << BIO_RW_DISCARD) | (1 << BIO_RW_BARRIER)) 165#define DISCARD_BARRIER (DISCARD_NOBARRIER | (1 << BIO_RW_BARRIER))
166 166
167#define SEL_IN 1 167#define SEL_IN 1
168#define SEL_OUT 2 168#define SEL_OUT 2
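
Folding WRITE into the discard masks means bio_data_dir() now classifies a discard as a write, which is what the elevator and the accounting code expect. A minimal sketch of submission with the updated mask:

#include <linux/fs.h>
#include <linux/bio.h>

static void example_submit_discard(struct bio *bio)
{
	submit_bio(DISCARD_BARRIER, bio);	/* WRITE | discard | barrier */
}
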
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 45fc320a53c..44263cb2712 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -98,7 +98,7 @@ struct hd_struct {
98 int make_it_fail; 98 int make_it_fail;
99#endif 99#endif
100 unsigned long stamp; 100 unsigned long stamp;
101 int in_flight; 101 int in_flight[2];
102#ifdef CONFIG_SMP 102#ifdef CONFIG_SMP
103 struct disk_stats *dkstats; 103 struct disk_stats *dkstats;
104#else 104#else
@@ -322,18 +322,23 @@ static inline void free_part_stats(struct hd_struct *part)
322#define part_stat_sub(cpu, gendiskp, field, subnd) \ 322#define part_stat_sub(cpu, gendiskp, field, subnd) \
323 part_stat_add(cpu, gendiskp, field, -subnd) 323 part_stat_add(cpu, gendiskp, field, -subnd)
324 324
325static inline void part_inc_in_flight(struct hd_struct *part) 325static inline void part_inc_in_flight(struct hd_struct *part, int rw)
326{ 326{
327 part->in_flight++; 327 part->in_flight[rw]++;
328 if (part->partno) 328 if (part->partno)
329 part_to_disk(part)->part0.in_flight++; 329 part_to_disk(part)->part0.in_flight[rw]++;
330} 330}
331 331
332static inline void part_dec_in_flight(struct hd_struct *part) 332static inline void part_dec_in_flight(struct hd_struct *part, int rw)
333{ 333{
334 part->in_flight--; 334 part->in_flight[rw]--;
335 if (part->partno) 335 if (part->partno)
336 part_to_disk(part)->part0.in_flight--; 336 part_to_disk(part)->part0.in_flight[rw]--;
337}
338
339static inline int part_in_flight(struct hd_struct *part)
340{
341 return part->in_flight[0] + part->in_flight[1];
337} 342}
338 343
339/* block/blk-core.c */ 344/* block/blk-core.c */
@@ -546,6 +551,8 @@ extern ssize_t part_size_show(struct device *dev,
546 struct device_attribute *attr, char *buf); 551 struct device_attribute *attr, char *buf);
547extern ssize_t part_stat_show(struct device *dev, 552extern ssize_t part_stat_show(struct device *dev,
548 struct device_attribute *attr, char *buf); 553 struct device_attribute *attr, char *buf);
554extern ssize_t part_inflight_show(struct device *dev,
555 struct device_attribute *attr, char *buf);
549#ifdef CONFIG_FAIL_MAKE_REQUEST 556#ifdef CONFIG_FAIL_MAKE_REQUEST
550extern ssize_t part_fail_show(struct device *dev, 557extern ssize_t part_fail_show(struct device *dev,
551 struct device_attribute *attr, char *buf); 558 struct device_attribute *attr, char *buf);
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 1ac57e522a1..8e9e151f811 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -348,6 +348,7 @@ enum
348 NET_TX_SOFTIRQ, 348 NET_TX_SOFTIRQ,
349 NET_RX_SOFTIRQ, 349 NET_RX_SOFTIRQ,
350 BLOCK_SOFTIRQ, 350 BLOCK_SOFTIRQ,
351 BLOCK_IOPOLL_SOFTIRQ,
351 TASKLET_SOFTIRQ, 352 TASKLET_SOFTIRQ,
352 SCHED_SOFTIRQ, 353 SCHED_SOFTIRQ,
353 HRTIMER_SOFTIRQ, 354 HRTIMER_SOFTIRQ,
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3125cff1c57..6bb59f70740 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -91,6 +91,7 @@ extern int sysctl_nr_trim_pages;
91#ifdef CONFIG_RCU_TORTURE_TEST 91#ifdef CONFIG_RCU_TORTURE_TEST
92extern int rcutorture_runnable; 92extern int rcutorture_runnable;
93#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */ 93#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
94extern int blk_iopoll_enabled;
94 95
95/* Constants used for minimum and maximum */ 96/* Constants used for minimum and maximum */
96#ifdef CONFIG_DETECT_SOFTLOCKUP 97#ifdef CONFIG_DETECT_SOFTLOCKUP
@@ -997,7 +998,14 @@ static struct ctl_table kern_table[] = {
997 .proc_handler = &proc_dointvec, 998 .proc_handler = &proc_dointvec,
998 }, 999 },
999#endif 1000#endif
1000 1001 {
1002 .ctl_name = CTL_UNNUMBERED,
1003 .procname = "blk_iopoll",
1004 .data = &blk_iopoll_enabled,
1005 .maxlen = sizeof(int),
1006 .mode = 0644,
1007 .proc_handler = &proc_dointvec,
1008 },
1001/* 1009/*
1002 * NOTE: do not add new entries to this table unless you have read 1010 * NOTE: do not add new entries to this table unless you have read
1003 * Documentation/sysctl/ctl_unnumbered.txt 1011 * Documentation/sysctl/ctl_unnumbered.txt
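
The entry surfaces the toggle as /proc/sys/kernel/blk_iopoll (kernel.blk_iopoll via sysctl); writing 0 there should fall back to the plain interrupt-driven completion path, since drivers consult blk_iopoll_enabled before scheduling a poll.
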
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 8ffdc0d23c5..74f1102e874 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -161,7 +161,8 @@ static int discard_swap(struct swap_info_struct *si)
161 } 161 }
162 162
163 err = blkdev_issue_discard(si->bdev, start_block, 163 err = blkdev_issue_discard(si->bdev, start_block,
164 nr_blocks, GFP_KERNEL); 164 nr_blocks, GFP_KERNEL,
165 DISCARD_FL_BARRIER);
165 if (err) 166 if (err)
166 break; 167 break;
167 168
@@ -200,7 +201,8 @@ static void discard_swap_cluster(struct swap_info_struct *si,
200 start_block <<= PAGE_SHIFT - 9; 201 start_block <<= PAGE_SHIFT - 9;
201 nr_blocks <<= PAGE_SHIFT - 9; 202 nr_blocks <<= PAGE_SHIFT - 9;
202 if (blkdev_issue_discard(si->bdev, start_block, 203 if (blkdev_issue_discard(si->bdev, start_block,
203 nr_blocks, GFP_NOIO)) 204 nr_blocks, GFP_NOIO,
205 DISCARD_FL_BARRIER))
204 break; 206 break;
205 } 207 }
206 208