author     Linus Torvalds <torvalds@linux-foundation.org>   2012-10-10 20:04:23 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2012-10-10 20:04:23 -0400
commit     ce40be7a820bb393ac4ac69865f018d2f4038cf0 (patch)
tree       b1fe5a93346eb06f22b1c303d63ec5456d7212ab /block
parent     ba0a5a36f60e4c1152af3a2ae2813251974405bf (diff)
parent     02f3939e1a9357b7c370a4a69717cf9c02452737 (diff)
Merge branch 'for-3.7/core' of git://git.kernel.dk/linux-block
Pull block IO update from Jens Axboe:
 "Core block IO bits for 3.7. Not a huge round this time, it contains:

   - First series from Kent cleaning up and generalizing bio allocation
     and freeing.

   - WRITE_SAME support from Martin.

   - Mikulas patches to prevent O_DIRECT crashes when someone changes
     the block size of a device.

   - Make bio_split() work on data-less bio's (like trim/discards).

   - A few other minor fixups."

Fixed up silent semantic mis-merge as per Mikulas Patocka and Andrew
Morton. It is due to the VM no longer using a prio-tree (see commit
6b2dbba8b6ac: "mm: replace vma prio_tree with an interval tree").

So make set_blocksize() use mapping_mapped() instead of open-coding the
internal VM knowledge that has changed.

* 'for-3.7/core' of git://git.kernel.dk/linux-block: (26 commits)
  block: makes bio_split support bio without data
  scatterlist: refactor the sg_nents
  scatterlist: add sg_nents
  fs: fix include/percpu-rwsem.h export error
  percpu-rw-semaphore: fix documentation typos
  fs/block_dev.c:1644:5: sparse: symbol 'blkdev_mmap' was not declared
  blockdev: turn a rw semaphore into a percpu rw semaphore
  Fix a crash when block device is read and block size is changed at the same time
  block: fix request_queue->flags initialization
  block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue()
  block: ioctl to zero block ranges
  block: Make blkdev_issue_zeroout use WRITE SAME
  block: Implement support for WRITE SAME
  block: Consolidate command flag and queue limit checks for merges
  block: Clean up special command handling logic
  block/blk-tag.c: Remove useless kfree
  block: remove the duplicated setting for congestion_threshold
  block: reject invalid queue attribute values
  block: Add bio_clone_bioset(), bio_clone_kmalloc()
  block: Consolidate bio_alloc_bioset(), bio_kmalloc()
  ...
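The new BLKZEROOUT ioctl added in block/ioctl.c below takes a {start, length} pair of byte offsets, both multiples of 512, and requires the device to be opened for writing. A minimal userspace sketch, assuming a 3.7+ kernel whose <linux/fs.h> defines BLKZEROOUT (the device path and length here are purely illustrative):

/* Minimal sketch: zero the first 1 MiB of a block device with BLKZEROOUT.
 * start and length are byte offsets and must be 512-byte multiples,
 * mirroring the checks in blk_ioctl_zeroout() below.
 */
#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>                           /* BLKZEROOUT (3.7+ headers assumed) */

int main(void)
{
        uint64_t range[2] = { 0, 1 << 20 };     /* {start, length} in bytes */
        int fd = open("/dev/sdX", O_WRONLY);    /* illustrative device node */

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, BLKZEROOUT, range) < 0)   /* kernel copies both u64s */
                perror("ioctl(BLKZEROOUT)");
        close(fd);
        return 0;
}

As the blk-lib.c changes below show, the kernel side of this ioctl prefers a WRITE SAME of ZERO_PAGE(0) when the device advertises support and falls back to writing zero-filled pages otherwise.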
Diffstat (limited to 'block')
-rw-r--r--  block/blk-core.c       51
-rw-r--r--  block/blk-lib.c       104
-rw-r--r--  block/blk-merge.c      53
-rw-r--r--  block/blk-settings.c   16
-rw-r--r--  block/blk-sysfs.c      44
-rw-r--r--  block/blk-tag.c         6
-rw-r--r--  block/blk.h             5
-rw-r--r--  block/elevator.c        6
-rw-r--r--  block/ioctl.c          27
9 files changed, 236 insertions(+), 76 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index d2da64170513..a33870b1847b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -606,8 +606,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
         /*
          * A queue starts its life with bypass turned on to avoid
          * unnecessary bypass on/off overhead and nasty surprises during
-         * init. The initial bypass will be finished at the end of
-         * blk_init_allocated_queue().
+         * init. The initial bypass will be finished when the queue is
+         * registered by blk_register_queue().
          */
         q->bypass_depth = 1;
         __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
@@ -694,7 +694,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
         q->request_fn = rfn;
         q->prep_rq_fn = NULL;
         q->unprep_rq_fn = NULL;
-        q->queue_flags = QUEUE_FLAG_DEFAULT;
+        q->queue_flags |= QUEUE_FLAG_DEFAULT;
 
         /* Override internal queue lock with supplied lock pointer */
         if (lock)
@@ -710,11 +710,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
         /* init elevator */
         if (elevator_init(q, NULL))
                 return NULL;
-
-        blk_queue_congestion_threshold(q);
-
-        /* all done, end the initial bypass */
-        blk_queue_bypass_end(q);
         return q;
 }
 EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1657,8 +1652,8 @@ generic_make_request_checks(struct bio *bio)
                 goto end_io;
         }
 
-        if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
-                     nr_sectors > queue_max_hw_sectors(q))) {
+        if (likely(bio_is_rw(bio) &&
+                   nr_sectors > queue_max_hw_sectors(q))) {
                 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
                        bdevname(bio->bi_bdev, b),
                        bio_sectors(bio),
@@ -1699,8 +1694,12 @@ generic_make_request_checks(struct bio *bio)
 
         if ((bio->bi_rw & REQ_DISCARD) &&
             (!blk_queue_discard(q) ||
-             ((bio->bi_rw & REQ_SECURE) &&
-              !blk_queue_secdiscard(q)))) {
+             ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
+                err = -EOPNOTSUPP;
+                goto end_io;
+        }
+
+        if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
                 err = -EOPNOTSUPP;
                 goto end_io;
         }
@@ -1810,15 +1809,20 @@ EXPORT_SYMBOL(generic_make_request);
  */
 void submit_bio(int rw, struct bio *bio)
 {
-        int count = bio_sectors(bio);
-
         bio->bi_rw |= rw;
 
         /*
          * If it's a regular read/write or a barrier with data attached,
          * go through the normal accounting stuff before submission.
          */
-        if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
+        if (bio_has_data(bio)) {
+                unsigned int count;
+
+                if (unlikely(rw & REQ_WRITE_SAME))
+                        count = bdev_logical_block_size(bio->bi_bdev) >> 9;
+                else
+                        count = bio_sectors(bio);
+
                 if (rw & WRITE) {
                         count_vm_events(PGPGOUT, count);
                 } else {
@@ -1864,11 +1868,10 @@ EXPORT_SYMBOL(submit_bio);
  */
 int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 {
-        if (rq->cmd_flags & REQ_DISCARD)
+        if (!rq_mergeable(rq))
                 return 0;
 
-        if (blk_rq_sectors(rq) > queue_max_sectors(q) ||
-            blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
+        if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
                 printk(KERN_ERR "%s: over max size limit.\n", __func__);
                 return -EIO;
         }
@@ -2340,7 +2343,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
                 req->buffer = bio_data(req->bio);
 
         /* update sector only for requests with clear definition of sector */
-        if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD))
+        if (req->cmd_type == REQ_TYPE_FS)
                 req->__sector += total_bytes >> 9;
 
         /* mixed attributes always follow the first bio */
@@ -2781,16 +2784,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
         blk_rq_init(NULL, rq);
 
         __rq_for_each_bio(bio_src, rq_src) {
-                bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
+                bio = bio_clone_bioset(bio_src, gfp_mask, bs);
                 if (!bio)
                         goto free_and_out;
 
-                __bio_clone(bio, bio_src);
-
-                if (bio_integrity(bio_src) &&
-                    bio_integrity_clone(bio, bio_src, gfp_mask, bs))
-                        goto free_and_out;
-
                 if (bio_ctr && bio_ctr(bio, bio_src, data))
                         goto free_and_out;
 
@@ -2807,7 +2804,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
 
 free_and_out:
         if (bio)
-                bio_free(bio, bs);
+                bio_put(bio);
         blk_rq_unprep_clone(rq);
 
         return -ENOMEM;
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 19cc761cacb2..9373b58dfab1 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -130,6 +130,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
 EXPORT_SYMBOL(blkdev_issue_discard);
 
 /**
+ * blkdev_issue_write_same - queue a write same operation
+ * @bdev:      target blockdev
+ * @sector:    start sector
+ * @nr_sects:  number of sectors to write
+ * @gfp_mask:  memory allocation flags (for bio_alloc)
+ * @page:      page containing data to write
+ *
+ * Description:
+ *    Issue a write same request for the sectors in question.
+ */
+int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+                            sector_t nr_sects, gfp_t gfp_mask,
+                            struct page *page)
+{
+        DECLARE_COMPLETION_ONSTACK(wait);
+        struct request_queue *q = bdev_get_queue(bdev);
+        unsigned int max_write_same_sectors;
+        struct bio_batch bb;
+        struct bio *bio;
+        int ret = 0;
+
+        if (!q)
+                return -ENXIO;
+
+        max_write_same_sectors = q->limits.max_write_same_sectors;
+
+        if (max_write_same_sectors == 0)
+                return -EOPNOTSUPP;
+
+        atomic_set(&bb.done, 1);
+        bb.flags = 1 << BIO_UPTODATE;
+        bb.wait = &wait;
+
+        while (nr_sects) {
+                bio = bio_alloc(gfp_mask, 1);
+                if (!bio) {
+                        ret = -ENOMEM;
+                        break;
+                }
+
+                bio->bi_sector = sector;
+                bio->bi_end_io = bio_batch_end_io;
+                bio->bi_bdev = bdev;
+                bio->bi_private = &bb;
+                bio->bi_vcnt = 1;
+                bio->bi_io_vec->bv_page = page;
+                bio->bi_io_vec->bv_offset = 0;
+                bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
+
+                if (nr_sects > max_write_same_sectors) {
+                        bio->bi_size = max_write_same_sectors << 9;
+                        nr_sects -= max_write_same_sectors;
+                        sector += max_write_same_sectors;
+                } else {
+                        bio->bi_size = nr_sects << 9;
+                        nr_sects = 0;
+                }
+
+                atomic_inc(&bb.done);
+                submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
+        }
+
+        /* Wait for bios in-flight */
+        if (!atomic_dec_and_test(&bb.done))
+                wait_for_completion(&wait);
+
+        if (!test_bit(BIO_UPTODATE, &bb.flags))
+                ret = -ENOTSUPP;
+
+        return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_write_same);
+
+/**
  * blkdev_issue_zeroout - generate number of zero filed write bios
  * @bdev:      blockdev to issue
  * @sector:    start sector
@@ -140,7 +214,7 @@ EXPORT_SYMBOL(blkdev_issue_discard);
  *  Generate and issue number of bios with zerofiled pages.
  */
 
-int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
                         sector_t nr_sects, gfp_t gfp_mask)
 {
         int ret;
@@ -190,4 +264,32 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
 
         return ret;
 }
+
+/**
+ * blkdev_issue_zeroout - zero-fill a block range
+ * @bdev:      blockdev to write
+ * @sector:    start sector
+ * @nr_sects:  number of sectors to write
+ * @gfp_mask:  memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ *    Generate and issue number of bios with zerofiled pages.
+ */
+
+int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+                         sector_t nr_sects, gfp_t gfp_mask)
+{
+        if (bdev_write_same(bdev)) {
+                unsigned char bdn[BDEVNAME_SIZE];
+
+                if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
+                                             ZERO_PAGE(0)))
+                        return 0;
+
+                bdevname(bdev, bdn);
+                pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
+        }
+
+        return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
+}
 EXPORT_SYMBOL(blkdev_issue_zeroout);
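To illustrate the helper exported above: an in-kernel caller that wants to stamp one logical block's worth of data from a page across a sector range could use it roughly as below. This is a hedged sketch, not code from this series; the wrapper name stamp_range and the GFP_NOIO choice are illustrative only.

#include <linux/blkdev.h>
#include <linux/gfp.h>

/* Sketch only: replicate the contents of 'page' (one logical block of data
 * at offset 0) across nr_sects sectors starting at 'sector'.  The helper
 * splits the range into max_write_same_sectors-sized bios and returns
 * -EOPNOTSUPP if the queue advertises no WRITE SAME support.
 */
static int stamp_range(struct block_device *bdev, sector_t sector,
                       sector_t nr_sects, struct page *page)
{
        return blkdev_issue_write_same(bdev, sector, nr_sects, GFP_NOIO, page);
}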
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e76279e41162..936a110de0b9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -275,14 +275,8 @@ no_merge:
 int ll_back_merge_fn(struct request_queue *q, struct request *req,
                      struct bio *bio)
 {
-        unsigned short max_sectors;
-
-        if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
-                max_sectors = queue_max_hw_sectors(q);
-        else
-                max_sectors = queue_max_sectors(q);
-
-        if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
+        if (blk_rq_sectors(req) + bio_sectors(bio) >
+            blk_rq_get_max_sectors(req)) {
                 req->cmd_flags |= REQ_NOMERGE;
                 if (req == q->last_merge)
                         q->last_merge = NULL;
@@ -299,15 +293,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
 int ll_front_merge_fn(struct request_queue *q, struct request *req,
                       struct bio *bio)
 {
-        unsigned short max_sectors;
-
-        if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
-                max_sectors = queue_max_hw_sectors(q);
-        else
-                max_sectors = queue_max_sectors(q);
-
-
-        if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
+        if (blk_rq_sectors(req) + bio_sectors(bio) >
+            blk_rq_get_max_sectors(req)) {
                 req->cmd_flags |= REQ_NOMERGE;
                 if (req == q->last_merge)
                         q->last_merge = NULL;
@@ -338,7 +325,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
         /*
          * Will it become too large?
          */
-        if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q))
+        if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
+            blk_rq_get_max_sectors(req))
                 return 0;
 
         total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -417,16 +405,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
         if (!rq_mergeable(req) || !rq_mergeable(next))
                 return 0;
 
-        /*
-         * Don't merge file system requests and discard requests
-         */
-        if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD))
-                return 0;
-
-        /*
-         * Don't merge discard requests and secure discard requests
-         */
-        if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE))
+        if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
                 return 0;
 
         /*
@@ -440,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
             || next->special)
                 return 0;
 
+        if (req->cmd_flags & REQ_WRITE_SAME &&
+            !blk_write_same_mergeable(req->bio, next->bio))
+                return 0;
+
         /*
          * If we are allowed to merge, then append bio list
          * from next to rq and release next. merge_requests_fn
@@ -521,15 +504,10 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
 
 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
 {
-        if (!rq_mergeable(rq))
+        if (!rq_mergeable(rq) || !bio_mergeable(bio))
                 return false;
 
-        /* don't merge file system requests and discard requests */
-        if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
-                return false;
-
-        /* don't merge discard requests and secure discard requests */
-        if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
+        if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
                 return false;
 
         /* different data direction or already started, don't merge */
@@ -544,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
         if (bio_integrity(bio) != blk_integrity_rq(rq))
                 return false;
 
+        /* must be using the same buffer */
+        if (rq->cmd_flags & REQ_WRITE_SAME &&
+            !blk_write_same_mergeable(rq->bio, bio))
+                return false;
+
         return true;
 }
 
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 565a6786032f..779bb7646bcd 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
         lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
         lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
         lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
+        lim->max_write_same_sectors = 0;
         lim->max_discard_sectors = 0;
         lim->discard_granularity = 0;
         lim->discard_alignment = 0;
@@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
         lim->max_segments = USHRT_MAX;
         lim->max_hw_sectors = UINT_MAX;
         lim->max_sectors = UINT_MAX;
+        lim->max_write_same_sectors = UINT_MAX;
 }
 EXPORT_SYMBOL(blk_set_stacking_limits);
 
@@ -286,6 +288,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
 EXPORT_SYMBOL(blk_queue_max_discard_sectors);
 
 /**
+ * blk_queue_max_write_same_sectors - set max sectors for a single write same
+ * @q:  the request queue for the device
+ * @max_write_same_sectors: maximum number of sectors to write per command
+ **/
+void blk_queue_max_write_same_sectors(struct request_queue *q,
+                                      unsigned int max_write_same_sectors)
+{
+        q->limits.max_write_same_sectors = max_write_same_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
+
+/**
  * blk_queue_max_segments - set max hw segments for a request for this queue
  * @q:  the request queue for the device
  * @max_segments:  max number of segments
@@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 
         t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
         t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
+        t->max_write_same_sectors = min(t->max_write_same_sectors,
+                                        b->max_write_same_sectors);
         t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
 
         t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9628b291f960..ce6204608822 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -26,9 +26,15 @@ queue_var_show(unsigned long var, char *page)
 static ssize_t
 queue_var_store(unsigned long *var, const char *page, size_t count)
 {
-        char *p = (char *) page;
+        int err;
+        unsigned long v;
+
+        err = strict_strtoul(page, 10, &v);
+        if (err || v > UINT_MAX)
+                return -EINVAL;
+
+        *var = v;
 
-        *var = simple_strtoul(p, &p, 10);
         return count;
 }
 
@@ -48,6 +54,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
                 return -EINVAL;
 
         ret = queue_var_store(&nr, page, count);
+        if (ret < 0)
+                return ret;
+
         if (nr < BLKDEV_MIN_RQ)
                 nr = BLKDEV_MIN_RQ;
 
@@ -102,6 +111,9 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
         unsigned long ra_kb;
         ssize_t ret = queue_var_store(&ra_kb, page, count);
 
+        if (ret < 0)
+                return ret;
+
         q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
 
         return ret;
@@ -168,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
         return queue_var_show(queue_discard_zeroes_data(q), page);
 }
 
+static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
+{
+        return sprintf(page, "%llu\n",
+                (unsigned long long)q->limits.max_write_same_sectors << 9);
+}
+
+
 static ssize_t
 queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
 {
@@ -176,6 +195,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
                         page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
         ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
 
+        if (ret < 0)
+                return ret;
+
         if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
                 return -EINVAL;
 
@@ -236,6 +258,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
         unsigned long nm;
         ssize_t ret = queue_var_store(&nm, page, count);
 
+        if (ret < 0)
+                return ret;
+
         spin_lock_irq(q->queue_lock);
         queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
         queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
@@ -264,6 +289,9 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
         unsigned long val;
 
         ret = queue_var_store(&val, page, count);
+        if (ret < 0)
+                return ret;
+
         spin_lock_irq(q->queue_lock);
         if (val == 2) {
                 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
@@ -364,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
         .show = queue_discard_zeroes_data_show,
 };
 
+static struct queue_sysfs_entry queue_write_same_max_entry = {
+        .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
+        .show = queue_write_same_max_show,
+};
+
 static struct queue_sysfs_entry queue_nonrot_entry = {
         .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
         .show = queue_show_nonrot,
@@ -411,6 +444,7 @@ static struct attribute *default_attrs[] = {
         &queue_discard_granularity_entry.attr,
         &queue_discard_max_entry.attr,
         &queue_discard_zeroes_data_entry.attr,
+        &queue_write_same_max_entry.attr,
         &queue_nonrot_entry.attr,
         &queue_nomerges_entry.attr,
         &queue_rq_affinity_entry.attr,
@@ -527,6 +561,12 @@ int blk_register_queue(struct gendisk *disk)
         if (WARN_ON(!q))
                 return -ENXIO;
 
+        /*
+         * Initialization must be complete by now.  Finish the initial
+         * bypass from queue allocation.
+         */
+        blk_queue_bypass_end(q);
+
         ret = blk_trace_init_sysfs(dev);
         if (ret)
                 return ret;
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 4af6f5cc1167..cc345e1d8d4e 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -186,7 +186,8 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
                 tags = __blk_queue_init_tags(q, depth);
 
                 if (!tags)
-                        goto fail;
+                        return -ENOMEM;
+
         } else if (q->queue_tags) {
                 rc = blk_queue_resize_tags(q, depth);
                 if (rc)
@@ -203,9 +204,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
         queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
         INIT_LIST_HEAD(&q->tag_busy_list);
         return 0;
-fail:
-        kfree(tags);
-        return -ENOMEM;
 }
 EXPORT_SYMBOL(blk_queue_init_tags);
 
diff --git a/block/blk.h b/block/blk.h
index 2a0ea32d249f..ca51543b248c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -171,14 +171,13 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
  *
  *      a) it's attached to a gendisk, and
  *      b) the queue had IO stats enabled when this request was started, and
- *      c) it's a file system request or a discard request
+ *      c) it's a file system request
  */
 static inline int blk_do_io_stat(struct request *rq)
 {
         return rq->rq_disk &&
                (rq->cmd_flags & REQ_IO_STAT) &&
-                (rq->cmd_type == REQ_TYPE_FS ||
-                 (rq->cmd_flags & REQ_DISCARD));
+                (rq->cmd_type == REQ_TYPE_FS);
 }
 
 /*
diff --git a/block/elevator.c b/block/elevator.c
index 6a55d418896f..9b1d42b62f20 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -562,8 +562,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
 
         if (rq->cmd_flags & REQ_SOFTBARRIER) {
                 /* barriers are scheduling boundary, update end_sector */
-                if (rq->cmd_type == REQ_TYPE_FS ||
-                    (rq->cmd_flags & REQ_DISCARD)) {
+                if (rq->cmd_type == REQ_TYPE_FS) {
                         q->end_sector = rq_end_sector(rq);
                         q->boundary_rq = rq;
                 }
@@ -605,8 +604,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
                 if (elv_attempt_insert_merge(q, rq))
                         break;
         case ELEVATOR_INSERT_SORT:
-                BUG_ON(rq->cmd_type != REQ_TYPE_FS &&
-                       !(rq->cmd_flags & REQ_DISCARD));
+                BUG_ON(rq->cmd_type != REQ_TYPE_FS);
                 rq->cmd_flags |= REQ_SORTED;
                 q->nr_sorted++;
                 if (rq_mergeable(rq)) {
diff --git a/block/ioctl.c b/block/ioctl.c
index 4a85096f5410..a31d91d9bc5a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -185,6 +185,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
         return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
 }
 
+static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
+                             uint64_t len)
+{
+        if (start & 511)
+                return -EINVAL;
+        if (len & 511)
+                return -EINVAL;
+        start >>= 9;
+        len >>= 9;
+
+        if (start + len > (i_size_read(bdev->bd_inode) >> 9))
+                return -EINVAL;
+
+        return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
+}
+
 static int put_ushort(unsigned long arg, unsigned short val)
 {
         return put_user(val, (unsigned short __user *)arg);
@@ -300,6 +316,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
                 return blk_ioctl_discard(bdev, range[0], range[1],
                                          cmd == BLKSECDISCARD);
         }
+        case BLKZEROOUT: {
+                uint64_t range[2];
+
+                if (!(mode & FMODE_WRITE))
+                        return -EBADF;
+
+                if (copy_from_user(range, (void __user *)arg, sizeof(range)))
+                        return -EFAULT;
+
+                return blk_ioctl_zeroout(bdev, range[0], range[1]);
+        }
 
         case HDIO_GETGEO: {
                 struct hd_geometry geo;