author	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-10 20:04:23 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-10-10 20:04:23 -0400
commit	ce40be7a820bb393ac4ac69865f018d2f4038cf0 (patch)
tree	b1fe5a93346eb06f22b1c303d63ec5456d7212ab
parent	ba0a5a36f60e4c1152af3a2ae2813251974405bf (diff)
parent	02f3939e1a9357b7c370a4a69717cf9c02452737 (diff)
Merge branch 'for-3.7/core' of git://git.kernel.dk/linux-block
Pull block IO update from Jens Axboe:
 "Core block IO bits for 3.7.  Not a huge round this time, it contains:

   - First series from Kent cleaning up and generalizing bio allocation
     and freeing.

   - WRITE_SAME support from Martin.

   - Mikulas patches to prevent O_DIRECT crashes when someone changes
     the block size of a device.

   - Make bio_split() work on data-less bio's (like trim/discards).

   - A few other minor fixups."

Fixed up silent semantic mis-merge as per Mikulas Patocka and Andrew
Morton.  It is due to the VM no longer using a prio-tree (see commit
6b2dbba8b6ac: "mm: replace vma prio_tree with an interval tree").

So make set_blocksize() use mapping_mapped() instead of open-coding the
internal VM knowledge that has changed.

* 'for-3.7/core' of git://git.kernel.dk/linux-block: (26 commits)
  block: makes bio_split support bio without data
  scatterlist: refactor the sg_nents
  scatterlist: add sg_nents
  fs: fix include/percpu-rwsem.h export error
  percpu-rw-semaphore: fix documentation typos
  fs/block_dev.c:1644:5: sparse: symbol 'blkdev_mmap' was not declared
  blockdev: turn a rw semaphore into a percpu rw semaphore
  Fix a crash when block device is read and block size is changed at the same time
  block: fix request_queue->flags initialization
  block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue()
  block: ioctl to zero block ranges
  block: Make blkdev_issue_zeroout use WRITE SAME
  block: Implement support for WRITE SAME
  block: Consolidate command flag and queue limit checks for merges
  block: Clean up special command handling logic
  block/blk-tag.c: Remove useless kfree
  block: remove the duplicated setting for congestion_threshold
  block: reject invalid queue attribute values
  block: Add bio_clone_bioset(), bio_clone_kmalloc()
  block: Consolidate bio_alloc_bioset(), bio_kmalloc()
  ...
-rw-r--r--  Documentation/ABI/testing/sysfs-block | 14
-rw-r--r--  Documentation/block/biodoc.txt | 5
-rw-r--r--  Documentation/percpu-rw-semaphore.txt | 27
-rw-r--r--  block/blk-core.c | 51
-rw-r--r--  block/blk-lib.c | 104
-rw-r--r--  block/blk-merge.c | 53
-rw-r--r--  block/blk-settings.c | 16
-rw-r--r--  block/blk-sysfs.c | 44
-rw-r--r--  block/blk-tag.c | 6
-rw-r--r--  block/blk.h | 5
-rw-r--r--  block/elevator.c | 6
-rw-r--r--  block/ioctl.c | 27
-rw-r--r--  drivers/block/drbd/drbd_main.c | 13
-rw-r--r--  drivers/block/osdblk.c | 3
-rw-r--r--  drivers/block/pktcdvd.c | 52
-rw-r--r--  drivers/char/raw.c | 2
-rw-r--r--  drivers/md/dm-crypt.c | 16
-rw-r--r--  drivers/md/dm-io.c | 11
-rw-r--r--  drivers/md/dm.c | 74
-rw-r--r--  drivers/md/md.c | 44
-rw-r--r--  drivers/md/raid0.c | 1
-rw-r--r--  drivers/target/target_core_iblock.c | 9
-rw-r--r--  fs/bio-integrity.c | 44
-rw-r--r--  fs/bio.c | 231
-rw-r--r--  fs/block_dev.c | 68
-rw-r--r--  fs/exofs/ore.c | 5
-rw-r--r--  include/linux/bio.h | 70
-rw-r--r--  include/linux/blk_types.h | 36
-rw-r--r--  include/linux/blkdev.h | 82
-rw-r--r--  include/linux/fs.h | 6
-rw-r--r--  include/linux/percpu-rwsem.h | 89
-rw-r--r--  include/linux/scatterlist.h | 1
-rw-r--r--  lib/scatterlist.c | 19
33 files changed, 770 insertions, 464 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index c1eb41cb9876..279da08f7541 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -206,3 +206,17 @@ Description:
 		when a discarded area is read the discard_zeroes_data
 		parameter will be set to one. Otherwise it will be 0 and
 		the result of reading a discarded area is undefined.
+
+What:		/sys/block/<disk>/queue/write_same_max_bytes
+Date:		January 2012
+Contact:	Martin K. Petersen <martin.petersen@oracle.com>
+Description:
+		Some devices support a write same operation in which a
+		single data block can be written to a range of several
+		contiguous blocks on storage. This can be used to wipe
+		areas on disk or to initialize drives in a RAID
+		configuration. write_same_max_bytes indicates how many
+		bytes can be written in a single write same command. If
+		write_same_max_bytes is 0, write same is not supported
+		by the device.
+
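
The attribute is reported as plain text in bytes, so user space can probe WRITE SAME support simply by reading the file. A minimal sketch, where the device name is a placeholder and not part of the patch:

#include <stdio.h>

int main(void)
{
	unsigned long long max_bytes = 0;
	FILE *f = fopen("/sys/block/sda/queue/write_same_max_bytes", "r");

	if (f && fscanf(f, "%llu", &max_bytes) == 1 && max_bytes > 0)
		printf("WRITE SAME supported, up to %llu bytes per command\n",
		       max_bytes);
	else
		printf("WRITE SAME not supported\n");

	if (f)
		fclose(f);
	return 0;
}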
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index e418dc0a7086..8df5e8e6dceb 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -465,7 +465,6 @@ struct bio {
        bio_end_io_t	*bi_end_io;  /* bi_end_io (bio) */
        atomic_t		bi_cnt;	     /* pin count: free when it hits zero */
        void             *bi_private;
-       bio_destructor_t *bi_destructor; /* bi_destructor (bio) */
 };
 
 With this multipage bio design:
@@ -647,10 +646,6 @@ for a non-clone bio. There are the 6 pools setup for different size biovecs,
 so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the
 given size from these slabs.
 
-The bi_destructor() routine takes into account the possibility of the bio
-having originated from a different source (see later discussions on
-n/w to block transfers and kvec_cb)
-
 The bio_get() routine may be used to hold an extra reference on a bio prior
 to i/o submission, if the bio fields are likely to be accessed after the
 i/o is issued (since the bio may otherwise get freed in case i/o completion
diff --git a/Documentation/percpu-rw-semaphore.txt b/Documentation/percpu-rw-semaphore.txt
new file mode 100644
index 000000000000..7d3c82431909
--- /dev/null
+++ b/Documentation/percpu-rw-semaphore.txt
@@ -0,0 +1,27 @@
+Percpu rw semaphores
+--------------------
+
+Percpu rw semaphores is a new read-write semaphore design that is
+optimized for locking for reading.
+
+The problem with traditional read-write semaphores is that when multiple
+cores take the lock for reading, the cache line containing the semaphore
+is bouncing between L1 caches of the cores, causing performance
+degradation.
+
+Locking for reading is very fast, it uses RCU and it avoids any atomic
+instruction in the lock and unlock path. On the other hand, locking for
+writing is very expensive, it calls synchronize_rcu() that can take
+hundreds of milliseconds.
+
+The lock is declared with "struct percpu_rw_semaphore" type.
+The lock is initialized percpu_init_rwsem, it returns 0 on success and
+-ENOMEM on allocation failure.
+The lock must be freed with percpu_free_rwsem to avoid memory leak.
+
+The lock is locked for read with percpu_down_read, percpu_up_read and
+for write with percpu_down_write, percpu_up_write.
+
+The idea of using RCU for optimized rw-lock was introduced by
+Eric Dumazet <eric.dumazet@gmail.com>.
+The code was written by Mikulas Patocka <mpatocka@redhat.com>
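
A minimal usage sketch of the API described in the new document, using only the functions it names; the example_* wrappers around them are illustrative:

#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore example_sem;

static int example_setup(void)
{
	/* returns 0 on success, -ENOMEM on allocation failure */
	return percpu_init_rwsem(&example_sem);
}

static void example_fast_reader(void)
{
	percpu_down_read(&example_sem);		/* cheap: RCU, no atomic ops */
	/* ... read the state protected by the semaphore ... */
	percpu_up_read(&example_sem);
}

static void example_rare_writer(void)
{
	percpu_down_write(&example_sem);	/* expensive: calls synchronize_rcu() */
	/* ... modify the protected state ... */
	percpu_up_write(&example_sem);
}

static void example_teardown(void)
{
	percpu_free_rwsem(&example_sem);
}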
diff --git a/block/blk-core.c b/block/blk-core.c
index d2da64170513..a33870b1847b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -606,8 +606,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
606 /* 606 /*
607 * A queue starts its life with bypass turned on to avoid 607 * A queue starts its life with bypass turned on to avoid
608 * unnecessary bypass on/off overhead and nasty surprises during 608 * unnecessary bypass on/off overhead and nasty surprises during
609 * init. The initial bypass will be finished at the end of 609 * init. The initial bypass will be finished when the queue is
610 * blk_init_allocated_queue(). 610 * registered by blk_register_queue().
611 */ 611 */
612 q->bypass_depth = 1; 612 q->bypass_depth = 1;
613 __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); 613 __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
@@ -694,7 +694,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
694 q->request_fn = rfn; 694 q->request_fn = rfn;
695 q->prep_rq_fn = NULL; 695 q->prep_rq_fn = NULL;
696 q->unprep_rq_fn = NULL; 696 q->unprep_rq_fn = NULL;
697 q->queue_flags = QUEUE_FLAG_DEFAULT; 697 q->queue_flags |= QUEUE_FLAG_DEFAULT;
698 698
699 /* Override internal queue lock with supplied lock pointer */ 699 /* Override internal queue lock with supplied lock pointer */
700 if (lock) 700 if (lock)
@@ -710,11 +710,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn,
710 /* init elevator */ 710 /* init elevator */
711 if (elevator_init(q, NULL)) 711 if (elevator_init(q, NULL))
712 return NULL; 712 return NULL;
713
714 blk_queue_congestion_threshold(q);
715
716 /* all done, end the initial bypass */
717 blk_queue_bypass_end(q);
718 return q; 713 return q;
719} 714}
720EXPORT_SYMBOL(blk_init_allocated_queue); 715EXPORT_SYMBOL(blk_init_allocated_queue);
@@ -1657,8 +1652,8 @@ generic_make_request_checks(struct bio *bio)
1657 goto end_io; 1652 goto end_io;
1658 } 1653 }
1659 1654
1660 if (unlikely(!(bio->bi_rw & REQ_DISCARD) && 1655 if (likely(bio_is_rw(bio) &&
1661 nr_sectors > queue_max_hw_sectors(q))) { 1656 nr_sectors > queue_max_hw_sectors(q))) {
1662 printk(KERN_ERR "bio too big device %s (%u > %u)\n", 1657 printk(KERN_ERR "bio too big device %s (%u > %u)\n",
1663 bdevname(bio->bi_bdev, b), 1658 bdevname(bio->bi_bdev, b),
1664 bio_sectors(bio), 1659 bio_sectors(bio),
@@ -1699,8 +1694,12 @@ generic_make_request_checks(struct bio *bio)
1699 1694
1700 if ((bio->bi_rw & REQ_DISCARD) && 1695 if ((bio->bi_rw & REQ_DISCARD) &&
1701 (!blk_queue_discard(q) || 1696 (!blk_queue_discard(q) ||
1702 ((bio->bi_rw & REQ_SECURE) && 1697 ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
1703 !blk_queue_secdiscard(q)))) { 1698 err = -EOPNOTSUPP;
1699 goto end_io;
1700 }
1701
1702 if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
1704 err = -EOPNOTSUPP; 1703 err = -EOPNOTSUPP;
1705 goto end_io; 1704 goto end_io;
1706 } 1705 }
@@ -1810,15 +1809,20 @@ EXPORT_SYMBOL(generic_make_request);
1810 */ 1809 */
1811void submit_bio(int rw, struct bio *bio) 1810void submit_bio(int rw, struct bio *bio)
1812{ 1811{
1813 int count = bio_sectors(bio);
1814
1815 bio->bi_rw |= rw; 1812 bio->bi_rw |= rw;
1816 1813
1817 /* 1814 /*
1818 * If it's a regular read/write or a barrier with data attached, 1815 * If it's a regular read/write or a barrier with data attached,
1819 * go through the normal accounting stuff before submission. 1816 * go through the normal accounting stuff before submission.
1820 */ 1817 */
1821 if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { 1818 if (bio_has_data(bio)) {
1819 unsigned int count;
1820
1821 if (unlikely(rw & REQ_WRITE_SAME))
1822 count = bdev_logical_block_size(bio->bi_bdev) >> 9;
1823 else
1824 count = bio_sectors(bio);
1825
1822 if (rw & WRITE) { 1826 if (rw & WRITE) {
1823 count_vm_events(PGPGOUT, count); 1827 count_vm_events(PGPGOUT, count);
1824 } else { 1828 } else {
@@ -1864,11 +1868,10 @@ EXPORT_SYMBOL(submit_bio);
1864 */ 1868 */
1865int blk_rq_check_limits(struct request_queue *q, struct request *rq) 1869int blk_rq_check_limits(struct request_queue *q, struct request *rq)
1866{ 1870{
1867 if (rq->cmd_flags & REQ_DISCARD) 1871 if (!rq_mergeable(rq))
1868 return 0; 1872 return 0;
1869 1873
1870 if (blk_rq_sectors(rq) > queue_max_sectors(q) || 1874 if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
1871 blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
1872 printk(KERN_ERR "%s: over max size limit.\n", __func__); 1875 printk(KERN_ERR "%s: over max size limit.\n", __func__);
1873 return -EIO; 1876 return -EIO;
1874 } 1877 }
@@ -2340,7 +2343,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
2340 req->buffer = bio_data(req->bio); 2343 req->buffer = bio_data(req->bio);
2341 2344
2342 /* update sector only for requests with clear definition of sector */ 2345 /* update sector only for requests with clear definition of sector */
2343 if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) 2346 if (req->cmd_type == REQ_TYPE_FS)
2344 req->__sector += total_bytes >> 9; 2347 req->__sector += total_bytes >> 9;
2345 2348
2346 /* mixed attributes always follow the first bio */ 2349 /* mixed attributes always follow the first bio */
@@ -2781,16 +2784,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2781 blk_rq_init(NULL, rq); 2784 blk_rq_init(NULL, rq);
2782 2785
2783 __rq_for_each_bio(bio_src, rq_src) { 2786 __rq_for_each_bio(bio_src, rq_src) {
2784 bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); 2787 bio = bio_clone_bioset(bio_src, gfp_mask, bs);
2785 if (!bio) 2788 if (!bio)
2786 goto free_and_out; 2789 goto free_and_out;
2787 2790
2788 __bio_clone(bio, bio_src);
2789
2790 if (bio_integrity(bio_src) &&
2791 bio_integrity_clone(bio, bio_src, gfp_mask, bs))
2792 goto free_and_out;
2793
2794 if (bio_ctr && bio_ctr(bio, bio_src, data)) 2791 if (bio_ctr && bio_ctr(bio, bio_src, data))
2795 goto free_and_out; 2792 goto free_and_out;
2796 2793
@@ -2807,7 +2804,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
2807 2804
2808free_and_out: 2805free_and_out:
2809 if (bio) 2806 if (bio)
2810 bio_free(bio, bs); 2807 bio_put(bio);
2811 blk_rq_unprep_clone(rq); 2808 blk_rq_unprep_clone(rq);
2812 2809
2813 return -ENOMEM; 2810 return -ENOMEM;
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 19cc761cacb2..9373b58dfab1 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -130,6 +130,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
130EXPORT_SYMBOL(blkdev_issue_discard); 130EXPORT_SYMBOL(blkdev_issue_discard);
131 131
132/** 132/**
133 * blkdev_issue_write_same - queue a write same operation
134 * @bdev: target blockdev
135 * @sector: start sector
136 * @nr_sects: number of sectors to write
137 * @gfp_mask: memory allocation flags (for bio_alloc)
138 * @page: page containing data to write
139 *
140 * Description:
141 * Issue a write same request for the sectors in question.
142 */
143int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
144 sector_t nr_sects, gfp_t gfp_mask,
145 struct page *page)
146{
147 DECLARE_COMPLETION_ONSTACK(wait);
148 struct request_queue *q = bdev_get_queue(bdev);
149 unsigned int max_write_same_sectors;
150 struct bio_batch bb;
151 struct bio *bio;
152 int ret = 0;
153
154 if (!q)
155 return -ENXIO;
156
157 max_write_same_sectors = q->limits.max_write_same_sectors;
158
159 if (max_write_same_sectors == 0)
160 return -EOPNOTSUPP;
161
162 atomic_set(&bb.done, 1);
163 bb.flags = 1 << BIO_UPTODATE;
164 bb.wait = &wait;
165
166 while (nr_sects) {
167 bio = bio_alloc(gfp_mask, 1);
168 if (!bio) {
169 ret = -ENOMEM;
170 break;
171 }
172
173 bio->bi_sector = sector;
174 bio->bi_end_io = bio_batch_end_io;
175 bio->bi_bdev = bdev;
176 bio->bi_private = &bb;
177 bio->bi_vcnt = 1;
178 bio->bi_io_vec->bv_page = page;
179 bio->bi_io_vec->bv_offset = 0;
180 bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
181
182 if (nr_sects > max_write_same_sectors) {
183 bio->bi_size = max_write_same_sectors << 9;
184 nr_sects -= max_write_same_sectors;
185 sector += max_write_same_sectors;
186 } else {
187 bio->bi_size = nr_sects << 9;
188 nr_sects = 0;
189 }
190
191 atomic_inc(&bb.done);
192 submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
193 }
194
195 /* Wait for bios in-flight */
196 if (!atomic_dec_and_test(&bb.done))
197 wait_for_completion(&wait);
198
199 if (!test_bit(BIO_UPTODATE, &bb.flags))
200 ret = -ENOTSUPP;
201
202 return ret;
203}
204EXPORT_SYMBOL(blkdev_issue_write_same);
205
206/**
133 * blkdev_issue_zeroout - generate number of zero filed write bios 207 * blkdev_issue_zeroout - generate number of zero filed write bios
134 * @bdev: blockdev to issue 208 * @bdev: blockdev to issue
135 * @sector: start sector 209 * @sector: start sector
@@ -140,7 +214,7 @@ EXPORT_SYMBOL(blkdev_issue_discard);
140 * Generate and issue number of bios with zerofiled pages. 214 * Generate and issue number of bios with zerofiled pages.
141 */ 215 */
142 216
143int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 217int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
144 sector_t nr_sects, gfp_t gfp_mask) 218 sector_t nr_sects, gfp_t gfp_mask)
145{ 219{
146 int ret; 220 int ret;
@@ -190,4 +264,32 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
190 264
191 return ret; 265 return ret;
192} 266}
267
268/**
269 * blkdev_issue_zeroout - zero-fill a block range
270 * @bdev: blockdev to write
271 * @sector: start sector
272 * @nr_sects: number of sectors to write
273 * @gfp_mask: memory allocation flags (for bio_alloc)
274 *
275 * Description:
276 * Generate and issue number of bios with zerofiled pages.
277 */
278
279int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
280 sector_t nr_sects, gfp_t gfp_mask)
281{
282 if (bdev_write_same(bdev)) {
283 unsigned char bdn[BDEVNAME_SIZE];
284
285 if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
286 ZERO_PAGE(0)))
287 return 0;
288
289 bdevname(bdev, bdn);
290 pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
291 }
292
293 return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
294}
193EXPORT_SYMBOL(blkdev_issue_zeroout); 295EXPORT_SYMBOL(blkdev_issue_zeroout);
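
For in-kernel callers, the division of labour between the two exported helpers looks roughly like this. A sketch only: the example_* functions, the pattern page and the 1 MiB region at LBA 2048 are illustrative, not taken from the patch:

#include <linux/blkdev.h>

/* Zero a 1 MiB region (2048 sectors of 512 bytes) starting at LBA 2048.
 * blkdev_issue_zeroout() now prefers WRITE SAME with the zero page and
 * falls back to zero-filled writes when that is unsupported or fails. */
static int example_zero_region(struct block_device *bdev)
{
	return blkdev_issue_zeroout(bdev, 2048, 2048, GFP_KERNEL);
}

/* Writing an arbitrary repeating pattern uses blkdev_issue_write_same()
 * directly; pattern_page holds one logical block of data. */
static int example_write_pattern(struct block_device *bdev,
				 struct page *pattern_page)
{
	if (!bdev_write_same(bdev))
		return -EOPNOTSUPP;

	return blkdev_issue_write_same(bdev, 2048, 2048, GFP_KERNEL,
				       pattern_page);
}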
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e76279e41162..936a110de0b9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -275,14 +275,8 @@ no_merge:
275int ll_back_merge_fn(struct request_queue *q, struct request *req, 275int ll_back_merge_fn(struct request_queue *q, struct request *req,
276 struct bio *bio) 276 struct bio *bio)
277{ 277{
278 unsigned short max_sectors; 278 if (blk_rq_sectors(req) + bio_sectors(bio) >
279 279 blk_rq_get_max_sectors(req)) {
280 if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
281 max_sectors = queue_max_hw_sectors(q);
282 else
283 max_sectors = queue_max_sectors(q);
284
285 if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
286 req->cmd_flags |= REQ_NOMERGE; 280 req->cmd_flags |= REQ_NOMERGE;
287 if (req == q->last_merge) 281 if (req == q->last_merge)
288 q->last_merge = NULL; 282 q->last_merge = NULL;
@@ -299,15 +293,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
299int ll_front_merge_fn(struct request_queue *q, struct request *req, 293int ll_front_merge_fn(struct request_queue *q, struct request *req,
300 struct bio *bio) 294 struct bio *bio)
301{ 295{
302 unsigned short max_sectors; 296 if (blk_rq_sectors(req) + bio_sectors(bio) >
303 297 blk_rq_get_max_sectors(req)) {
304 if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC))
305 max_sectors = queue_max_hw_sectors(q);
306 else
307 max_sectors = queue_max_sectors(q);
308
309
310 if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) {
311 req->cmd_flags |= REQ_NOMERGE; 298 req->cmd_flags |= REQ_NOMERGE;
312 if (req == q->last_merge) 299 if (req == q->last_merge)
313 q->last_merge = NULL; 300 q->last_merge = NULL;
@@ -338,7 +325,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
338 /* 325 /*
339 * Will it become too large? 326 * Will it become too large?
340 */ 327 */
341 if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q)) 328 if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
329 blk_rq_get_max_sectors(req))
342 return 0; 330 return 0;
343 331
344 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 332 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
@@ -417,16 +405,7 @@ static int attempt_merge(struct request_queue *q, struct request *req,
417 if (!rq_mergeable(req) || !rq_mergeable(next)) 405 if (!rq_mergeable(req) || !rq_mergeable(next))
418 return 0; 406 return 0;
419 407
420 /* 408 if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
421 * Don't merge file system requests and discard requests
422 */
423 if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD))
424 return 0;
425
426 /*
427 * Don't merge discard requests and secure discard requests
428 */
429 if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE))
430 return 0; 409 return 0;
431 410
432 /* 411 /*
@@ -440,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req,
440 || next->special) 419 || next->special)
441 return 0; 420 return 0;
442 421
422 if (req->cmd_flags & REQ_WRITE_SAME &&
423 !blk_write_same_mergeable(req->bio, next->bio))
424 return 0;
425
443 /* 426 /*
444 * If we are allowed to merge, then append bio list 427 * If we are allowed to merge, then append bio list
445 * from next to rq and release next. merge_requests_fn 428 * from next to rq and release next. merge_requests_fn
@@ -521,15 +504,10 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
521 504
522bool blk_rq_merge_ok(struct request *rq, struct bio *bio) 505bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
523{ 506{
524 if (!rq_mergeable(rq)) 507 if (!rq_mergeable(rq) || !bio_mergeable(bio))
525 return false; 508 return false;
526 509
527 /* don't merge file system requests and discard requests */ 510 if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
528 if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD))
529 return false;
530
531 /* don't merge discard requests and secure discard requests */
532 if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE))
533 return false; 511 return false;
534 512
535 /* different data direction or already started, don't merge */ 513 /* different data direction or already started, don't merge */
@@ -544,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
544 if (bio_integrity(bio) != blk_integrity_rq(rq)) 522 if (bio_integrity(bio) != blk_integrity_rq(rq))
545 return false; 523 return false;
546 524
525 /* must be using the same buffer */
526 if (rq->cmd_flags & REQ_WRITE_SAME &&
527 !blk_write_same_mergeable(rq->bio, bio))
528 return false;
529
547 return true; 530 return true;
548} 531}
549 532
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 565a6786032f..779bb7646bcd 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim)
113 lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; 113 lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
114 lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; 114 lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
115 lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; 115 lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS;
116 lim->max_write_same_sectors = 0;
116 lim->max_discard_sectors = 0; 117 lim->max_discard_sectors = 0;
117 lim->discard_granularity = 0; 118 lim->discard_granularity = 0;
118 lim->discard_alignment = 0; 119 lim->discard_alignment = 0;
@@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
144 lim->max_segments = USHRT_MAX; 145 lim->max_segments = USHRT_MAX;
145 lim->max_hw_sectors = UINT_MAX; 146 lim->max_hw_sectors = UINT_MAX;
146 lim->max_sectors = UINT_MAX; 147 lim->max_sectors = UINT_MAX;
148 lim->max_write_same_sectors = UINT_MAX;
147} 149}
148EXPORT_SYMBOL(blk_set_stacking_limits); 150EXPORT_SYMBOL(blk_set_stacking_limits);
149 151
@@ -286,6 +288,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
286EXPORT_SYMBOL(blk_queue_max_discard_sectors); 288EXPORT_SYMBOL(blk_queue_max_discard_sectors);
287 289
288/** 290/**
291 * blk_queue_max_write_same_sectors - set max sectors for a single write same
292 * @q: the request queue for the device
293 * @max_write_same_sectors: maximum number of sectors to write per command
294 **/
295void blk_queue_max_write_same_sectors(struct request_queue *q,
296 unsigned int max_write_same_sectors)
297{
298 q->limits.max_write_same_sectors = max_write_same_sectors;
299}
300EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
301
302/**
289 * blk_queue_max_segments - set max hw segments for a request for this queue 303 * blk_queue_max_segments - set max hw segments for a request for this queue
290 * @q: the request queue for the device 304 * @q: the request queue for the device
291 * @max_segments: max number of segments 305 * @max_segments: max number of segments
@@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
510 524
511 t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); 525 t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
512 t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); 526 t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
527 t->max_write_same_sectors = min(t->max_write_same_sectors,
528 b->max_write_same_sectors);
513 t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); 529 t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
514 530
515 t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, 531 t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9628b291f960..ce6204608822 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -26,9 +26,15 @@ queue_var_show(unsigned long var, char *page)
26static ssize_t 26static ssize_t
27queue_var_store(unsigned long *var, const char *page, size_t count) 27queue_var_store(unsigned long *var, const char *page, size_t count)
28{ 28{
29 char *p = (char *) page; 29 int err;
30 unsigned long v;
31
32 err = strict_strtoul(page, 10, &v);
33 if (err || v > UINT_MAX)
34 return -EINVAL;
35
36 *var = v;
30 37
31 *var = simple_strtoul(p, &p, 10);
32 return count; 38 return count;
33} 39}
34 40
@@ -48,6 +54,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
48 return -EINVAL; 54 return -EINVAL;
49 55
50 ret = queue_var_store(&nr, page, count); 56 ret = queue_var_store(&nr, page, count);
57 if (ret < 0)
58 return ret;
59
51 if (nr < BLKDEV_MIN_RQ) 60 if (nr < BLKDEV_MIN_RQ)
52 nr = BLKDEV_MIN_RQ; 61 nr = BLKDEV_MIN_RQ;
53 62
@@ -102,6 +111,9 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
102 unsigned long ra_kb; 111 unsigned long ra_kb;
103 ssize_t ret = queue_var_store(&ra_kb, page, count); 112 ssize_t ret = queue_var_store(&ra_kb, page, count);
104 113
114 if (ret < 0)
115 return ret;
116
105 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); 117 q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
106 118
107 return ret; 119 return ret;
@@ -168,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag
168 return queue_var_show(queue_discard_zeroes_data(q), page); 180 return queue_var_show(queue_discard_zeroes_data(q), page);
169} 181}
170 182
183static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
184{
185 return sprintf(page, "%llu\n",
186 (unsigned long long)q->limits.max_write_same_sectors << 9);
187}
188
189
171static ssize_t 190static ssize_t
172queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) 191queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
173{ 192{
@@ -176,6 +195,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
176 page_kb = 1 << (PAGE_CACHE_SHIFT - 10); 195 page_kb = 1 << (PAGE_CACHE_SHIFT - 10);
177 ssize_t ret = queue_var_store(&max_sectors_kb, page, count); 196 ssize_t ret = queue_var_store(&max_sectors_kb, page, count);
178 197
198 if (ret < 0)
199 return ret;
200
179 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) 201 if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
180 return -EINVAL; 202 return -EINVAL;
181 203
@@ -236,6 +258,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
236 unsigned long nm; 258 unsigned long nm;
237 ssize_t ret = queue_var_store(&nm, page, count); 259 ssize_t ret = queue_var_store(&nm, page, count);
238 260
261 if (ret < 0)
262 return ret;
263
239 spin_lock_irq(q->queue_lock); 264 spin_lock_irq(q->queue_lock);
240 queue_flag_clear(QUEUE_FLAG_NOMERGES, q); 265 queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
241 queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); 266 queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
@@ -264,6 +289,9 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
264 unsigned long val; 289 unsigned long val;
265 290
266 ret = queue_var_store(&val, page, count); 291 ret = queue_var_store(&val, page, count);
292 if (ret < 0)
293 return ret;
294
267 spin_lock_irq(q->queue_lock); 295 spin_lock_irq(q->queue_lock);
268 if (val == 2) { 296 if (val == 2) {
269 queue_flag_set(QUEUE_FLAG_SAME_COMP, q); 297 queue_flag_set(QUEUE_FLAG_SAME_COMP, q);
@@ -364,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = {
364 .show = queue_discard_zeroes_data_show, 392 .show = queue_discard_zeroes_data_show,
365}; 393};
366 394
395static struct queue_sysfs_entry queue_write_same_max_entry = {
396 .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO },
397 .show = queue_write_same_max_show,
398};
399
367static struct queue_sysfs_entry queue_nonrot_entry = { 400static struct queue_sysfs_entry queue_nonrot_entry = {
368 .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, 401 .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
369 .show = queue_show_nonrot, 402 .show = queue_show_nonrot,
@@ -411,6 +444,7 @@ static struct attribute *default_attrs[] = {
411 &queue_discard_granularity_entry.attr, 444 &queue_discard_granularity_entry.attr,
412 &queue_discard_max_entry.attr, 445 &queue_discard_max_entry.attr,
413 &queue_discard_zeroes_data_entry.attr, 446 &queue_discard_zeroes_data_entry.attr,
447 &queue_write_same_max_entry.attr,
414 &queue_nonrot_entry.attr, 448 &queue_nonrot_entry.attr,
415 &queue_nomerges_entry.attr, 449 &queue_nomerges_entry.attr,
416 &queue_rq_affinity_entry.attr, 450 &queue_rq_affinity_entry.attr,
@@ -527,6 +561,12 @@ int blk_register_queue(struct gendisk *disk)
527 if (WARN_ON(!q)) 561 if (WARN_ON(!q))
528 return -ENXIO; 562 return -ENXIO;
529 563
564 /*
565 * Initialization must be complete by now. Finish the initial
566 * bypass from queue allocation.
567 */
568 blk_queue_bypass_end(q);
569
530 ret = blk_trace_init_sysfs(dev); 570 ret = blk_trace_init_sysfs(dev);
531 if (ret) 571 if (ret)
532 return ret; 572 return ret;
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 4af6f5cc1167..cc345e1d8d4e 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -186,7 +186,8 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
186 tags = __blk_queue_init_tags(q, depth); 186 tags = __blk_queue_init_tags(q, depth);
187 187
188 if (!tags) 188 if (!tags)
189 goto fail; 189 return -ENOMEM;
190
190 } else if (q->queue_tags) { 191 } else if (q->queue_tags) {
191 rc = blk_queue_resize_tags(q, depth); 192 rc = blk_queue_resize_tags(q, depth);
192 if (rc) 193 if (rc)
@@ -203,9 +204,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
203 queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); 204 queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
204 INIT_LIST_HEAD(&q->tag_busy_list); 205 INIT_LIST_HEAD(&q->tag_busy_list);
205 return 0; 206 return 0;
206fail:
207 kfree(tags);
208 return -ENOMEM;
209} 207}
210EXPORT_SYMBOL(blk_queue_init_tags); 208EXPORT_SYMBOL(blk_queue_init_tags);
211 209
diff --git a/block/blk.h b/block/blk.h
index 2a0ea32d249f..ca51543b248c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -171,14 +171,13 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
171 * 171 *
172 * a) it's attached to a gendisk, and 172 * a) it's attached to a gendisk, and
173 * b) the queue had IO stats enabled when this request was started, and 173 * b) the queue had IO stats enabled when this request was started, and
174 * c) it's a file system request or a discard request 174 * c) it's a file system request
175 */ 175 */
176static inline int blk_do_io_stat(struct request *rq) 176static inline int blk_do_io_stat(struct request *rq)
177{ 177{
178 return rq->rq_disk && 178 return rq->rq_disk &&
179 (rq->cmd_flags & REQ_IO_STAT) && 179 (rq->cmd_flags & REQ_IO_STAT) &&
180 (rq->cmd_type == REQ_TYPE_FS || 180 (rq->cmd_type == REQ_TYPE_FS);
181 (rq->cmd_flags & REQ_DISCARD));
182} 181}
183 182
184/* 183/*
diff --git a/block/elevator.c b/block/elevator.c
index 6a55d418896f..9b1d42b62f20 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -562,8 +562,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
562 562
563 if (rq->cmd_flags & REQ_SOFTBARRIER) { 563 if (rq->cmd_flags & REQ_SOFTBARRIER) {
564 /* barriers are scheduling boundary, update end_sector */ 564 /* barriers are scheduling boundary, update end_sector */
565 if (rq->cmd_type == REQ_TYPE_FS || 565 if (rq->cmd_type == REQ_TYPE_FS) {
566 (rq->cmd_flags & REQ_DISCARD)) {
567 q->end_sector = rq_end_sector(rq); 566 q->end_sector = rq_end_sector(rq);
568 q->boundary_rq = rq; 567 q->boundary_rq = rq;
569 } 568 }
@@ -605,8 +604,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where)
605 if (elv_attempt_insert_merge(q, rq)) 604 if (elv_attempt_insert_merge(q, rq))
606 break; 605 break;
607 case ELEVATOR_INSERT_SORT: 606 case ELEVATOR_INSERT_SORT:
608 BUG_ON(rq->cmd_type != REQ_TYPE_FS && 607 BUG_ON(rq->cmd_type != REQ_TYPE_FS);
609 !(rq->cmd_flags & REQ_DISCARD));
610 rq->cmd_flags |= REQ_SORTED; 608 rq->cmd_flags |= REQ_SORTED;
611 q->nr_sorted++; 609 q->nr_sorted++;
612 if (rq_mergeable(rq)) { 610 if (rq_mergeable(rq)) {
diff --git a/block/ioctl.c b/block/ioctl.c
index 4a85096f5410..a31d91d9bc5a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -185,6 +185,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start,
 	return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags);
 }
 
+static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start,
+			     uint64_t len)
+{
+	if (start & 511)
+		return -EINVAL;
+	if (len & 511)
+		return -EINVAL;
+	start >>= 9;
+	len >>= 9;
+
+	if (start + len > (i_size_read(bdev->bd_inode) >> 9))
+		return -EINVAL;
+
+	return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL);
+}
+
 static int put_ushort(unsigned long arg, unsigned short val)
 {
 	return put_user(val, (unsigned short __user *)arg);
@@ -300,6 +316,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd,
 		return blk_ioctl_discard(bdev, range[0], range[1],
 					 cmd == BLKSECDISCARD);
 	}
+	case BLKZEROOUT: {
+		uint64_t range[2];
+
+		if (!(mode & FMODE_WRITE))
+			return -EBADF;
+
+		if (copy_from_user(range, (void __user *)arg, sizeof(range)))
+			return -EFAULT;
+
+		return blk_ioctl_zeroout(bdev, range[0], range[1]);
+	}
 
 	case HDIO_GETGEO: {
 		struct hd_geometry geo;
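
From user space the new ioctl takes a {start, length} pair in bytes, both 512-byte aligned, and zeroes that range on the device. A hypothetical caller, where the device path is a placeholder and BLKZEROOUT comes from the updated <linux/fs.h>; note that running this destroys data on the target:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main(void)
{
	uint64_t range[2] = { 0, 1 << 20 };	/* zero the first 1 MiB */
	int fd = open("/dev/sdX", O_WRONLY);	/* placeholder device node */

	if (fd < 0)
		return 1;

	if (ioctl(fd, BLKZEROOUT, range))
		perror("BLKZEROOUT");

	close(fd);
	return 0;
}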
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index f93a0320e952..f55683ad4ffa 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -162,23 +162,12 @@ static const struct block_device_operations drbd_ops = {
162 .release = drbd_release, 162 .release = drbd_release,
163}; 163};
164 164
165static void bio_destructor_drbd(struct bio *bio)
166{
167 bio_free(bio, drbd_md_io_bio_set);
168}
169
170struct bio *bio_alloc_drbd(gfp_t gfp_mask) 165struct bio *bio_alloc_drbd(gfp_t gfp_mask)
171{ 166{
172 struct bio *bio;
173
174 if (!drbd_md_io_bio_set) 167 if (!drbd_md_io_bio_set)
175 return bio_alloc(gfp_mask, 1); 168 return bio_alloc(gfp_mask, 1);
176 169
177 bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); 170 return bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set);
178 if (!bio)
179 return NULL;
180 bio->bi_destructor = bio_destructor_drbd;
181 return bio;
182} 171}
183 172
184#ifdef __CHECKER__ 173#ifdef __CHECKER__
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 87311ebac0db..1bbc681688e4 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -266,11 +266,10 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask)
266 struct bio *tmp, *new_chain = NULL, *tail = NULL; 266 struct bio *tmp, *new_chain = NULL, *tail = NULL;
267 267
268 while (old_chain) { 268 while (old_chain) {
269 tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); 269 tmp = bio_clone_kmalloc(old_chain, gfpmask);
270 if (!tmp) 270 if (!tmp)
271 goto err_out; 271 goto err_out;
272 272
273 __bio_clone(tmp, old_chain);
274 tmp->bi_bdev = NULL; 273 tmp->bi_bdev = NULL;
275 gfpmask &= ~__GFP_WAIT; 274 gfpmask &= ~__GFP_WAIT;
276 tmp->bi_next = NULL; 275 tmp->bi_next = NULL;
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ba66e4445f41..2e7de7a59bfc 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -522,38 +522,6 @@ static void pkt_bio_finished(struct pktcdvd_device *pd)
522 } 522 }
523} 523}
524 524
525static void pkt_bio_destructor(struct bio *bio)
526{
527 kfree(bio->bi_io_vec);
528 kfree(bio);
529}
530
531static struct bio *pkt_bio_alloc(int nr_iovecs)
532{
533 struct bio_vec *bvl = NULL;
534 struct bio *bio;
535
536 bio = kmalloc(sizeof(struct bio), GFP_KERNEL);
537 if (!bio)
538 goto no_bio;
539 bio_init(bio);
540
541 bvl = kcalloc(nr_iovecs, sizeof(struct bio_vec), GFP_KERNEL);
542 if (!bvl)
543 goto no_bvl;
544
545 bio->bi_max_vecs = nr_iovecs;
546 bio->bi_io_vec = bvl;
547 bio->bi_destructor = pkt_bio_destructor;
548
549 return bio;
550
551 no_bvl:
552 kfree(bio);
553 no_bio:
554 return NULL;
555}
556
557/* 525/*
558 * Allocate a packet_data struct 526 * Allocate a packet_data struct
559 */ 527 */
@@ -567,7 +535,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
567 goto no_pkt; 535 goto no_pkt;
568 536
569 pkt->frames = frames; 537 pkt->frames = frames;
570 pkt->w_bio = pkt_bio_alloc(frames); 538 pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames);
571 if (!pkt->w_bio) 539 if (!pkt->w_bio)
572 goto no_bio; 540 goto no_bio;
573 541
@@ -581,9 +549,10 @@ static struct packet_data *pkt_alloc_packet_data(int frames)
581 bio_list_init(&pkt->orig_bios); 549 bio_list_init(&pkt->orig_bios);
582 550
583 for (i = 0; i < frames; i++) { 551 for (i = 0; i < frames; i++) {
584 struct bio *bio = pkt_bio_alloc(1); 552 struct bio *bio = bio_kmalloc(GFP_KERNEL, 1);
585 if (!bio) 553 if (!bio)
586 goto no_rd_bio; 554 goto no_rd_bio;
555
587 pkt->r_bios[i] = bio; 556 pkt->r_bios[i] = bio;
588 } 557 }
589 558
@@ -1111,21 +1080,17 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
1111 * Schedule reads for missing parts of the packet. 1080 * Schedule reads for missing parts of the packet.
1112 */ 1081 */
1113 for (f = 0; f < pkt->frames; f++) { 1082 for (f = 0; f < pkt->frames; f++) {
1114 struct bio_vec *vec;
1115
1116 int p, offset; 1083 int p, offset;
1084
1117 if (written[f]) 1085 if (written[f])
1118 continue; 1086 continue;
1087
1119 bio = pkt->r_bios[f]; 1088 bio = pkt->r_bios[f];
1120 vec = bio->bi_io_vec; 1089 bio_reset(bio);
1121 bio_init(bio);
1122 bio->bi_max_vecs = 1;
1123 bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); 1090 bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
1124 bio->bi_bdev = pd->bdev; 1091 bio->bi_bdev = pd->bdev;
1125 bio->bi_end_io = pkt_end_io_read; 1092 bio->bi_end_io = pkt_end_io_read;
1126 bio->bi_private = pkt; 1093 bio->bi_private = pkt;
1127 bio->bi_io_vec = vec;
1128 bio->bi_destructor = pkt_bio_destructor;
1129 1094
1130 p = (f * CD_FRAMESIZE) / PAGE_SIZE; 1095 p = (f * CD_FRAMESIZE) / PAGE_SIZE;
1131 offset = (f * CD_FRAMESIZE) % PAGE_SIZE; 1096 offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
@@ -1418,14 +1383,11 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
1418 } 1383 }
1419 1384
1420 /* Start the write request */ 1385 /* Start the write request */
1421 bio_init(pkt->w_bio); 1386 bio_reset(pkt->w_bio);
1422 pkt->w_bio->bi_max_vecs = PACKET_MAX_SIZE;
1423 pkt->w_bio->bi_sector = pkt->sector; 1387 pkt->w_bio->bi_sector = pkt->sector;
1424 pkt->w_bio->bi_bdev = pd->bdev; 1388 pkt->w_bio->bi_bdev = pd->bdev;
1425 pkt->w_bio->bi_end_io = pkt_end_io_packet_write; 1389 pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
1426 pkt->w_bio->bi_private = pkt; 1390 pkt->w_bio->bi_private = pkt;
1427 pkt->w_bio->bi_io_vec = bvec;
1428 pkt->w_bio->bi_destructor = pkt_bio_destructor;
1429 for (f = 0; f < pkt->frames; f++) 1391 for (f = 0; f < pkt->frames; f++)
1430 if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) 1392 if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
1431 BUG(); 1393 BUG();
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 54a3a6d09819..0bb207eaef2f 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd,
285 285
286static const struct file_operations raw_fops = { 286static const struct file_operations raw_fops = {
287 .read = do_sync_read, 287 .read = do_sync_read,
288 .aio_read = generic_file_aio_read, 288 .aio_read = blkdev_aio_read,
289 .write = do_sync_write, 289 .write = do_sync_write,
290 .aio_write = blkdev_aio_write, 290 .aio_write = blkdev_aio_write,
291 .fsync = blkdev_fsync, 291 .fsync = blkdev_fsync,
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 664743d6a6cd..bbf459bca61d 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -798,14 +798,6 @@ static int crypt_convert(struct crypt_config *cc,
798 return 0; 798 return 0;
799} 799}
800 800
801static void dm_crypt_bio_destructor(struct bio *bio)
802{
803 struct dm_crypt_io *io = bio->bi_private;
804 struct crypt_config *cc = io->cc;
805
806 bio_free(bio, cc->bs);
807}
808
809/* 801/*
810 * Generate a new unfragmented bio with the given size 802 * Generate a new unfragmented bio with the given size
811 * This should never violate the device limitations 803 * This should never violate the device limitations
@@ -974,7 +966,6 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone)
974 clone->bi_end_io = crypt_endio; 966 clone->bi_end_io = crypt_endio;
975 clone->bi_bdev = cc->dev->bdev; 967 clone->bi_bdev = cc->dev->bdev;
976 clone->bi_rw = io->base_bio->bi_rw; 968 clone->bi_rw = io->base_bio->bi_rw;
977 clone->bi_destructor = dm_crypt_bio_destructor;
978} 969}
979 970
980static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) 971static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
@@ -988,19 +979,14 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
988 * copy the required bvecs because we need the original 979 * copy the required bvecs because we need the original
989 * one in order to decrypt the whole bio data *afterwards*. 980 * one in order to decrypt the whole bio data *afterwards*.
990 */ 981 */
991 clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs); 982 clone = bio_clone_bioset(base_bio, gfp, cc->bs);
992 if (!clone) 983 if (!clone)
993 return 1; 984 return 1;
994 985
995 crypt_inc_pending(io); 986 crypt_inc_pending(io);
996 987
997 clone_init(io, clone); 988 clone_init(io, clone);
998 clone->bi_idx = 0;
999 clone->bi_vcnt = bio_segments(base_bio);
1000 clone->bi_size = base_bio->bi_size;
1001 clone->bi_sector = cc->start + io->sector; 989 clone->bi_sector = cc->start + io->sector;
1002 memcpy(clone->bi_io_vec, bio_iovec(base_bio),
1003 sizeof(struct bio_vec) * clone->bi_vcnt);
1004 990
1005 generic_make_request(clone); 991 generic_make_request(clone);
1006 return 0; 992 return 0;
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index ea5dd289fe2a..1c46f97d6664 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -249,16 +249,6 @@ static void vm_dp_init(struct dpages *dp, void *data)
249 dp->context_ptr = data; 249 dp->context_ptr = data;
250} 250}
251 251
252static void dm_bio_destructor(struct bio *bio)
253{
254 unsigned region;
255 struct io *io;
256
257 retrieve_io_and_region_from_bio(bio, &io, &region);
258
259 bio_free(bio, io->client->bios);
260}
261
262/* 252/*
263 * Functions for getting the pages from kernel memory. 253 * Functions for getting the pages from kernel memory.
264 */ 254 */
@@ -317,7 +307,6 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where,
317 bio->bi_sector = where->sector + (where->count - remaining); 307 bio->bi_sector = where->sector + (where->count - remaining);
318 bio->bi_bdev = where->bdev; 308 bio->bi_bdev = where->bdev;
319 bio->bi_end_io = endio; 309 bio->bi_end_io = endio;
320 bio->bi_destructor = dm_bio_destructor;
321 store_io_and_region_in_bio(bio, io, region); 310 store_io_and_region_in_bio(bio, io, region);
322 311
323 if (rw & REQ_DISCARD) { 312 if (rw & REQ_DISCARD) {
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 67ffa391edcf..66ceaff6455c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -86,12 +86,17 @@ struct dm_rq_target_io {
86}; 86};
87 87
88/* 88/*
89 * For request-based dm. 89 * For request-based dm - the bio clones we allocate are embedded in these
90 * One of these is allocated per bio. 90 * structs.
91 *
92 * We allocate these with bio_alloc_bioset, using the front_pad parameter when
93 * the bioset is created - this means the bio has to come at the end of the
94 * struct.
91 */ 95 */
92struct dm_rq_clone_bio_info { 96struct dm_rq_clone_bio_info {
93 struct bio *orig; 97 struct bio *orig;
94 struct dm_rq_target_io *tio; 98 struct dm_rq_target_io *tio;
99 struct bio clone;
95}; 100};
96 101
97union map_info *dm_get_mapinfo(struct bio *bio) 102union map_info *dm_get_mapinfo(struct bio *bio)
@@ -211,6 +216,11 @@ struct dm_md_mempools {
211static struct kmem_cache *_io_cache; 216static struct kmem_cache *_io_cache;
212static struct kmem_cache *_tio_cache; 217static struct kmem_cache *_tio_cache;
213static struct kmem_cache *_rq_tio_cache; 218static struct kmem_cache *_rq_tio_cache;
219
220/*
221 * Unused now, and needs to be deleted. But since io_pool is overloaded and it's
222 * still used for _io_cache, I'm leaving this for a later cleanup
223 */
214static struct kmem_cache *_rq_bio_info_cache; 224static struct kmem_cache *_rq_bio_info_cache;
215 225
216static int __init local_init(void) 226static int __init local_init(void)
@@ -467,16 +477,6 @@ static void free_rq_tio(struct dm_rq_target_io *tio)
467 mempool_free(tio, tio->md->tio_pool); 477 mempool_free(tio, tio->md->tio_pool);
468} 478}
469 479
470static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md)
471{
472 return mempool_alloc(md->io_pool, GFP_ATOMIC);
473}
474
475static void free_bio_info(struct dm_rq_clone_bio_info *info)
476{
477 mempool_free(info, info->tio->md->io_pool);
478}
479
480static int md_in_flight(struct mapped_device *md) 480static int md_in_flight(struct mapped_device *md)
481{ 481{
482 return atomic_read(&md->pending[READ]) + 482 return atomic_read(&md->pending[READ]) +
@@ -681,11 +681,6 @@ static void clone_endio(struct bio *bio, int error)
681 } 681 }
682 } 682 }
683 683
684 /*
685 * Store md for cleanup instead of tio which is about to get freed.
686 */
687 bio->bi_private = md->bs;
688
689 free_tio(md, tio); 684 free_tio(md, tio);
690 bio_put(bio); 685 bio_put(bio);
691 dec_pending(io, error); 686 dec_pending(io, error);
@@ -1036,11 +1031,6 @@ static void __map_bio(struct dm_target *ti, struct bio *clone,
1036 /* error the io and bail out, or requeue it if needed */ 1031 /* error the io and bail out, or requeue it if needed */
1037 md = tio->io->md; 1032 md = tio->io->md;
1038 dec_pending(tio->io, r); 1033 dec_pending(tio->io, r);
1039 /*
1040 * Store bio_set for cleanup.
1041 */
1042 clone->bi_end_io = NULL;
1043 clone->bi_private = md->bs;
1044 bio_put(clone); 1034 bio_put(clone);
1045 free_tio(md, tio); 1035 free_tio(md, tio);
1046 } else if (r) { 1036 } else if (r) {
@@ -1059,13 +1049,6 @@ struct clone_info {
1059 unsigned short idx; 1049 unsigned short idx;
1060}; 1050};
1061 1051
1062static void dm_bio_destructor(struct bio *bio)
1063{
1064 struct bio_set *bs = bio->bi_private;
1065
1066 bio_free(bio, bs);
1067}
1068
1069/* 1052/*
1070 * Creates a little bio that just does part of a bvec. 1053 * Creates a little bio that just does part of a bvec.
1071 */ 1054 */
@@ -1077,7 +1060,6 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
1077 struct bio_vec *bv = bio->bi_io_vec + idx; 1060 struct bio_vec *bv = bio->bi_io_vec + idx;
1078 1061
1079 clone = bio_alloc_bioset(GFP_NOIO, 1, bs); 1062 clone = bio_alloc_bioset(GFP_NOIO, 1, bs);
1080 clone->bi_destructor = dm_bio_destructor;
1081 *clone->bi_io_vec = *bv; 1063 *clone->bi_io_vec = *bv;
1082 1064
1083 clone->bi_sector = sector; 1065 clone->bi_sector = sector;
@@ -1090,7 +1072,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
1090 clone->bi_flags |= 1 << BIO_CLONED; 1072 clone->bi_flags |= 1 << BIO_CLONED;
1091 1073
1092 if (bio_integrity(bio)) { 1074 if (bio_integrity(bio)) {
1093 bio_integrity_clone(clone, bio, GFP_NOIO, bs); 1075 bio_integrity_clone(clone, bio, GFP_NOIO);
1094 bio_integrity_trim(clone, 1076 bio_integrity_trim(clone,
1095 bio_sector_offset(bio, idx, offset), len); 1077 bio_sector_offset(bio, idx, offset), len);
1096 } 1078 }
@@ -1109,7 +1091,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
1109 1091
1110 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); 1092 clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs);
1111 __bio_clone(clone, bio); 1093 __bio_clone(clone, bio);
1112 clone->bi_destructor = dm_bio_destructor;
1113 clone->bi_sector = sector; 1094 clone->bi_sector = sector;
1114 clone->bi_idx = idx; 1095 clone->bi_idx = idx;
1115 clone->bi_vcnt = idx + bv_count; 1096 clone->bi_vcnt = idx + bv_count;
@@ -1117,7 +1098,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
1117 clone->bi_flags &= ~(1 << BIO_SEG_VALID); 1098 clone->bi_flags &= ~(1 << BIO_SEG_VALID);
1118 1099
1119 if (bio_integrity(bio)) { 1100 if (bio_integrity(bio)) {
1120 bio_integrity_clone(clone, bio, GFP_NOIO, bs); 1101 bio_integrity_clone(clone, bio, GFP_NOIO);
1121 1102
1122 if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) 1103 if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
1123 bio_integrity_trim(clone, 1104 bio_integrity_trim(clone,
@@ -1152,9 +1133,8 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti,
1152 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush 1133 * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush
1153 * and discard, so no need for concern about wasted bvec allocations. 1134 * and discard, so no need for concern about wasted bvec allocations.
1154 */ 1135 */
1155 clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs); 1136 clone = bio_clone_bioset(ci->bio, GFP_NOIO, ci->md->bs);
1156 __bio_clone(clone, ci->bio); 1137
1157 clone->bi_destructor = dm_bio_destructor;
1158 if (len) { 1138 if (len) {
1159 clone->bi_sector = ci->sector; 1139 clone->bi_sector = ci->sector;
1160 clone->bi_size = to_bytes(len); 1140 clone->bi_size = to_bytes(len);
@@ -1484,30 +1464,17 @@ void dm_dispatch_request(struct request *rq)
1484} 1464}
1485EXPORT_SYMBOL_GPL(dm_dispatch_request); 1465EXPORT_SYMBOL_GPL(dm_dispatch_request);
1486 1466
1487static void dm_rq_bio_destructor(struct bio *bio)
1488{
1489 struct dm_rq_clone_bio_info *info = bio->bi_private;
1490 struct mapped_device *md = info->tio->md;
1491
1492 free_bio_info(info);
1493 bio_free(bio, md->bs);
1494}
1495
1496static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, 1467static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
1497 void *data) 1468 void *data)
1498{ 1469{
1499 struct dm_rq_target_io *tio = data; 1470 struct dm_rq_target_io *tio = data;
1500 struct mapped_device *md = tio->md; 1471 struct dm_rq_clone_bio_info *info =
1501 struct dm_rq_clone_bio_info *info = alloc_bio_info(md); 1472 container_of(bio, struct dm_rq_clone_bio_info, clone);
1502
1503 if (!info)
1504 return -ENOMEM;
1505 1473
1506 info->orig = bio_orig; 1474 info->orig = bio_orig;
1507 info->tio = tio; 1475 info->tio = tio;
1508 bio->bi_end_io = end_clone_bio; 1476 bio->bi_end_io = end_clone_bio;
1509 bio->bi_private = info; 1477 bio->bi_private = info;
1510 bio->bi_destructor = dm_rq_bio_destructor;
1511 1478
1512 return 0; 1479 return 0;
1513} 1480}
@@ -2771,7 +2738,10 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity)
2771 if (!pools->tio_pool) 2738 if (!pools->tio_pool)
2772 goto free_io_pool_and_out; 2739 goto free_io_pool_and_out;
2773 2740
2774 pools->bs = bioset_create(pool_size, 0); 2741 pools->bs = (type == DM_TYPE_BIO_BASED) ?
2742 bioset_create(pool_size, 0) :
2743 bioset_create(pool_size,
2744 offsetof(struct dm_rq_clone_bio_info, clone));
2775 if (!pools->bs) 2745 if (!pools->bs)
2776 goto free_tio_pool_and_out; 2746 goto free_tio_pool_and_out;
2777 2747
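
The dm_rq_clone_bio_info change above relies on the bioset front_pad mechanism: the bioset reserves front_pad bytes immediately before each bio it hands out, so a wrapper struct whose last member is the bio can be recovered with container_of(). A generic sketch of the pattern, with names that are illustrative rather than taken from dm.c:

#include <linux/bio.h>
#include <linux/kernel.h>

struct example_bio_wrapper {
	void		*private_data;
	struct bio	clone;		/* must be the last member */
};

static struct bio_set *example_bs;

static int example_create_bioset(void)
{
	/* Reserve room for the fields that precede 'clone'. */
	example_bs = bioset_create(64,
			offsetof(struct example_bio_wrapper, clone));
	return example_bs ? 0 : -ENOMEM;
}

/* Given a bio allocated from example_bs, recover its wrapper. */
static struct example_bio_wrapper *example_wrapper(struct bio *bio)
{
	return container_of(bio, struct example_bio_wrapper, clone);
}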
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 308e87b417e0..95c88012a3b9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -155,32 +155,17 @@ static int start_readonly;
155 * like bio_clone, but with a local bio set 155 * like bio_clone, but with a local bio set
156 */ 156 */
157 157
158static void mddev_bio_destructor(struct bio *bio)
159{
160 struct mddev *mddev, **mddevp;
161
162 mddevp = (void*)bio;
163 mddev = mddevp[-1];
164
165 bio_free(bio, mddev->bio_set);
166}
167
168struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, 158struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
169 struct mddev *mddev) 159 struct mddev *mddev)
170{ 160{
171 struct bio *b; 161 struct bio *b;
172 struct mddev **mddevp;
173 162
174 if (!mddev || !mddev->bio_set) 163 if (!mddev || !mddev->bio_set)
175 return bio_alloc(gfp_mask, nr_iovecs); 164 return bio_alloc(gfp_mask, nr_iovecs);
176 165
177 b = bio_alloc_bioset(gfp_mask, nr_iovecs, 166 b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
178 mddev->bio_set);
179 if (!b) 167 if (!b)
180 return NULL; 168 return NULL;
181 mddevp = (void*)b;
182 mddevp[-1] = mddev;
183 b->bi_destructor = mddev_bio_destructor;
184 return b; 169 return b;
185} 170}
186EXPORT_SYMBOL_GPL(bio_alloc_mddev); 171EXPORT_SYMBOL_GPL(bio_alloc_mddev);
@@ -188,32 +173,10 @@ EXPORT_SYMBOL_GPL(bio_alloc_mddev);
188struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, 173struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
189 struct mddev *mddev) 174 struct mddev *mddev)
190{ 175{
191 struct bio *b;
192 struct mddev **mddevp;
193
194 if (!mddev || !mddev->bio_set) 176 if (!mddev || !mddev->bio_set)
195 return bio_clone(bio, gfp_mask); 177 return bio_clone(bio, gfp_mask);
196 178
197 b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, 179 return bio_clone_bioset(bio, gfp_mask, mddev->bio_set);
198 mddev->bio_set);
199 if (!b)
200 return NULL;
201 mddevp = (void*)b;
202 mddevp[-1] = mddev;
203 b->bi_destructor = mddev_bio_destructor;
204 __bio_clone(b, bio);
205 if (bio_integrity(bio)) {
206 int ret;
207
208 ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set);
209
210 if (ret < 0) {
211 bio_put(b);
212 return NULL;
213 }
214 }
215
216 return b;
217} 180}
218EXPORT_SYMBOL_GPL(bio_clone_mddev); 181EXPORT_SYMBOL_GPL(bio_clone_mddev);
219 182
@@ -5006,8 +4969,7 @@ int md_run(struct mddev *mddev)
5006 } 4969 }
5007 4970
5008 if (mddev->bio_set == NULL) 4971 if (mddev->bio_set == NULL)
5009 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 4972 mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);
5010 sizeof(struct mddev *));
5011 4973
5012 spin_lock(&pers_lock); 4974 spin_lock(&pers_lock);
5013 pers = find_pers(mddev->level, mddev->clevel); 4975 pers = find_pers(mddev->level, mddev->clevel);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index de63a1fc3737..a9e4fa95dfaa 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -422,6 +422,7 @@ static int raid0_run(struct mddev *mddev)
422 if (md_check_no_bitmap(mddev)) 422 if (md_check_no_bitmap(mddev))
423 return -EINVAL; 423 return -EINVAL;
424 blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); 424 blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
425 blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
425 426
426 /* if private is not null, we are here after takeover */ 427 /* if private is not null, we are here after takeover */
427 if (mddev->private == NULL) { 428 if (mddev->private == NULL) {
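Editor's note: blk_queue_max_write_same_sectors() is the new per-queue limit that gates WRITE SAME; raid0 simply caps it at the chunk size. A hedged sketch of how a driver might advertise (or leave disabled) the capability in its queue setup — my_setup_queue() and MY_WS_SECTORS are illustrative names, not part of this series:

    #include <linux/blkdev.h>

    static void my_setup_queue(struct request_queue *q)
    {
            blk_queue_max_hw_sectors(q, 1024);
            blk_queue_max_write_same_sectors(q, MY_WS_SECTORS);
            /* a limit of 0 (the default) means WRITE SAME is not supported */
    }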
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 29408d46a6d9..57d7674c5013 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -553,14 +553,6 @@ static void iblock_complete_cmd(struct se_cmd *cmd)
553 kfree(ibr); 553 kfree(ibr);
554} 554}
555 555
556static void iblock_bio_destructor(struct bio *bio)
557{
558 struct se_cmd *cmd = bio->bi_private;
559 struct iblock_dev *ib_dev = cmd->se_dev->dev_ptr;
560
561 bio_free(bio, ib_dev->ibd_bio_set);
562}
563
564static struct bio * 556static struct bio *
565iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num) 557iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num)
566{ 558{
@@ -582,7 +574,6 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num)
582 574
583 bio->bi_bdev = ib_dev->ibd_bd; 575 bio->bi_bdev = ib_dev->ibd_bd;
584 bio->bi_private = cmd; 576 bio->bi_private = cmd;
585 bio->bi_destructor = iblock_bio_destructor;
586 bio->bi_end_io = &iblock_bio_done; 577 bio->bi_end_io = &iblock_bio_done;
587 bio->bi_sector = lba; 578 bio->bi_sector = lba;
588 return bio; 579 return bio;
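Editor's note: the iblock destructor goes away for the same reason as md's — bio_alloc_bioset() records ibd_bio_set in bio->bi_pool, so the final bio_put() already knows where to return the bio. The allocation path above effectively reduces to this (a sketch assembled from the hunks, not the literal function body):

    bio = bio_alloc_bioset(GFP_NOIO, sg_num, ib_dev->ibd_bio_set);
    if (!bio)
            return NULL;

    bio->bi_bdev    = ib_dev->ibd_bd;
    bio->bi_private = cmd;
    bio->bi_end_io  = &iblock_bio_done;
    bio->bi_sector  = lba;
    /* no bi_destructor: bio_put() frees via bio->bi_pool == ibd_bio_set */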
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index e85c04b9f61c..a3f28f331b2b 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -70,23 +70,25 @@ static inline int use_bip_pool(unsigned int idx)
70} 70}
71 71
72/** 72/**
73 * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio 73 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
74 * @bio: bio to attach integrity metadata to 74 * @bio: bio to attach integrity metadata to
75 * @gfp_mask: Memory allocation mask 75 * @gfp_mask: Memory allocation mask
76 * @nr_vecs: Number of integrity metadata scatter-gather elements 76 * @nr_vecs: Number of integrity metadata scatter-gather elements
77 * @bs: bio_set to allocate from
78 * 77 *
79 * Description: This function prepares a bio for attaching integrity 78 * Description: This function prepares a bio for attaching integrity
80 * metadata. nr_vecs specifies the maximum number of pages containing 79 * metadata. nr_vecs specifies the maximum number of pages containing
81 * integrity metadata that can be attached. 80 * integrity metadata that can be attached.
82 */ 81 */
83struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, 82struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
84 gfp_t gfp_mask, 83 gfp_t gfp_mask,
85 unsigned int nr_vecs, 84 unsigned int nr_vecs)
86 struct bio_set *bs)
87{ 85{
88 struct bio_integrity_payload *bip; 86 struct bio_integrity_payload *bip;
89 unsigned int idx = vecs_to_idx(nr_vecs); 87 unsigned int idx = vecs_to_idx(nr_vecs);
88 struct bio_set *bs = bio->bi_pool;
89
90 if (!bs)
91 bs = fs_bio_set;
90 92
91 BUG_ON(bio == NULL); 93 BUG_ON(bio == NULL);
92 bip = NULL; 94 bip = NULL;
@@ -114,37 +116,22 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
114 116
115 return bip; 117 return bip;
116} 118}
117EXPORT_SYMBOL(bio_integrity_alloc_bioset);
118
119/**
120 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
121 * @bio: bio to attach integrity metadata to
122 * @gfp_mask: Memory allocation mask
123 * @nr_vecs: Number of integrity metadata scatter-gather elements
124 *
125 * Description: This function prepares a bio for attaching integrity
126 * metadata. nr_vecs specifies the maximum number of pages containing
127 * integrity metadata that can be attached.
128 */
129struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
130 gfp_t gfp_mask,
131 unsigned int nr_vecs)
132{
133 return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
134}
135EXPORT_SYMBOL(bio_integrity_alloc); 119EXPORT_SYMBOL(bio_integrity_alloc);
136 120
137/** 121/**
138 * bio_integrity_free - Free bio integrity payload 122 * bio_integrity_free - Free bio integrity payload
139 * @bio: bio containing bip to be freed 123 * @bio: bio containing bip to be freed
140 * @bs: bio_set this bio was allocated from
141 * 124 *
142 * Description: Used to free the integrity portion of a bio. Usually 125 * Description: Used to free the integrity portion of a bio. Usually
143 * called from bio_free(). 126 * called from bio_free().
144 */ 127 */
145void bio_integrity_free(struct bio *bio, struct bio_set *bs) 128void bio_integrity_free(struct bio *bio)
146{ 129{
147 struct bio_integrity_payload *bip = bio->bi_integrity; 130 struct bio_integrity_payload *bip = bio->bi_integrity;
131 struct bio_set *bs = bio->bi_pool;
132
133 if (!bs)
134 bs = fs_bio_set;
148 135
149 BUG_ON(bip == NULL); 136 BUG_ON(bip == NULL);
150 137
@@ -730,19 +717,18 @@ EXPORT_SYMBOL(bio_integrity_split);
730 * @bio: New bio 717 * @bio: New bio
731 * @bio_src: Original bio 718 * @bio_src: Original bio
732 * @gfp_mask: Memory allocation mask 719 * @gfp_mask: Memory allocation mask
733 * @bs: bio_set to allocate bip from
734 * 720 *
735 * Description: Called to allocate a bip when cloning a bio 721 * Description: Called to allocate a bip when cloning a bio
736 */ 722 */
737int bio_integrity_clone(struct bio *bio, struct bio *bio_src, 723int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
738 gfp_t gfp_mask, struct bio_set *bs) 724 gfp_t gfp_mask)
739{ 725{
740 struct bio_integrity_payload *bip_src = bio_src->bi_integrity; 726 struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
741 struct bio_integrity_payload *bip; 727 struct bio_integrity_payload *bip;
742 728
743 BUG_ON(bip_src == NULL); 729 BUG_ON(bip_src == NULL);
744 730
745 bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); 731 bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
746 732
747 if (bip == NULL) 733 if (bip == NULL)
748 return -EIO; 734 return -EIO;
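Editor's note: since bio_integrity_alloc() now derives the bio_set from bio->bi_pool (falling back to fs_bio_set for kmalloc'ed bios), callers attach integrity metadata without naming a pool. A short hedged sketch — prot_buf and len are illustrative variables, not part of this series:

    struct bio_integrity_payload *bip;

    bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
    if (!bip)
            return -ENOMEM;

    bio_integrity_add_page(bio, virt_to_page(prot_buf), len,
                           offset_in_page(prot_buf));
    /* released later by bio_integrity_free(bio), again via bio->bi_pool */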
diff --git a/fs/bio.c b/fs/bio.c
index 71072ab99128..9298c65ad9c7 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -55,6 +55,7 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
55 * IO code that does not need private memory pools. 55 * IO code that does not need private memory pools.
56 */ 56 */
57struct bio_set *fs_bio_set; 57struct bio_set *fs_bio_set;
58EXPORT_SYMBOL(fs_bio_set);
58 59
59/* 60/*
60 * Our slab pool management 61 * Our slab pool management
@@ -233,26 +234,37 @@ fallback:
233 return bvl; 234 return bvl;
234} 235}
235 236
236void bio_free(struct bio *bio, struct bio_set *bs) 237static void __bio_free(struct bio *bio)
237{ 238{
239 bio_disassociate_task(bio);
240
241 if (bio_integrity(bio))
242 bio_integrity_free(bio);
243}
244
245static void bio_free(struct bio *bio)
246{
247 struct bio_set *bs = bio->bi_pool;
238 void *p; 248 void *p;
239 249
240 if (bio_has_allocated_vec(bio)) 250 __bio_free(bio);
241 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
242 251
243 if (bio_integrity(bio)) 252 if (bs) {
244 bio_integrity_free(bio, bs); 253 if (bio_has_allocated_vec(bio))
254 bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio));
245 255
246 /* 256 /*
247 * If we have front padding, adjust the bio pointer before freeing 257 * If we have front padding, adjust the bio pointer before freeing
248 */ 258 */
249 p = bio; 259 p = bio;
250 if (bs->front_pad)
251 p -= bs->front_pad; 260 p -= bs->front_pad;
252 261
253 mempool_free(p, bs->bio_pool); 262 mempool_free(p, bs->bio_pool);
263 } else {
264 /* Bio was allocated by bio_kmalloc() */
265 kfree(bio);
266 }
254} 267}
255EXPORT_SYMBOL(bio_free);
256 268
257void bio_init(struct bio *bio) 269void bio_init(struct bio *bio)
258{ 270{
@@ -263,48 +275,85 @@ void bio_init(struct bio *bio)
263EXPORT_SYMBOL(bio_init); 275EXPORT_SYMBOL(bio_init);
264 276
265/** 277/**
278 * bio_reset - reinitialize a bio
279 * @bio: bio to reset
280 *
281 * Description:
282 * After calling bio_reset(), @bio will be in the same state as a freshly
 283 * allocated bio returned by bio_alloc_bioset() - the only fields that are
284 * preserved are the ones that are initialized by bio_alloc_bioset(). See
285 * comment in struct bio.
286 */
287void bio_reset(struct bio *bio)
288{
289 unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);
290
291 __bio_free(bio);
292
293 memset(bio, 0, BIO_RESET_BYTES);
294 bio->bi_flags = flags|(1 << BIO_UPTODATE);
295}
296EXPORT_SYMBOL(bio_reset);
297
298/**
266 * bio_alloc_bioset - allocate a bio for I/O 299 * bio_alloc_bioset - allocate a bio for I/O
267 * @gfp_mask: the GFP_ mask given to the slab allocator 300 * @gfp_mask: the GFP_ mask given to the slab allocator
268 * @nr_iovecs: number of iovecs to pre-allocate 301 * @nr_iovecs: number of iovecs to pre-allocate
269 * @bs: the bio_set to allocate from. 302 * @bs: the bio_set to allocate from.
270 * 303 *
271 * Description: 304 * Description:
272 * bio_alloc_bioset will try its own mempool to satisfy the allocation. 305 * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is
273 * If %__GFP_WAIT is set then we will block on the internal pool waiting 306 * backed by the @bs's mempool.
274 * for a &struct bio to become free.
275 * 307 *
276 * Note that the caller must set ->bi_destructor on successful return 308 * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be
277 * of a bio, to do the appropriate freeing of the bio once the reference 309 * able to allocate a bio. This is due to the mempool guarantees. To make this
278 * count drops to zero. 310 * work, callers must never allocate more than 1 bio at a time from this pool.
279 **/ 311 * Callers that need to allocate more than 1 bio must always submit the
312 * previously allocated bio for IO before attempting to allocate a new one.
313 * Failure to do so can cause deadlocks under memory pressure.
314 *
315 * RETURNS:
316 * Pointer to new bio on success, NULL on failure.
317 */
280struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) 318struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
281{ 319{
320 unsigned front_pad;
321 unsigned inline_vecs;
282 unsigned long idx = BIO_POOL_NONE; 322 unsigned long idx = BIO_POOL_NONE;
283 struct bio_vec *bvl = NULL; 323 struct bio_vec *bvl = NULL;
284 struct bio *bio; 324 struct bio *bio;
285 void *p; 325 void *p;
286 326
287 p = mempool_alloc(bs->bio_pool, gfp_mask); 327 if (!bs) {
328 if (nr_iovecs > UIO_MAXIOV)
329 return NULL;
330
331 p = kmalloc(sizeof(struct bio) +
332 nr_iovecs * sizeof(struct bio_vec),
333 gfp_mask);
334 front_pad = 0;
335 inline_vecs = nr_iovecs;
336 } else {
337 p = mempool_alloc(bs->bio_pool, gfp_mask);
338 front_pad = bs->front_pad;
339 inline_vecs = BIO_INLINE_VECS;
340 }
341
288 if (unlikely(!p)) 342 if (unlikely(!p))
289 return NULL; 343 return NULL;
290 bio = p + bs->front_pad;
291 344
345 bio = p + front_pad;
292 bio_init(bio); 346 bio_init(bio);
293 347
294 if (unlikely(!nr_iovecs)) 348 if (nr_iovecs > inline_vecs) {
295 goto out_set;
296
297 if (nr_iovecs <= BIO_INLINE_VECS) {
298 bvl = bio->bi_inline_vecs;
299 nr_iovecs = BIO_INLINE_VECS;
300 } else {
301 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); 349 bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
302 if (unlikely(!bvl)) 350 if (unlikely(!bvl))
303 goto err_free; 351 goto err_free;
304 352 } else if (nr_iovecs) {
305 nr_iovecs = bvec_nr_vecs(idx); 353 bvl = bio->bi_inline_vecs;
306 } 354 }
307out_set: 355
356 bio->bi_pool = bs;
308 bio->bi_flags |= idx << BIO_POOL_OFFSET; 357 bio->bi_flags |= idx << BIO_POOL_OFFSET;
309 bio->bi_max_vecs = nr_iovecs; 358 bio->bi_max_vecs = nr_iovecs;
310 bio->bi_io_vec = bvl; 359 bio->bi_io_vec = bvl;
@@ -316,80 +365,6 @@ err_free:
316} 365}
317EXPORT_SYMBOL(bio_alloc_bioset); 366EXPORT_SYMBOL(bio_alloc_bioset);
318 367
319static void bio_fs_destructor(struct bio *bio)
320{
321 bio_free(bio, fs_bio_set);
322}
323
324/**
325 * bio_alloc - allocate a new bio, memory pool backed
326 * @gfp_mask: allocation mask to use
327 * @nr_iovecs: number of iovecs
328 *
329 * bio_alloc will allocate a bio and associated bio_vec array that can hold
330 * at least @nr_iovecs entries. Allocations will be done from the
331 * fs_bio_set. Also see @bio_alloc_bioset and @bio_kmalloc.
332 *
333 * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate
334 * a bio. This is due to the mempool guarantees. To make this work, callers
335 * must never allocate more than 1 bio at a time from this pool. Callers
336 * that need to allocate more than 1 bio must always submit the previously
337 * allocated bio for IO before attempting to allocate a new one. Failure to
338 * do so can cause livelocks under memory pressure.
339 *
340 * RETURNS:
341 * Pointer to new bio on success, NULL on failure.
342 */
343struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
344{
345 struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
346
347 if (bio)
348 bio->bi_destructor = bio_fs_destructor;
349
350 return bio;
351}
352EXPORT_SYMBOL(bio_alloc);
353
354static void bio_kmalloc_destructor(struct bio *bio)
355{
356 if (bio_integrity(bio))
357 bio_integrity_free(bio, fs_bio_set);
358 kfree(bio);
359}
360
361/**
362 * bio_kmalloc - allocate a bio for I/O using kmalloc()
363 * @gfp_mask: the GFP_ mask given to the slab allocator
364 * @nr_iovecs: number of iovecs to pre-allocate
365 *
366 * Description:
367 * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask contains
368 * %__GFP_WAIT, the allocation is guaranteed to succeed.
369 *
370 **/
371struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
372{
373 struct bio *bio;
374
375 if (nr_iovecs > UIO_MAXIOV)
376 return NULL;
377
378 bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
379 gfp_mask);
380 if (unlikely(!bio))
381 return NULL;
382
383 bio_init(bio);
384 bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET;
385 bio->bi_max_vecs = nr_iovecs;
386 bio->bi_io_vec = bio->bi_inline_vecs;
387 bio->bi_destructor = bio_kmalloc_destructor;
388
389 return bio;
390}
391EXPORT_SYMBOL(bio_kmalloc);
392
393void zero_fill_bio(struct bio *bio) 368void zero_fill_bio(struct bio *bio)
394{ 369{
395 unsigned long flags; 370 unsigned long flags;
@@ -420,11 +395,8 @@ void bio_put(struct bio *bio)
420 /* 395 /*
421 * last put frees it 396 * last put frees it
422 */ 397 */
423 if (atomic_dec_and_test(&bio->bi_cnt)) { 398 if (atomic_dec_and_test(&bio->bi_cnt))
424 bio_disassociate_task(bio); 399 bio_free(bio);
425 bio->bi_next = NULL;
426 bio->bi_destructor(bio);
427 }
428} 400}
429EXPORT_SYMBOL(bio_put); 401EXPORT_SYMBOL(bio_put);
430 402
@@ -466,26 +438,28 @@ void __bio_clone(struct bio *bio, struct bio *bio_src)
466EXPORT_SYMBOL(__bio_clone); 438EXPORT_SYMBOL(__bio_clone);
467 439
468/** 440/**
469 * bio_clone - clone a bio 441 * bio_clone_bioset - clone a bio
470 * @bio: bio to clone 442 * @bio: bio to clone
471 * @gfp_mask: allocation priority 443 * @gfp_mask: allocation priority
444 * @bs: bio_set to allocate from
472 * 445 *
473 * Like __bio_clone, only also allocates the returned bio 446 * Like __bio_clone, only also allocates the returned bio
474 */ 447 */
475struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) 448struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask,
449 struct bio_set *bs)
476{ 450{
477 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); 451 struct bio *b;
478 452
453 b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs);
479 if (!b) 454 if (!b)
480 return NULL; 455 return NULL;
481 456
482 b->bi_destructor = bio_fs_destructor;
483 __bio_clone(b, bio); 457 __bio_clone(b, bio);
484 458
485 if (bio_integrity(bio)) { 459 if (bio_integrity(bio)) {
486 int ret; 460 int ret;
487 461
488 ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); 462 ret = bio_integrity_clone(b, bio, gfp_mask);
489 463
490 if (ret < 0) { 464 if (ret < 0) {
491 bio_put(b); 465 bio_put(b);
@@ -495,7 +469,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
495 469
496 return b; 470 return b;
497} 471}
498EXPORT_SYMBOL(bio_clone); 472EXPORT_SYMBOL(bio_clone_bioset);
499 473
500/** 474/**
501 * bio_get_nr_vecs - return approx number of vecs 475 * bio_get_nr_vecs - return approx number of vecs
@@ -1501,7 +1475,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1501 trace_block_split(bdev_get_queue(bi->bi_bdev), bi, 1475 trace_block_split(bdev_get_queue(bi->bi_bdev), bi,
1502 bi->bi_sector + first_sectors); 1476 bi->bi_sector + first_sectors);
1503 1477
1504 BUG_ON(bi->bi_vcnt != 1); 1478 BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0);
1505 BUG_ON(bi->bi_idx != 0); 1479 BUG_ON(bi->bi_idx != 0);
1506 atomic_set(&bp->cnt, 3); 1480 atomic_set(&bp->cnt, 3);
1507 bp->error = 0; 1481 bp->error = 0;
@@ -1511,17 +1485,22 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors)
1511 bp->bio2.bi_size -= first_sectors << 9; 1485 bp->bio2.bi_size -= first_sectors << 9;
1512 bp->bio1.bi_size = first_sectors << 9; 1486 bp->bio1.bi_size = first_sectors << 9;
1513 1487
1514 bp->bv1 = bi->bi_io_vec[0]; 1488 if (bi->bi_vcnt != 0) {
1515 bp->bv2 = bi->bi_io_vec[0]; 1489 bp->bv1 = bi->bi_io_vec[0];
1516 bp->bv2.bv_offset += first_sectors << 9; 1490 bp->bv2 = bi->bi_io_vec[0];
1517 bp->bv2.bv_len -= first_sectors << 9; 1491
1518 bp->bv1.bv_len = first_sectors << 9; 1492 if (bio_is_rw(bi)) {
1493 bp->bv2.bv_offset += first_sectors << 9;
1494 bp->bv2.bv_len -= first_sectors << 9;
1495 bp->bv1.bv_len = first_sectors << 9;
1496 }
1519 1497
1520 bp->bio1.bi_io_vec = &bp->bv1; 1498 bp->bio1.bi_io_vec = &bp->bv1;
1521 bp->bio2.bi_io_vec = &bp->bv2; 1499 bp->bio2.bi_io_vec = &bp->bv2;
1522 1500
1523 bp->bio1.bi_max_vecs = 1; 1501 bp->bio1.bi_max_vecs = 1;
1524 bp->bio2.bi_max_vecs = 1; 1502 bp->bio2.bi_max_vecs = 1;
1503 }
1525 1504
1526 bp->bio1.bi_end_io = bio_pair_end_1; 1505 bp->bio1.bi_end_io = bio_pair_end_1;
1527 bp->bio2.bi_end_io = bio_pair_end_2; 1506 bp->bio2.bi_end_io = bio_pair_end_2;
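Editor's note: the new bio_alloc_bioset() comment above carries the one rule worth restating — with __GFP_WAIT the mempool guarantee only holds if a caller never sits on more than one unsubmitted bio from the same pool. A hedged sketch of the safe pattern (all names illustrative):

    while (remaining) {
            struct bio *bio = bio_alloc_bioset(GFP_NOIO, nr_vecs, my_bioset);

            /* ... add the pages for this chunk ... */
            submit_bio(WRITE, bio);     /* submit before allocating the next one */
            remaining -= chunk;
    }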
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 38e721b35d45..b3c1d3dae77d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -116,6 +116,8 @@ EXPORT_SYMBOL(invalidate_bdev);
116 116
117int set_blocksize(struct block_device *bdev, int size) 117int set_blocksize(struct block_device *bdev, int size)
118{ 118{
119 struct address_space *mapping;
120
119 /* Size must be a power of two, and between 512 and PAGE_SIZE */ 121 /* Size must be a power of two, and between 512 and PAGE_SIZE */
120 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) 122 if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
121 return -EINVAL; 123 return -EINVAL;
@@ -124,6 +126,19 @@ int set_blocksize(struct block_device *bdev, int size)
124 if (size < bdev_logical_block_size(bdev)) 126 if (size < bdev_logical_block_size(bdev))
125 return -EINVAL; 127 return -EINVAL;
126 128
129 /* Prevent starting I/O or mapping the device */
130 percpu_down_write(&bdev->bd_block_size_semaphore);
131
132 /* Check that the block device is not memory mapped */
133 mapping = bdev->bd_inode->i_mapping;
134 mutex_lock(&mapping->i_mmap_mutex);
135 if (mapping_mapped(mapping)) {
136 mutex_unlock(&mapping->i_mmap_mutex);
137 percpu_up_write(&bdev->bd_block_size_semaphore);
138 return -EBUSY;
139 }
140 mutex_unlock(&mapping->i_mmap_mutex);
141
127 /* Don't change the size if it is same as current */ 142 /* Don't change the size if it is same as current */
128 if (bdev->bd_block_size != size) { 143 if (bdev->bd_block_size != size) {
129 sync_blockdev(bdev); 144 sync_blockdev(bdev);
@@ -131,6 +146,9 @@ int set_blocksize(struct block_device *bdev, int size)
131 bdev->bd_inode->i_blkbits = blksize_bits(size); 146 bdev->bd_inode->i_blkbits = blksize_bits(size);
132 kill_bdev(bdev); 147 kill_bdev(bdev);
133 } 148 }
149
150 percpu_up_write(&bdev->bd_block_size_semaphore);
151
134 return 0; 152 return 0;
135} 153}
136 154
@@ -441,6 +459,12 @@ static struct inode *bdev_alloc_inode(struct super_block *sb)
441 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); 459 struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
442 if (!ei) 460 if (!ei)
443 return NULL; 461 return NULL;
462
463 if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) {
464 kmem_cache_free(bdev_cachep, ei);
465 return NULL;
466 }
467
444 return &ei->vfs_inode; 468 return &ei->vfs_inode;
445} 469}
446 470
@@ -449,6 +473,8 @@ static void bdev_i_callback(struct rcu_head *head)
449 struct inode *inode = container_of(head, struct inode, i_rcu); 473 struct inode *inode = container_of(head, struct inode, i_rcu);
450 struct bdev_inode *bdi = BDEV_I(inode); 474 struct bdev_inode *bdi = BDEV_I(inode);
451 475
476 percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore);
477
452 kmem_cache_free(bdev_cachep, bdi); 478 kmem_cache_free(bdev_cachep, bdi);
453} 479}
454 480
@@ -1567,6 +1593,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
1567 return blkdev_ioctl(bdev, mode, cmd, arg); 1593 return blkdev_ioctl(bdev, mode, cmd, arg);
1568} 1594}
1569 1595
1596ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
1597 unsigned long nr_segs, loff_t pos)
1598{
1599 ssize_t ret;
1600 struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
1601
1602 percpu_down_read(&bdev->bd_block_size_semaphore);
1603
1604 ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
1605
1606 percpu_up_read(&bdev->bd_block_size_semaphore);
1607
1608 return ret;
1609}
1610EXPORT_SYMBOL_GPL(blkdev_aio_read);
1611
1570/* 1612/*
1571 * Write data to the block device. Only intended for the block device itself 1613 * Write data to the block device. Only intended for the block device itself
1572 * and the raw driver which basically is a fake block device. 1614 * and the raw driver which basically is a fake block device.
@@ -1578,12 +1620,16 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1578 unsigned long nr_segs, loff_t pos) 1620 unsigned long nr_segs, loff_t pos)
1579{ 1621{
1580 struct file *file = iocb->ki_filp; 1622 struct file *file = iocb->ki_filp;
1623 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1581 struct blk_plug plug; 1624 struct blk_plug plug;
1582 ssize_t ret; 1625 ssize_t ret;
1583 1626
1584 BUG_ON(iocb->ki_pos != pos); 1627 BUG_ON(iocb->ki_pos != pos);
1585 1628
1586 blk_start_plug(&plug); 1629 blk_start_plug(&plug);
1630
1631 percpu_down_read(&bdev->bd_block_size_semaphore);
1632
1587 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); 1633 ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
1588 if (ret > 0 || ret == -EIOCBQUEUED) { 1634 if (ret > 0 || ret == -EIOCBQUEUED) {
1589 ssize_t err; 1635 ssize_t err;
@@ -1592,11 +1638,29 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
1592 if (err < 0 && ret > 0) 1638 if (err < 0 && ret > 0)
1593 ret = err; 1639 ret = err;
1594 } 1640 }
1641
1642 percpu_up_read(&bdev->bd_block_size_semaphore);
1643
1595 blk_finish_plug(&plug); 1644 blk_finish_plug(&plug);
1645
1596 return ret; 1646 return ret;
1597} 1647}
1598EXPORT_SYMBOL_GPL(blkdev_aio_write); 1648EXPORT_SYMBOL_GPL(blkdev_aio_write);
1599 1649
1650static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
1651{
1652 int ret;
1653 struct block_device *bdev = I_BDEV(file->f_mapping->host);
1654
1655 percpu_down_read(&bdev->bd_block_size_semaphore);
1656
1657 ret = generic_file_mmap(file, vma);
1658
1659 percpu_up_read(&bdev->bd_block_size_semaphore);
1660
1661 return ret;
1662}
1663
1600/* 1664/*
1601 * Try to release a page associated with block device when the system 1665 * Try to release a page associated with block device when the system
1602 * is under memory pressure. 1666 * is under memory pressure.
@@ -1627,9 +1691,9 @@ const struct file_operations def_blk_fops = {
1627 .llseek = block_llseek, 1691 .llseek = block_llseek,
1628 .read = do_sync_read, 1692 .read = do_sync_read,
1629 .write = do_sync_write, 1693 .write = do_sync_write,
1630 .aio_read = generic_file_aio_read, 1694 .aio_read = blkdev_aio_read,
1631 .aio_write = blkdev_aio_write, 1695 .aio_write = blkdev_aio_write,
1632 .mmap = generic_file_mmap, 1696 .mmap = blkdev_mmap,
1633 .fsync = blkdev_fsync, 1697 .fsync = blkdev_fsync,
1634 .unlocked_ioctl = block_ioctl, 1698 .unlocked_ioctl = block_ioctl,
1635#ifdef CONFIG_COMPAT 1699#ifdef CONFIG_COMPAT
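Editor's note: from user space the visible change is that resizing the soft block size now fails cleanly with EBUSY while the device is memory-mapped, instead of racing with in-flight I/O. A hedged user-space sketch, assuming the usual BLKBSZSET ioctl path into set_blocksize():

    #include <sys/ioctl.h>
    #include <linux/fs.h>
    #include <errno.h>
    #include <stdio.h>

    int set_bdev_blocksize(int fd, int size)
    {
            if (ioctl(fd, BLKBSZSET, &size) < 0) {
                    if (errno == EBUSY)
                            fprintf(stderr, "device is mmapped; block size unchanged\n");
                    return -1;
            }
            return 0;
    }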
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 1585db1aa365..f936cb50dc0d 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -814,8 +814,8 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
814 struct bio *bio; 814 struct bio *bio;
815 815
816 if (per_dev != master_dev) { 816 if (per_dev != master_dev) {
817 bio = bio_kmalloc(GFP_KERNEL, 817 bio = bio_clone_kmalloc(master_dev->bio,
818 master_dev->bio->bi_max_vecs); 818 GFP_KERNEL);
819 if (unlikely(!bio)) { 819 if (unlikely(!bio)) {
820 ORE_DBGMSG( 820 ORE_DBGMSG(
821 "Failed to allocate BIO size=%u\n", 821 "Failed to allocate BIO size=%u\n",
@@ -824,7 +824,6 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
824 goto out; 824 goto out;
825 } 825 }
826 826
827 __bio_clone(bio, master_dev->bio);
828 bio->bi_bdev = NULL; 827 bio->bi_bdev = NULL;
829 bio->bi_next = NULL; 828 bio->bi_next = NULL;
830 per_dev->offset = master_dev->offset; 829 per_dev->offset = master_dev->offset;
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 26435890dc87..820e7aaad4fd 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -212,20 +212,41 @@ extern void bio_pair_release(struct bio_pair *dbio);
212extern struct bio_set *bioset_create(unsigned int, unsigned int); 212extern struct bio_set *bioset_create(unsigned int, unsigned int);
213extern void bioset_free(struct bio_set *); 213extern void bioset_free(struct bio_set *);
214 214
215extern struct bio *bio_alloc(gfp_t, unsigned int);
216extern struct bio *bio_kmalloc(gfp_t, unsigned int);
217extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); 215extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *);
218extern void bio_put(struct bio *); 216extern void bio_put(struct bio *);
219extern void bio_free(struct bio *, struct bio_set *); 217
218extern void __bio_clone(struct bio *, struct bio *);
219extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs);
220
221extern struct bio_set *fs_bio_set;
222
223static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
224{
225 return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set);
226}
227
228static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
229{
230 return bio_clone_bioset(bio, gfp_mask, fs_bio_set);
231}
232
233static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs)
234{
235 return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL);
236}
237
238static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
239{
240 return bio_clone_bioset(bio, gfp_mask, NULL);
241
242}
220 243
221extern void bio_endio(struct bio *, int); 244extern void bio_endio(struct bio *, int);
222struct request_queue; 245struct request_queue;
223extern int bio_phys_segments(struct request_queue *, struct bio *); 246extern int bio_phys_segments(struct request_queue *, struct bio *);
224 247
225extern void __bio_clone(struct bio *, struct bio *);
226extern struct bio *bio_clone(struct bio *, gfp_t);
227
228extern void bio_init(struct bio *); 248extern void bio_init(struct bio *);
249extern void bio_reset(struct bio *);
229 250
230extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); 251extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
231extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, 252extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
@@ -304,8 +325,6 @@ struct biovec_slab {
304 struct kmem_cache *slab; 325 struct kmem_cache *slab;
305}; 326};
306 327
307extern struct bio_set *fs_bio_set;
308
309/* 328/*
310 * a small number of entries is fine, not going to be performance critical. 329 * a small number of entries is fine, not going to be performance critical.
311 * basically we just need to survive 330 * basically we just need to survive
@@ -367,9 +386,31 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx,
367/* 386/*
368 * Check whether this bio carries any data or not. A NULL bio is allowed. 387 * Check whether this bio carries any data or not. A NULL bio is allowed.
369 */ 388 */
370static inline int bio_has_data(struct bio *bio) 389static inline bool bio_has_data(struct bio *bio)
371{ 390{
372 return bio && bio->bi_io_vec != NULL; 391 if (bio && bio->bi_vcnt)
392 return true;
393
394 return false;
395}
396
397static inline bool bio_is_rw(struct bio *bio)
398{
399 if (!bio_has_data(bio))
400 return false;
401
402 if (bio->bi_rw & REQ_WRITE_SAME)
403 return false;
404
405 return true;
406}
407
408static inline bool bio_mergeable(struct bio *bio)
409{
410 if (bio->bi_rw & REQ_NOMERGE_FLAGS)
411 return false;
412
413 return true;
373} 414}
374 415
375/* 416/*
@@ -505,9 +546,8 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
505 546
506#define bio_integrity(bio) (bio->bi_integrity != NULL) 547#define bio_integrity(bio) (bio->bi_integrity != NULL)
507 548
508extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *);
509extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); 549extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int);
510extern void bio_integrity_free(struct bio *, struct bio_set *); 550extern void bio_integrity_free(struct bio *);
511extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); 551extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int);
512extern int bio_integrity_enabled(struct bio *bio); 552extern int bio_integrity_enabled(struct bio *bio);
513extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); 553extern int bio_integrity_set_tag(struct bio *, void *, unsigned int);
@@ -517,7 +557,7 @@ extern void bio_integrity_endio(struct bio *, int);
517extern void bio_integrity_advance(struct bio *, unsigned int); 557extern void bio_integrity_advance(struct bio *, unsigned int);
518extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); 558extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int);
519extern void bio_integrity_split(struct bio *, struct bio_pair *, int); 559extern void bio_integrity_split(struct bio *, struct bio_pair *, int);
520extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *); 560extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t);
521extern int bioset_integrity_create(struct bio_set *, int); 561extern int bioset_integrity_create(struct bio_set *, int);
522extern void bioset_integrity_free(struct bio_set *); 562extern void bioset_integrity_free(struct bio_set *);
523extern void bio_integrity_init(void); 563extern void bio_integrity_init(void);
@@ -549,13 +589,13 @@ static inline int bio_integrity_prep(struct bio *bio)
549 return 0; 589 return 0;
550} 590}
551 591
552static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs) 592static inline void bio_integrity_free(struct bio *bio)
553{ 593{
554 return; 594 return;
555} 595}
556 596
557static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, 597static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
558 gfp_t gfp_mask, struct bio_set *bs) 598 gfp_t gfp_mask)
559{ 599{
560 return 0; 600 return 0;
561} 601}
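Editor's note: after this header change the four allocation helpers are thin wrappers, and the only decision left to a caller is which bio_set (if any) backs the bio. A quick sketch, with my_bs standing in for a driver-private bioset:

    struct bio *a = bio_alloc(GFP_NOIO, 4);                /* fs_bio_set        */
    struct bio *b = bio_kmalloc(GFP_KERNEL, 4);            /* plain kmalloc()   */
    struct bio *c = bio_alloc_bioset(GFP_NOIO, 4, my_bs);  /* private mempool   */
    struct bio *d = bio_clone_bioset(a, GFP_NOIO, my_bs);  /* clone out of my_bs */

    /* all four are released with bio_put(); bio_free() picks the right
     * path from bio->bi_pool (NULL means the bio was kmalloc'ed) */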
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 7b7ac9ccec7a..cdf11191e645 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -59,12 +59,6 @@ struct bio {
59 unsigned int bi_seg_front_size; 59 unsigned int bi_seg_front_size;
60 unsigned int bi_seg_back_size; 60 unsigned int bi_seg_back_size;
61 61
62 unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
63
64 atomic_t bi_cnt; /* pin count */
65
66 struct bio_vec *bi_io_vec; /* the actual vec list */
67
68 bio_end_io_t *bi_end_io; 62 bio_end_io_t *bi_end_io;
69 63
70 void *bi_private; 64 void *bi_private;
@@ -80,7 +74,17 @@ struct bio {
80 struct bio_integrity_payload *bi_integrity; /* data integrity */ 74 struct bio_integrity_payload *bi_integrity; /* data integrity */
81#endif 75#endif
82 76
83 bio_destructor_t *bi_destructor; /* destructor */ 77 /*
78 * Everything starting with bi_max_vecs will be preserved by bio_reset()
79 */
80
81 unsigned int bi_max_vecs; /* max bvl_vecs we can hold */
82
83 atomic_t bi_cnt; /* pin count */
84
85 struct bio_vec *bi_io_vec; /* the actual vec list */
86
87 struct bio_set *bi_pool;
84 88
85 /* 89 /*
86 * We can inline a number of vecs at the end of the bio, to avoid 90 * We can inline a number of vecs at the end of the bio, to avoid
@@ -90,6 +94,8 @@ struct bio {
90 struct bio_vec bi_inline_vecs[0]; 94 struct bio_vec bi_inline_vecs[0];
91}; 95};
92 96
97#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs)
98
93/* 99/*
94 * bio flags 100 * bio flags
95 */ 101 */
@@ -105,6 +111,13 @@ struct bio {
105#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ 111#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */
106#define BIO_QUIET 10 /* Make BIO Quiet */ 112#define BIO_QUIET 10 /* Make BIO Quiet */
107#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ 113#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */
114
115/*
116 * Flags starting here get preserved by bio_reset() - this includes
117 * BIO_POOL_IDX()
118 */
119#define BIO_RESET_BITS 12
120
108#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) 121#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
109 122
110/* 123/*
@@ -134,6 +147,7 @@ enum rq_flag_bits {
134 __REQ_PRIO, /* boost priority in cfq */ 147 __REQ_PRIO, /* boost priority in cfq */
135 __REQ_DISCARD, /* request to discard sectors */ 148 __REQ_DISCARD, /* request to discard sectors */
136 __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ 149 __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */
150 __REQ_WRITE_SAME, /* write same block many times */
137 151
138 __REQ_NOIDLE, /* don't anticipate more IO after this one */ 152 __REQ_NOIDLE, /* don't anticipate more IO after this one */
139 __REQ_FUA, /* forced unit access */ 153 __REQ_FUA, /* forced unit access */
@@ -172,15 +186,21 @@ enum rq_flag_bits {
172#define REQ_META (1 << __REQ_META) 186#define REQ_META (1 << __REQ_META)
173#define REQ_PRIO (1 << __REQ_PRIO) 187#define REQ_PRIO (1 << __REQ_PRIO)
174#define REQ_DISCARD (1 << __REQ_DISCARD) 188#define REQ_DISCARD (1 << __REQ_DISCARD)
189#define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME)
175#define REQ_NOIDLE (1 << __REQ_NOIDLE) 190#define REQ_NOIDLE (1 << __REQ_NOIDLE)
176 191
177#define REQ_FAILFAST_MASK \ 192#define REQ_FAILFAST_MASK \
178 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) 193 (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
179#define REQ_COMMON_MASK \ 194#define REQ_COMMON_MASK \
180 (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ 195 (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \
181 REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) 196 REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \
197 REQ_SECURE)
182#define REQ_CLONE_MASK REQ_COMMON_MASK 198#define REQ_CLONE_MASK REQ_COMMON_MASK
183 199
200/* This mask is used for both bio and request merge checking */
201#define REQ_NOMERGE_FLAGS \
202 (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA)
203
184#define REQ_RAHEAD (1 << __REQ_RAHEAD) 204#define REQ_RAHEAD (1 << __REQ_RAHEAD)
185#define REQ_THROTTLED (1 << __REQ_THROTTLED) 205#define REQ_THROTTLED (1 << __REQ_THROTTLED)
186 206
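Editor's note: BIO_RESET_BYTES/BIO_RESET_BITS define the boundary bio_reset() preserves — bi_max_vecs, bi_cnt, bi_io_vec, bi_pool and the upper flag bits survive, everything else is cleared. A hedged sketch of reusing one long-lived bio (handler and buffer names are illustrative):

    bio_reset(bio);                     /* keeps bi_io_vec, bi_max_vecs, bi_pool */
    bio->bi_bdev   = bdev;
    bio->bi_sector = next_sector;
    bio->bi_end_io = my_end_io;
    bio_add_page(bio, page, PAGE_SIZE, 0);
    submit_bio(READ, bio);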
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 4a2ab7c85393..1756001210d2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -270,6 +270,7 @@ struct queue_limits {
270 unsigned int io_min; 270 unsigned int io_min;
271 unsigned int io_opt; 271 unsigned int io_opt;
272 unsigned int max_discard_sectors; 272 unsigned int max_discard_sectors;
273 unsigned int max_write_same_sectors;
273 unsigned int discard_granularity; 274 unsigned int discard_granularity;
274 unsigned int discard_alignment; 275 unsigned int discard_alignment;
275 276
@@ -540,8 +541,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
540 541
541#define blk_account_rq(rq) \ 542#define blk_account_rq(rq) \
542 (((rq)->cmd_flags & REQ_STARTED) && \ 543 (((rq)->cmd_flags & REQ_STARTED) && \
543 ((rq)->cmd_type == REQ_TYPE_FS || \ 544 ((rq)->cmd_type == REQ_TYPE_FS))
544 ((rq)->cmd_flags & REQ_DISCARD)))
545 545
546#define blk_pm_request(rq) \ 546#define blk_pm_request(rq) \
547 ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ 547 ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \
@@ -595,17 +595,39 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync)
595 rl->flags &= ~flag; 595 rl->flags &= ~flag;
596} 596}
597 597
598static inline bool rq_mergeable(struct request *rq)
599{
600 if (rq->cmd_type != REQ_TYPE_FS)
601 return false;
598 602
599/* 603 if (rq->cmd_flags & REQ_NOMERGE_FLAGS)
600 * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may 604 return false;
601 * it already be started by driver. 605
602 */ 606 return true;
603#define RQ_NOMERGE_FLAGS \ 607}
604 (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD) 608
605#define rq_mergeable(rq) \ 609static inline bool blk_check_merge_flags(unsigned int flags1,
606 (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ 610 unsigned int flags2)
607 (((rq)->cmd_flags & REQ_DISCARD) || \ 611{
608 (rq)->cmd_type == REQ_TYPE_FS)) 612 if ((flags1 & REQ_DISCARD) != (flags2 & REQ_DISCARD))
613 return false;
614
615 if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE))
616 return false;
617
618 if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME))
619 return false;
620
621 return true;
622}
623
624static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
625{
626 if (bio_data(a) == bio_data(b))
627 return true;
628
629 return false;
630}
609 631
610/* 632/*
611 * q->prep_rq_fn return values 633 * q->prep_rq_fn return values
@@ -802,6 +824,28 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq)
802 return blk_rq_cur_bytes(rq) >> 9; 824 return blk_rq_cur_bytes(rq) >> 9;
803} 825}
804 826
827static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
828 unsigned int cmd_flags)
829{
830 if (unlikely(cmd_flags & REQ_DISCARD))
831 return q->limits.max_discard_sectors;
832
833 if (unlikely(cmd_flags & REQ_WRITE_SAME))
834 return q->limits.max_write_same_sectors;
835
836 return q->limits.max_sectors;
837}
838
839static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
840{
841 struct request_queue *q = rq->q;
842
843 if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
844 return q->limits.max_hw_sectors;
845
846 return blk_queue_get_max_sectors(q, rq->cmd_flags);
847}
848
805/* 849/*
806 * Request issue related functions. 850 * Request issue related functions.
807 */ 851 */
@@ -857,6 +901,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short);
857extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); 901extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
858extern void blk_queue_max_discard_sectors(struct request_queue *q, 902extern void blk_queue_max_discard_sectors(struct request_queue *q,
859 unsigned int max_discard_sectors); 903 unsigned int max_discard_sectors);
904extern void blk_queue_max_write_same_sectors(struct request_queue *q,
905 unsigned int max_write_same_sectors);
860extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); 906extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
861extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); 907extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
862extern void blk_queue_alignment_offset(struct request_queue *q, 908extern void blk_queue_alignment_offset(struct request_queue *q,
@@ -987,6 +1033,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt,
987extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); 1033extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
988extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, 1034extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
989 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); 1035 sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
1036extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
1037 sector_t nr_sects, gfp_t gfp_mask, struct page *page);
990extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, 1038extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
991 sector_t nr_sects, gfp_t gfp_mask); 1039 sector_t nr_sects, gfp_t gfp_mask);
992static inline int sb_issue_discard(struct super_block *sb, sector_t block, 1040static inline int sb_issue_discard(struct super_block *sb, sector_t block,
@@ -1164,6 +1212,16 @@ static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev)
1164 return queue_discard_zeroes_data(bdev_get_queue(bdev)); 1212 return queue_discard_zeroes_data(bdev_get_queue(bdev));
1165} 1213}
1166 1214
1215static inline unsigned int bdev_write_same(struct block_device *bdev)
1216{
1217 struct request_queue *q = bdev_get_queue(bdev);
1218
1219 if (q)
1220 return q->limits.max_write_same_sectors;
1221
1222 return 0;
1223}
1224
1167static inline int queue_dma_alignment(struct request_queue *q) 1225static inline int queue_dma_alignment(struct request_queue *q)
1168{ 1226{
1169 return q ? q->dma_alignment : 511; 1227 return q ? q->dma_alignment : 511;
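Editor's note: blkdev_issue_write_same() plus the new bdev_write_same() limit let callers fill a range with a single repeated block when the device supports it; the reworked blkdev_issue_zeroout() in this series uses the same check internally. A hedged sketch of the fallback pattern a filesystem might use:

    if (bdev_write_same(bdev))
            ret = blkdev_issue_write_same(bdev, sector, nr_sects,
                                          GFP_NOFS, ZERO_PAGE(0));
    else
            ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS);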
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c617ed024df8..39f3e12ca752 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -335,6 +335,7 @@ struct inodes_stat_t {
335#define BLKDISCARDZEROES _IO(0x12,124) 335#define BLKDISCARDZEROES _IO(0x12,124)
336#define BLKSECDISCARD _IO(0x12,125) 336#define BLKSECDISCARD _IO(0x12,125)
337#define BLKROTATIONAL _IO(0x12,126) 337#define BLKROTATIONAL _IO(0x12,126)
338#define BLKZEROOUT _IO(0x12,127)
338 339
339#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ 340#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
340#define FIBMAP _IO(0x00,1) /* bmap access */ 341#define FIBMAP _IO(0x00,1) /* bmap access */
@@ -415,6 +416,7 @@ struct inodes_stat_t {
415#include <linux/migrate_mode.h> 416#include <linux/migrate_mode.h>
416#include <linux/uidgid.h> 417#include <linux/uidgid.h>
417#include <linux/lockdep.h> 418#include <linux/lockdep.h>
419#include <linux/percpu-rwsem.h>
418 420
419#include <asm/byteorder.h> 421#include <asm/byteorder.h>
420 422
@@ -724,6 +726,8 @@ struct block_device {
724 int bd_fsfreeze_count; 726 int bd_fsfreeze_count;
725 /* Mutex for freeze */ 727 /* Mutex for freeze */
726 struct mutex bd_fsfreeze_mutex; 728 struct mutex bd_fsfreeze_mutex;
729 /* A semaphore that prevents I/O while block size is being changed */
730 struct percpu_rw_semaphore bd_block_size_semaphore;
727}; 731};
728 732
729/* 733/*
@@ -2570,6 +2574,8 @@ extern int generic_segment_checks(const struct iovec *iov,
2570 unsigned long *nr_segs, size_t *count, int access_flags); 2574 unsigned long *nr_segs, size_t *count, int access_flags);
2571 2575
2572/* fs/block_dev.c */ 2576/* fs/block_dev.c */
2577extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
2578 unsigned long nr_segs, loff_t pos);
2573extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, 2579extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
2574 unsigned long nr_segs, loff_t pos); 2580 unsigned long nr_segs, loff_t pos);
2575extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, 2581extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
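Editor's note: BLKZEROOUT is the new ioctl wired up to the zeroout path. A user-space sketch under the assumption (based on the "block: ioctl to zero block ranges" patch in this pull) that it takes a {start, length} pair of byte offsets, both 512-byte aligned:

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int zero_range(int fd, uint64_t start, uint64_t len)
    {
            uint64_t range[2] = { start, len };  /* bytes, 512-aligned (assumption) */

            return ioctl(fd, BLKZEROOUT, &range);
    }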
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
new file mode 100644
index 000000000000..cf80f7e5277f
--- /dev/null
+++ b/include/linux/percpu-rwsem.h
@@ -0,0 +1,89 @@
1#ifndef _LINUX_PERCPU_RWSEM_H
2#define _LINUX_PERCPU_RWSEM_H
3
4#include <linux/mutex.h>
5#include <linux/percpu.h>
6#include <linux/rcupdate.h>
7#include <linux/delay.h>
8
9struct percpu_rw_semaphore {
10 unsigned __percpu *counters;
11 bool locked;
12 struct mutex mtx;
13};
14
15static inline void percpu_down_read(struct percpu_rw_semaphore *p)
16{
17 rcu_read_lock();
18 if (unlikely(p->locked)) {
19 rcu_read_unlock();
20 mutex_lock(&p->mtx);
21 this_cpu_inc(*p->counters);
22 mutex_unlock(&p->mtx);
23 return;
24 }
25 this_cpu_inc(*p->counters);
26 rcu_read_unlock();
27}
28
29static inline void percpu_up_read(struct percpu_rw_semaphore *p)
30{
31 /*
32 * On X86, write operation in this_cpu_dec serves as a memory unlock
33 * barrier (i.e. memory accesses may be moved before the write, but
34 * no memory accesses are moved past the write).
35 * On other architectures this may not be the case, so we need smp_mb()
36 * there.
37 */
38#if defined(CONFIG_X86) && (!defined(CONFIG_X86_PPRO_FENCE) && !defined(CONFIG_X86_OOSTORE))
39 barrier();
40#else
41 smp_mb();
42#endif
43 this_cpu_dec(*p->counters);
44}
45
46static inline unsigned __percpu_count(unsigned __percpu *counters)
47{
48 unsigned total = 0;
49 int cpu;
50
51 for_each_possible_cpu(cpu)
52 total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu));
53
54 return total;
55}
56
57static inline void percpu_down_write(struct percpu_rw_semaphore *p)
58{
59 mutex_lock(&p->mtx);
60 p->locked = true;
61 synchronize_rcu();
62 while (__percpu_count(p->counters))
63 msleep(1);
 64 smp_rmb(); /* paired with smp_mb() in percpu_up_read() */
65}
66
67static inline void percpu_up_write(struct percpu_rw_semaphore *p)
68{
69 p->locked = false;
70 mutex_unlock(&p->mtx);
71}
72
73static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p)
74{
75 p->counters = alloc_percpu(unsigned);
76 if (unlikely(!p->counters))
77 return -ENOMEM;
78 p->locked = false;
79 mutex_init(&p->mtx);
80 return 0;
81}
82
83static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p)
84{
85 free_percpu(p->counters);
86 p->counters = NULL; /* catch use after free bugs */
87}
88
89#endif
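Editor's note: the header above is self-contained, so the intended usage is easy to show — readers on the hot path pay only a per-cpu increment, while the rare writer (set_blocksize() in this series) drains them all. A minimal sketch with an illustrative user:

    #include <linux/percpu-rwsem.h>

    static struct percpu_rw_semaphore my_sem;    /* illustrative user */

    int  my_init(void) { return percpu_init_rwsem(&my_sem); }  /* -ENOMEM on failure */
    void my_exit(void) { percpu_free_rwsem(&my_sem); }

    void hot_path(void)
    {
            percpu_down_read(&my_sem);
            /* work that must not overlap the writer */
            percpu_up_read(&my_sem);
    }

    void rare_state_change(void)
    {
            percpu_down_write(&my_sem);
            /* all readers are drained at this point */
            percpu_up_write(&my_sem);
    }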
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 7b600da9a635..4bd6c06eb28e 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -201,6 +201,7 @@ static inline void *sg_virt(struct scatterlist *sg)
201 return page_address(sg_page(sg)) + sg->offset; 201 return page_address(sg_page(sg)) + sg->offset;
202} 202}
203 203
204int sg_nents(struct scatterlist *sg);
204struct scatterlist *sg_next(struct scatterlist *); 205struct scatterlist *sg_next(struct scatterlist *);
205struct scatterlist *sg_last(struct scatterlist *s, unsigned int); 206struct scatterlist *sg_last(struct scatterlist *s, unsigned int);
206void sg_init_table(struct scatterlist *, unsigned int); 207void sg_init_table(struct scatterlist *, unsigned int);
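Editor's note: sg_nents(), declared above and implemented in lib/scatterlist.c below, follows chain entries, so it reports the true entry count even when the table was built from several chunks. A hedged sketch (table sizes are illustrative):

    #include <linux/scatterlist.h>

    struct scatterlist one[4], two[4];

    sg_init_table(one, 4);
    sg_init_table(two, 4);
    sg_chain(one, 4, two);              /* last slot of 'one' becomes a chain link */

    /* walks across the chain: 3 usable entries in 'one' + 4 in 'two' = 7 */
    pr_info("nents = %d\n", sg_nents(one));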
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index e76d85cf3175..3675452b23ca 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -39,6 +39,25 @@ struct scatterlist *sg_next(struct scatterlist *sg)
39EXPORT_SYMBOL(sg_next); 39EXPORT_SYMBOL(sg_next);
40 40
41/** 41/**
42 * sg_nents - return total count of entries in scatterlist
43 * @sg: The scatterlist
44 *
45 * Description:
 46 * Allows the caller to know how many entries are in sg, taking into account
47 * chaining as well
48 *
49 **/
50int sg_nents(struct scatterlist *sg)
51{
52 int nents;
53 for (nents = 0; sg; sg = sg_next(sg))
54 nents++;
55 return nents;
56}
57EXPORT_SYMBOL(sg_nents);
58
59
60/**
42 * sg_last - return the last scatterlist entry in a list 61 * sg_last - return the last scatterlist entry in a list
43 * @sgl: First entry in the scatterlist 62 * @sgl: First entry in the scatterlist
44 * @nents: Number of entries in the scatterlist 63 * @nents: Number of entries in the scatterlist