author     Linus Torvalds <torvalds@linux-foundation.org>    2012-10-10 20:04:23 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>    2012-10-10 20:04:23 -0400
commit     ce40be7a820bb393ac4ac69865f018d2f4038cf0 (patch)
tree       b1fe5a93346eb06f22b1c303d63ec5456d7212ab
parent     ba0a5a36f60e4c1152af3a2ae2813251974405bf (diff)
parent     02f3939e1a9357b7c370a4a69717cf9c02452737 (diff)
Merge branch 'for-3.7/core' of git://git.kernel.dk/linux-block
Pull block IO update from Jens Axboe:
"Core block IO bits for 3.7. Not a huge round this time, it contains:
- First series from Kent cleaning up and generalizing bio allocation
and freeing.
- WRITE_SAME support from Martin.
- Mikulas patches to prevent O_DIRECT crashes when someone changes
the block size of a device.
- Make bio_split() work on data-less bio's (like trim/discards).
- A few other minor fixups."
Fixed up silent semantic mis-merge as per Mikulas Patocka and Andrew
Morton. It is due to the VM no longer using a prio-tree (see commit
6b2dbba8b6ac: "mm: replace vma prio_tree with an interval tree").
So make set_blocksize() use mapping_mapped() instead of open-coding the
internal VM knowledge that has changed.
* 'for-3.7/core' of git://git.kernel.dk/linux-block: (26 commits)
block: makes bio_split support bio without data
scatterlist: refactor the sg_nents
scatterlist: add sg_nents
fs: fix include/percpu-rwsem.h export error
percpu-rw-semaphore: fix documentation typos
fs/block_dev.c:1644:5: sparse: symbol 'blkdev_mmap' was not declared
blockdev: turn a rw semaphore into a percpu rw semaphore
Fix a crash when block device is read and block size is changed at the same time
block: fix request_queue->flags initialization
block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue()
block: ioctl to zero block ranges
block: Make blkdev_issue_zeroout use WRITE SAME
block: Implement support for WRITE SAME
block: Consolidate command flag and queue limit checks for merges
block: Clean up special command handling logic
block/blk-tag.c: Remove useless kfree
block: remove the duplicated setting for congestion_threshold
block: reject invalid queue attribute values
block: Add bio_clone_bioset(), bio_clone_kmalloc()
block: Consolidate bio_alloc_bioset(), bio_kmalloc()
...
33 files changed, 770 insertions, 464 deletions
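For context on the fix-up described above: the resolution makes set_blocksize() ask mapping_mapped() whether the block device inode currently has any memory mappings, instead of walking VM internals that changed when the prio-tree became an interval tree. A rough, simplified sketch of the resulting check follows (not the verbatim merged code; the bd_block_size_semaphore field name comes from the percpu-rw-semaphore patches in this pull and is assumed here):

    #include <linux/blkdev.h>
    #include <linux/log2.h>

    /* Simplified sketch of fs/block_dev.c::set_blocksize() after the merge. */
    int set_blocksize(struct block_device *bdev, int size)
    {
            /* size must be a power of two between 512 and PAGE_SIZE,
             * and no smaller than the device's logical block size */
            if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size) ||
                size < bdev_logical_block_size(bdev))
                    return -EINVAL;

            /* hold off new I/O and new mmaps while the size changes
             * (percpu rw-semaphore added by this series; field name assumed) */
            percpu_down_write(&bdev->bd_block_size_semaphore);

            /* mapping_mapped() reports whether the bdev inode is mmapped without
             * depending on how the VM tracks mappings (prio-tree vs. interval tree) */
            if (mapping_mapped(bdev->bd_inode->i_mapping)) {
                    percpu_up_write(&bdev->bd_block_size_semaphore);
                    return -EBUSY;
            }

            /* ... sync the device, update bd_block_size/i_blkbits, kill_bdev() ... */

            percpu_up_write(&bdev->bd_block_size_semaphore);
            return 0;
    }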
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index c1eb41cb9876..279da08f7541 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -206,3 +206,17 @@ Description: | |||
206 | when a discarded area is read the discard_zeroes_data | 206 | when a discarded area is read the discard_zeroes_data |
207 | parameter will be set to one. Otherwise it will be 0 and | 207 | parameter will be set to one. Otherwise it will be 0 and |
208 | the result of reading a discarded area is undefined. | 208 | the result of reading a discarded area is undefined. |
209 | |||
210 | What: /sys/block/<disk>/queue/write_same_max_bytes | ||
211 | Date: January 2012 | ||
212 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
213 | Description: | ||
214 | Some devices support a write same operation in which a | ||
215 | single data block can be written to a range of several | ||
216 | contiguous blocks on storage. This can be used to wipe | ||
217 | areas on disk or to initialize drives in a RAID | ||
218 | configuration. write_same_max_bytes indicates how many | ||
219 | bytes can be written in a single write same command. If | ||
220 | write_same_max_bytes is 0, write same is not supported | ||
221 | by the device. | ||
222 | |||
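To illustrate the attribute documented above, a small userspace reader might look like this (illustrative only; "sda" is a placeholder device name):

    #include <stdio.h>

    int main(void)
    {
            unsigned long long max_bytes = 0;
            FILE *f = fopen("/sys/block/sda/queue/write_same_max_bytes", "r");

            if (!f)
                    return 1;
            if (fscanf(f, "%llu", &max_bytes) != 1)
                    max_bytes = 0;
            fclose(f);

            if (max_bytes)
                    printf("WRITE SAME supported, up to %llu bytes per command\n", max_bytes);
            else
                    printf("WRITE SAME not supported\n");
            return 0;
    }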
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index e418dc0a7086..8df5e8e6dceb 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -465,7 +465,6 @@ struct bio { | |||
465 | bio_end_io_t *bi_end_io; /* bi_end_io (bio) */ | 465 | bio_end_io_t *bi_end_io; /* bi_end_io (bio) */ |
466 | atomic_t bi_cnt; /* pin count: free when it hits zero */ | 466 | atomic_t bi_cnt; /* pin count: free when it hits zero */ |
467 | void *bi_private; | 467 | void *bi_private; |
468 | bio_destructor_t *bi_destructor; /* bi_destructor (bio) */ | ||
469 | }; | 468 | }; |
470 | 469 | ||
471 | With this multipage bio design: | 470 | With this multipage bio design: |
@@ -647,10 +646,6 @@ for a non-clone bio. There are the 6 pools setup for different size biovecs, | |||
647 | so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the | 646 | so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the |
648 | given size from these slabs. | 647 | given size from these slabs. |
649 | 648 | ||
650 | The bi_destructor() routine takes into account the possibility of the bio | ||
651 | having originated from a different source (see later discussions on | ||
652 | n/w to block transfers and kvec_cb) | ||
653 | |||
654 | The bio_get() routine may be used to hold an extra reference on a bio prior | 649 | The bio_get() routine may be used to hold an extra reference on a bio prior |
655 | to i/o submission, if the bio fields are likely to be accessed after the | 650 | to i/o submission, if the bio fields are likely to be accessed after the |
656 | i/o is issued (since the bio may otherwise get freed in case i/o completion | 651 | i/o is issued (since the bio may otherwise get freed in case i/o completion |
diff --git a/Documentation/percpu-rw-semaphore.txt b/Documentation/percpu-rw-semaphore.txt
new file mode 100644
index 000000000000..7d3c82431909
--- /dev/null
+++ b/Documentation/percpu-rw-semaphore.txt
@@ -0,0 +1,27 @@ | |||
1 | Percpu rw semaphores | ||
2 | -------------------- | ||
3 | |||
4 | Percpu rw semaphores is a new read-write semaphore design that is | ||
5 | optimized for locking for reading. | ||
6 | |||
7 | The problem with traditional read-write semaphores is that when multiple | ||
8 | cores take the lock for reading, the cache line containing the semaphore | ||
9 | is bouncing between L1 caches of the cores, causing performance | ||
10 | degradation. | ||
11 | |||
12 | Locking for reading is very fast, it uses RCU and it avoids any atomic | ||
13 | instruction in the lock and unlock path. On the other hand, locking for | ||
14 | writing is very expensive, it calls synchronize_rcu() that can take | ||
15 | hundreds of milliseconds. | ||
16 | |||
17 | The lock is declared with "struct percpu_rw_semaphore" type. | ||
18 | The lock is initialized percpu_init_rwsem, it returns 0 on success and | ||
19 | -ENOMEM on allocation failure. | ||
20 | The lock must be freed with percpu_free_rwsem to avoid memory leak. | ||
21 | |||
22 | The lock is locked for read with percpu_down_read, percpu_up_read and | ||
23 | for write with percpu_down_write, percpu_up_write. | ||
24 | |||
25 | The idea of using RCU for optimized rw-lock was introduced by | ||
26 | Eric Dumazet <eric.dumazet@gmail.com>. | ||
27 | The code was written by Mikulas Patocka <mpatocka@redhat.com> | ||
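A minimal usage pattern for the API described above might look like the following sketch (kernel context; the frobnicate_* names are made up, and the header is assumed to be <linux/percpu-rwsem.h>):

    #include <linux/percpu-rwsem.h>

    static struct percpu_rw_semaphore frobnicate_sem;

    static int frobnicate_init(void)
    {
            /* 0 on success, -ENOMEM if the per-cpu counters cannot be allocated */
            return percpu_init_rwsem(&frobnicate_sem);
    }

    static void frobnicate_read_path(void)
    {
            percpu_down_read(&frobnicate_sem);      /* cheap: RCU, no atomic ops */
            /* ... read-side work ... */
            percpu_up_read(&frobnicate_sem);
    }

    static void frobnicate_update(void)
    {
            percpu_down_write(&frobnicate_sem);     /* expensive: uses synchronize_rcu() */
            /* ... exclusive update ... */
            percpu_up_write(&frobnicate_sem);
    }

    static void frobnicate_exit(void)
    {
            percpu_free_rwsem(&frobnicate_sem);
    }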
diff --git a/block/blk-core.c b/block/blk-core.c
index d2da64170513..a33870b1847b 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -606,8 +606,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
606 | /* | 606 | /* |
607 | * A queue starts its life with bypass turned on to avoid | 607 | * A queue starts its life with bypass turned on to avoid |
608 | * unnecessary bypass on/off overhead and nasty surprises during | 608 | * unnecessary bypass on/off overhead and nasty surprises during |
609 | * init. The initial bypass will be finished at the end of | 609 | * init. The initial bypass will be finished when the queue is |
610 | * blk_init_allocated_queue(). | 610 | * registered by blk_register_queue(). |
611 | */ | 611 | */ |
612 | q->bypass_depth = 1; | 612 | q->bypass_depth = 1; |
613 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | 613 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); |
@@ -694,7 +694,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | |||
694 | q->request_fn = rfn; | 694 | q->request_fn = rfn; |
695 | q->prep_rq_fn = NULL; | 695 | q->prep_rq_fn = NULL; |
696 | q->unprep_rq_fn = NULL; | 696 | q->unprep_rq_fn = NULL; |
697 | q->queue_flags = QUEUE_FLAG_DEFAULT; | 697 | q->queue_flags |= QUEUE_FLAG_DEFAULT; |
698 | 698 | ||
699 | /* Override internal queue lock with supplied lock pointer */ | 699 | /* Override internal queue lock with supplied lock pointer */ |
700 | if (lock) | 700 | if (lock) |
@@ -710,11 +710,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | |||
710 | /* init elevator */ | 710 | /* init elevator */ |
711 | if (elevator_init(q, NULL)) | 711 | if (elevator_init(q, NULL)) |
712 | return NULL; | 712 | return NULL; |
713 | |||
714 | blk_queue_congestion_threshold(q); | ||
715 | |||
716 | /* all done, end the initial bypass */ | ||
717 | blk_queue_bypass_end(q); | ||
718 | return q; | 713 | return q; |
719 | } | 714 | } |
720 | EXPORT_SYMBOL(blk_init_allocated_queue); | 715 | EXPORT_SYMBOL(blk_init_allocated_queue); |
@@ -1657,8 +1652,8 @@ generic_make_request_checks(struct bio *bio) | |||
1657 | goto end_io; | 1652 | goto end_io; |
1658 | } | 1653 | } |
1659 | 1654 | ||
1660 | if (unlikely(!(bio->bi_rw & REQ_DISCARD) && | 1655 | if (likely(bio_is_rw(bio) && |
1661 | nr_sectors > queue_max_hw_sectors(q))) { | 1656 | nr_sectors > queue_max_hw_sectors(q))) { |
1662 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", | 1657 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", |
1663 | bdevname(bio->bi_bdev, b), | 1658 | bdevname(bio->bi_bdev, b), |
1664 | bio_sectors(bio), | 1659 | bio_sectors(bio), |
@@ -1699,8 +1694,12 @@ generic_make_request_checks(struct bio *bio) | |||
1699 | 1694 | ||
1700 | if ((bio->bi_rw & REQ_DISCARD) && | 1695 | if ((bio->bi_rw & REQ_DISCARD) && |
1701 | (!blk_queue_discard(q) || | 1696 | (!blk_queue_discard(q) || |
1702 | ((bio->bi_rw & REQ_SECURE) && | 1697 | ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { |
1703 | !blk_queue_secdiscard(q)))) { | 1698 | err = -EOPNOTSUPP; |
1699 | goto end_io; | ||
1700 | } | ||
1701 | |||
1702 | if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { | ||
1704 | err = -EOPNOTSUPP; | 1703 | err = -EOPNOTSUPP; |
1705 | goto end_io; | 1704 | goto end_io; |
1706 | } | 1705 | } |
@@ -1810,15 +1809,20 @@ EXPORT_SYMBOL(generic_make_request); | |||
1810 | */ | 1809 | */ |
1811 | void submit_bio(int rw, struct bio *bio) | 1810 | void submit_bio(int rw, struct bio *bio) |
1812 | { | 1811 | { |
1813 | int count = bio_sectors(bio); | ||
1814 | |||
1815 | bio->bi_rw |= rw; | 1812 | bio->bi_rw |= rw; |
1816 | 1813 | ||
1817 | /* | 1814 | /* |
1818 | * If it's a regular read/write or a barrier with data attached, | 1815 | * If it's a regular read/write or a barrier with data attached, |
1819 | * go through the normal accounting stuff before submission. | 1816 | * go through the normal accounting stuff before submission. |
1820 | */ | 1817 | */ |
1821 | if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { | 1818 | if (bio_has_data(bio)) { |
1819 | unsigned int count; | ||
1820 | |||
1821 | if (unlikely(rw & REQ_WRITE_SAME)) | ||
1822 | count = bdev_logical_block_size(bio->bi_bdev) >> 9; | ||
1823 | else | ||
1824 | count = bio_sectors(bio); | ||
1825 | |||
1822 | if (rw & WRITE) { | 1826 | if (rw & WRITE) { |
1823 | count_vm_events(PGPGOUT, count); | 1827 | count_vm_events(PGPGOUT, count); |
1824 | } else { | 1828 | } else { |
@@ -1864,11 +1868,10 @@ EXPORT_SYMBOL(submit_bio); | |||
1864 | */ | 1868 | */ |
1865 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) | 1869 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) |
1866 | { | 1870 | { |
1867 | if (rq->cmd_flags & REQ_DISCARD) | 1871 | if (!rq_mergeable(rq)) |
1868 | return 0; | 1872 | return 0; |
1869 | 1873 | ||
1870 | if (blk_rq_sectors(rq) > queue_max_sectors(q) || | 1874 | if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { |
1871 | blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { | ||
1872 | printk(KERN_ERR "%s: over max size limit.\n", __func__); | 1875 | printk(KERN_ERR "%s: over max size limit.\n", __func__); |
1873 | return -EIO; | 1876 | return -EIO; |
1874 | } | 1877 | } |
@@ -2340,7 +2343,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) | |||
2340 | req->buffer = bio_data(req->bio); | 2343 | req->buffer = bio_data(req->bio); |
2341 | 2344 | ||
2342 | /* update sector only for requests with clear definition of sector */ | 2345 | /* update sector only for requests with clear definition of sector */ |
2343 | if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) | 2346 | if (req->cmd_type == REQ_TYPE_FS) |
2344 | req->__sector += total_bytes >> 9; | 2347 | req->__sector += total_bytes >> 9; |
2345 | 2348 | ||
2346 | /* mixed attributes always follow the first bio */ | 2349 | /* mixed attributes always follow the first bio */ |
@@ -2781,16 +2784,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | |||
2781 | blk_rq_init(NULL, rq); | 2784 | blk_rq_init(NULL, rq); |
2782 | 2785 | ||
2783 | __rq_for_each_bio(bio_src, rq_src) { | 2786 | __rq_for_each_bio(bio_src, rq_src) { |
2784 | bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); | 2787 | bio = bio_clone_bioset(bio_src, gfp_mask, bs); |
2785 | if (!bio) | 2788 | if (!bio) |
2786 | goto free_and_out; | 2789 | goto free_and_out; |
2787 | 2790 | ||
2788 | __bio_clone(bio, bio_src); | ||
2789 | |||
2790 | if (bio_integrity(bio_src) && | ||
2791 | bio_integrity_clone(bio, bio_src, gfp_mask, bs)) | ||
2792 | goto free_and_out; | ||
2793 | |||
2794 | if (bio_ctr && bio_ctr(bio, bio_src, data)) | 2791 | if (bio_ctr && bio_ctr(bio, bio_src, data)) |
2795 | goto free_and_out; | 2792 | goto free_and_out; |
2796 | 2793 | ||
@@ -2807,7 +2804,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | |||
2807 | 2804 | ||
2808 | free_and_out: | 2805 | free_and_out: |
2809 | if (bio) | 2806 | if (bio) |
2810 | bio_free(bio, bs); | 2807 | bio_put(bio); |
2811 | blk_rq_unprep_clone(rq); | 2808 | blk_rq_unprep_clone(rq); |
2812 | 2809 | ||
2813 | return -ENOMEM; | 2810 | return -ENOMEM; |
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 19cc761cacb2..9373b58dfab1 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -130,6 +130,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
130 | EXPORT_SYMBOL(blkdev_issue_discard); | 130 | EXPORT_SYMBOL(blkdev_issue_discard); |
131 | 131 | ||
132 | /** | 132 | /** |
133 | * blkdev_issue_write_same - queue a write same operation | ||
134 | * @bdev: target blockdev | ||
135 | * @sector: start sector | ||
136 | * @nr_sects: number of sectors to write | ||
137 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
138 | * @page: page containing data to write | ||
139 | * | ||
140 | * Description: | ||
141 | * Issue a write same request for the sectors in question. | ||
142 | */ | ||
143 | int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, | ||
144 | sector_t nr_sects, gfp_t gfp_mask, | ||
145 | struct page *page) | ||
146 | { | ||
147 | DECLARE_COMPLETION_ONSTACK(wait); | ||
148 | struct request_queue *q = bdev_get_queue(bdev); | ||
149 | unsigned int max_write_same_sectors; | ||
150 | struct bio_batch bb; | ||
151 | struct bio *bio; | ||
152 | int ret = 0; | ||
153 | |||
154 | if (!q) | ||
155 | return -ENXIO; | ||
156 | |||
157 | max_write_same_sectors = q->limits.max_write_same_sectors; | ||
158 | |||
159 | if (max_write_same_sectors == 0) | ||
160 | return -EOPNOTSUPP; | ||
161 | |||
162 | atomic_set(&bb.done, 1); | ||
163 | bb.flags = 1 << BIO_UPTODATE; | ||
164 | bb.wait = &wait; | ||
165 | |||
166 | while (nr_sects) { | ||
167 | bio = bio_alloc(gfp_mask, 1); | ||
168 | if (!bio) { | ||
169 | ret = -ENOMEM; | ||
170 | break; | ||
171 | } | ||
172 | |||
173 | bio->bi_sector = sector; | ||
174 | bio->bi_end_io = bio_batch_end_io; | ||
175 | bio->bi_bdev = bdev; | ||
176 | bio->bi_private = &bb; | ||
177 | bio->bi_vcnt = 1; | ||
178 | bio->bi_io_vec->bv_page = page; | ||
179 | bio->bi_io_vec->bv_offset = 0; | ||
180 | bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); | ||
181 | |||
182 | if (nr_sects > max_write_same_sectors) { | ||
183 | bio->bi_size = max_write_same_sectors << 9; | ||
184 | nr_sects -= max_write_same_sectors; | ||
185 | sector += max_write_same_sectors; | ||
186 | } else { | ||
187 | bio->bi_size = nr_sects << 9; | ||
188 | nr_sects = 0; | ||
189 | } | ||
190 | |||
191 | atomic_inc(&bb.done); | ||
192 | submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio); | ||
193 | } | ||
194 | |||
195 | /* Wait for bios in-flight */ | ||
196 | if (!atomic_dec_and_test(&bb.done)) | ||
197 | wait_for_completion(&wait); | ||
198 | |||
199 | if (!test_bit(BIO_UPTODATE, &bb.flags)) | ||
200 | ret = -ENOTSUPP; | ||
201 | |||
202 | return ret; | ||
203 | } | ||
204 | EXPORT_SYMBOL(blkdev_issue_write_same); | ||
205 | |||
206 | /** | ||
133 | * blkdev_issue_zeroout - generate number of zero filed write bios | 207 | * blkdev_issue_zeroout - generate number of zero filed write bios |
134 | * @bdev: blockdev to issue | 208 | * @bdev: blockdev to issue |
135 | * @sector: start sector | 209 | * @sector: start sector |
@@ -140,7 +214,7 @@ EXPORT_SYMBOL(blkdev_issue_discard); | |||
140 | * Generate and issue number of bios with zerofiled pages. | 214 | * Generate and issue number of bios with zerofiled pages. |
141 | */ | 215 | */ |
142 | 216 | ||
143 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 217 | int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
144 | sector_t nr_sects, gfp_t gfp_mask) | 218 | sector_t nr_sects, gfp_t gfp_mask) |
145 | { | 219 | { |
146 | int ret; | 220 | int ret; |
@@ -190,4 +264,32 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | |||
190 | 264 | ||
191 | return ret; | 265 | return ret; |
192 | } | 266 | } |
267 | |||
268 | /** | ||
269 | * blkdev_issue_zeroout - zero-fill a block range | ||
270 | * @bdev: blockdev to write | ||
271 | * @sector: start sector | ||
272 | * @nr_sects: number of sectors to write | ||
273 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
274 | * | ||
275 | * Description: | ||
276 | * Generate and issue number of bios with zerofiled pages. | ||
277 | */ | ||
278 | |||
279 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | ||
280 | sector_t nr_sects, gfp_t gfp_mask) | ||
281 | { | ||
282 | if (bdev_write_same(bdev)) { | ||
283 | unsigned char bdn[BDEVNAME_SIZE]; | ||
284 | |||
285 | if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, | ||
286 | ZERO_PAGE(0))) | ||
287 | return 0; | ||
288 | |||
289 | bdevname(bdev, bdn); | ||
290 | pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn); | ||
291 | } | ||
292 | |||
293 | return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask); | ||
294 | } | ||
193 | EXPORT_SYMBOL(blkdev_issue_zeroout); | 295 | EXPORT_SYMBOL(blkdev_issue_zeroout); |
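As a usage note for the two helpers above: callers normally only need blkdev_issue_zeroout(), which opportunistically issues WRITE SAME with ZERO_PAGE(0) and falls back to plain zero-filled writes if that fails. A short in-kernel sketch (example_wipe_bdev is a hypothetical helper, not part of the series):

    #include <linux/blkdev.h>
    #include <linux/fs.h>

    /* Zero an entire block device, preferring WRITE SAME when available. */
    static int example_wipe_bdev(struct block_device *bdev)
    {
            sector_t nr_sects = i_size_read(bdev->bd_inode) >> 9;

            return blkdev_issue_zeroout(bdev, 0, nr_sects, GFP_KERNEL);
    }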
diff --git a/block/blk-merge.c b/block/blk-merge.c
index e76279e41162..936a110de0b9 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -275,14 +275,8 @@ no_merge: | |||
275 | int ll_back_merge_fn(struct request_queue *q, struct request *req, | 275 | int ll_back_merge_fn(struct request_queue *q, struct request *req, |
276 | struct bio *bio) | 276 | struct bio *bio) |
277 | { | 277 | { |
278 | unsigned short max_sectors; | 278 | if (blk_rq_sectors(req) + bio_sectors(bio) > |
279 | 279 | blk_rq_get_max_sectors(req)) { | |
280 | if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) | ||
281 | max_sectors = queue_max_hw_sectors(q); | ||
282 | else | ||
283 | max_sectors = queue_max_sectors(q); | ||
284 | |||
285 | if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { | ||
286 | req->cmd_flags |= REQ_NOMERGE; | 280 | req->cmd_flags |= REQ_NOMERGE; |
287 | if (req == q->last_merge) | 281 | if (req == q->last_merge) |
288 | q->last_merge = NULL; | 282 | q->last_merge = NULL; |
@@ -299,15 +293,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, | |||
299 | int ll_front_merge_fn(struct request_queue *q, struct request *req, | 293 | int ll_front_merge_fn(struct request_queue *q, struct request *req, |
300 | struct bio *bio) | 294 | struct bio *bio) |
301 | { | 295 | { |
302 | unsigned short max_sectors; | 296 | if (blk_rq_sectors(req) + bio_sectors(bio) > |
303 | 297 | blk_rq_get_max_sectors(req)) { | |
304 | if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) | ||
305 | max_sectors = queue_max_hw_sectors(q); | ||
306 | else | ||
307 | max_sectors = queue_max_sectors(q); | ||
308 | |||
309 | |||
310 | if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { | ||
311 | req->cmd_flags |= REQ_NOMERGE; | 298 | req->cmd_flags |= REQ_NOMERGE; |
312 | if (req == q->last_merge) | 299 | if (req == q->last_merge) |
313 | q->last_merge = NULL; | 300 | q->last_merge = NULL; |
@@ -338,7 +325,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, | |||
338 | /* | 325 | /* |
339 | * Will it become too large? | 326 | * Will it become too large? |
340 | */ | 327 | */ |
341 | if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q)) | 328 | if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > |
329 | blk_rq_get_max_sectors(req)) | ||
342 | return 0; | 330 | return 0; |
343 | 331 | ||
344 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; | 332 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; |
@@ -417,16 +405,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, | |||
417 | if (!rq_mergeable(req) || !rq_mergeable(next)) | 405 | if (!rq_mergeable(req) || !rq_mergeable(next)) |
418 | return 0; | 406 | return 0; |
419 | 407 | ||
420 | /* | 408 | if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) |
421 | * Don't merge file system requests and discard requests | ||
422 | */ | ||
423 | if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD)) | ||
424 | return 0; | ||
425 | |||
426 | /* | ||
427 | * Don't merge discard requests and secure discard requests | ||
428 | */ | ||
429 | if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE)) | ||
430 | return 0; | 409 | return 0; |
431 | 410 | ||
432 | /* | 411 | /* |
@@ -440,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, | |||
440 | || next->special) | 419 | || next->special) |
441 | return 0; | 420 | return 0; |
442 | 421 | ||
422 | if (req->cmd_flags & REQ_WRITE_SAME && | ||
423 | !blk_write_same_mergeable(req->bio, next->bio)) | ||
424 | return 0; | ||
425 | |||
443 | /* | 426 | /* |
444 | * If we are allowed to merge, then append bio list | 427 | * If we are allowed to merge, then append bio list |
445 | * from next to rq and release next. merge_requests_fn | 428 | * from next to rq and release next. merge_requests_fn |
@@ -521,15 +504,10 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, | |||
521 | 504 | ||
522 | bool blk_rq_merge_ok(struct request *rq, struct bio *bio) | 505 | bool blk_rq_merge_ok(struct request *rq, struct bio *bio) |
523 | { | 506 | { |
524 | if (!rq_mergeable(rq)) | 507 | if (!rq_mergeable(rq) || !bio_mergeable(bio)) |
525 | return false; | 508 | return false; |
526 | 509 | ||
527 | /* don't merge file system requests and discard requests */ | 510 | if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) |
528 | if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) | ||
529 | return false; | ||
530 | |||
531 | /* don't merge discard requests and secure discard requests */ | ||
532 | if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE)) | ||
533 | return false; | 511 | return false; |
534 | 512 | ||
535 | /* different data direction or already started, don't merge */ | 513 | /* different data direction or already started, don't merge */ |
@@ -544,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) | |||
544 | if (bio_integrity(bio) != blk_integrity_rq(rq)) | 522 | if (bio_integrity(bio) != blk_integrity_rq(rq)) |
545 | return false; | 523 | return false; |
546 | 524 | ||
525 | /* must be using the same buffer */ | ||
526 | if (rq->cmd_flags & REQ_WRITE_SAME && | ||
527 | !blk_write_same_mergeable(rq->bio, bio)) | ||
528 | return false; | ||
529 | |||
547 | return true; | 530 | return true; |
548 | } | 531 | } |
549 | 532 | ||
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 565a6786032f..779bb7646bcd 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim) | |||
113 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; | 113 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; |
114 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; | 114 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; |
115 | lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; | 115 | lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; |
116 | lim->max_write_same_sectors = 0; | ||
116 | lim->max_discard_sectors = 0; | 117 | lim->max_discard_sectors = 0; |
117 | lim->discard_granularity = 0; | 118 | lim->discard_granularity = 0; |
118 | lim->discard_alignment = 0; | 119 | lim->discard_alignment = 0; |
@@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) | |||
144 | lim->max_segments = USHRT_MAX; | 145 | lim->max_segments = USHRT_MAX; |
145 | lim->max_hw_sectors = UINT_MAX; | 146 | lim->max_hw_sectors = UINT_MAX; |
146 | lim->max_sectors = UINT_MAX; | 147 | lim->max_sectors = UINT_MAX; |
148 | lim->max_write_same_sectors = UINT_MAX; | ||
147 | } | 149 | } |
148 | EXPORT_SYMBOL(blk_set_stacking_limits); | 150 | EXPORT_SYMBOL(blk_set_stacking_limits); |
149 | 151 | ||
@@ -286,6 +288,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q, | |||
286 | EXPORT_SYMBOL(blk_queue_max_discard_sectors); | 288 | EXPORT_SYMBOL(blk_queue_max_discard_sectors); |
287 | 289 | ||
288 | /** | 290 | /** |
291 | * blk_queue_max_write_same_sectors - set max sectors for a single write same | ||
292 | * @q: the request queue for the device | ||
293 | * @max_write_same_sectors: maximum number of sectors to write per command | ||
294 | **/ | ||
295 | void blk_queue_max_write_same_sectors(struct request_queue *q, | ||
296 | unsigned int max_write_same_sectors) | ||
297 | { | ||
298 | q->limits.max_write_same_sectors = max_write_same_sectors; | ||
299 | } | ||
300 | EXPORT_SYMBOL(blk_queue_max_write_same_sectors); | ||
301 | |||
302 | /** | ||
289 | * blk_queue_max_segments - set max hw segments for a request for this queue | 303 | * blk_queue_max_segments - set max hw segments for a request for this queue |
290 | * @q: the request queue for the device | 304 | * @q: the request queue for the device |
291 | * @max_segments: max number of segments | 305 | * @max_segments: max number of segments |
@@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, | |||
510 | 524 | ||
511 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); | 525 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); |
512 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); | 526 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); |
527 | t->max_write_same_sectors = min(t->max_write_same_sectors, | ||
528 | b->max_write_same_sectors); | ||
513 | t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); | 529 | t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); |
514 | 530 | ||
515 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, | 531 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 9628b291f960..ce6204608822 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -26,9 +26,15 @@ queue_var_show(unsigned long var, char *page) | |||
26 | static ssize_t | 26 | static ssize_t |
27 | queue_var_store(unsigned long *var, const char *page, size_t count) | 27 | queue_var_store(unsigned long *var, const char *page, size_t count) |
28 | { | 28 | { |
29 | char *p = (char *) page; | 29 | int err; |
30 | unsigned long v; | ||
31 | |||
32 | err = strict_strtoul(page, 10, &v); | ||
33 | if (err || v > UINT_MAX) | ||
34 | return -EINVAL; | ||
35 | |||
36 | *var = v; | ||
30 | 37 | ||
31 | *var = simple_strtoul(p, &p, 10); | ||
32 | return count; | 38 | return count; |
33 | } | 39 | } |
34 | 40 | ||
@@ -48,6 +54,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) | |||
48 | return -EINVAL; | 54 | return -EINVAL; |
49 | 55 | ||
50 | ret = queue_var_store(&nr, page, count); | 56 | ret = queue_var_store(&nr, page, count); |
57 | if (ret < 0) | ||
58 | return ret; | ||
59 | |||
51 | if (nr < BLKDEV_MIN_RQ) | 60 | if (nr < BLKDEV_MIN_RQ) |
52 | nr = BLKDEV_MIN_RQ; | 61 | nr = BLKDEV_MIN_RQ; |
53 | 62 | ||
@@ -102,6 +111,9 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) | |||
102 | unsigned long ra_kb; | 111 | unsigned long ra_kb; |
103 | ssize_t ret = queue_var_store(&ra_kb, page, count); | 112 | ssize_t ret = queue_var_store(&ra_kb, page, count); |
104 | 113 | ||
114 | if (ret < 0) | ||
115 | return ret; | ||
116 | |||
105 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); | 117 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); |
106 | 118 | ||
107 | return ret; | 119 | return ret; |
@@ -168,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag | |||
168 | return queue_var_show(queue_discard_zeroes_data(q), page); | 180 | return queue_var_show(queue_discard_zeroes_data(q), page); |
169 | } | 181 | } |
170 | 182 | ||
183 | static ssize_t queue_write_same_max_show(struct request_queue *q, char *page) | ||
184 | { | ||
185 | return sprintf(page, "%llu\n", | ||
186 | (unsigned long long)q->limits.max_write_same_sectors << 9); | ||
187 | } | ||
188 | |||
189 | |||
171 | static ssize_t | 190 | static ssize_t |
172 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | 191 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) |
173 | { | 192 | { |
@@ -176,6 +195,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | |||
176 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); | 195 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); |
177 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); | 196 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); |
178 | 197 | ||
198 | if (ret < 0) | ||
199 | return ret; | ||
200 | |||
179 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) | 201 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) |
180 | return -EINVAL; | 202 | return -EINVAL; |
181 | 203 | ||
@@ -236,6 +258,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, | |||
236 | unsigned long nm; | 258 | unsigned long nm; |
237 | ssize_t ret = queue_var_store(&nm, page, count); | 259 | ssize_t ret = queue_var_store(&nm, page, count); |
238 | 260 | ||
261 | if (ret < 0) | ||
262 | return ret; | ||
263 | |||
239 | spin_lock_irq(q->queue_lock); | 264 | spin_lock_irq(q->queue_lock); |
240 | queue_flag_clear(QUEUE_FLAG_NOMERGES, q); | 265 | queue_flag_clear(QUEUE_FLAG_NOMERGES, q); |
241 | queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); | 266 | queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); |
@@ -264,6 +289,9 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) | |||
264 | unsigned long val; | 289 | unsigned long val; |
265 | 290 | ||
266 | ret = queue_var_store(&val, page, count); | 291 | ret = queue_var_store(&val, page, count); |
292 | if (ret < 0) | ||
293 | return ret; | ||
294 | |||
267 | spin_lock_irq(q->queue_lock); | 295 | spin_lock_irq(q->queue_lock); |
268 | if (val == 2) { | 296 | if (val == 2) { |
269 | queue_flag_set(QUEUE_FLAG_SAME_COMP, q); | 297 | queue_flag_set(QUEUE_FLAG_SAME_COMP, q); |
@@ -364,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { | |||
364 | .show = queue_discard_zeroes_data_show, | 392 | .show = queue_discard_zeroes_data_show, |
365 | }; | 393 | }; |
366 | 394 | ||
395 | static struct queue_sysfs_entry queue_write_same_max_entry = { | ||
396 | .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO }, | ||
397 | .show = queue_write_same_max_show, | ||
398 | }; | ||
399 | |||
367 | static struct queue_sysfs_entry queue_nonrot_entry = { | 400 | static struct queue_sysfs_entry queue_nonrot_entry = { |
368 | .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, | 401 | .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, |
369 | .show = queue_show_nonrot, | 402 | .show = queue_show_nonrot, |
@@ -411,6 +444,7 @@ static struct attribute *default_attrs[] = { | |||
411 | &queue_discard_granularity_entry.attr, | 444 | &queue_discard_granularity_entry.attr, |
412 | &queue_discard_max_entry.attr, | 445 | &queue_discard_max_entry.attr, |
413 | &queue_discard_zeroes_data_entry.attr, | 446 | &queue_discard_zeroes_data_entry.attr, |
447 | &queue_write_same_max_entry.attr, | ||
414 | &queue_nonrot_entry.attr, | 448 | &queue_nonrot_entry.attr, |
415 | &queue_nomerges_entry.attr, | 449 | &queue_nomerges_entry.attr, |
416 | &queue_rq_affinity_entry.attr, | 450 | &queue_rq_affinity_entry.attr, |
@@ -527,6 +561,12 @@ int blk_register_queue(struct gendisk *disk) | |||
527 | if (WARN_ON(!q)) | 561 | if (WARN_ON(!q)) |
528 | return -ENXIO; | 562 | return -ENXIO; |
529 | 563 | ||
564 | /* | ||
565 | * Initialization must be complete by now. Finish the initial | ||
566 | * bypass from queue allocation. | ||
567 | */ | ||
568 | blk_queue_bypass_end(q); | ||
569 | |||
530 | ret = blk_trace_init_sysfs(dev); | 570 | ret = blk_trace_init_sysfs(dev); |
531 | if (ret) | 571 | if (ret) |
532 | return ret; | 572 | return ret; |
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 4af6f5cc1167..cc345e1d8d4e 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -186,7 +186,8 @@ int blk_queue_init_tags(struct request_queue *q, int depth, | |||
186 | tags = __blk_queue_init_tags(q, depth); | 186 | tags = __blk_queue_init_tags(q, depth); |
187 | 187 | ||
188 | if (!tags) | 188 | if (!tags) |
189 | goto fail; | 189 | return -ENOMEM; |
190 | |||
190 | } else if (q->queue_tags) { | 191 | } else if (q->queue_tags) { |
191 | rc = blk_queue_resize_tags(q, depth); | 192 | rc = blk_queue_resize_tags(q, depth); |
192 | if (rc) | 193 | if (rc) |
@@ -203,9 +204,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth, | |||
203 | queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); | 204 | queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); |
204 | INIT_LIST_HEAD(&q->tag_busy_list); | 205 | INIT_LIST_HEAD(&q->tag_busy_list); |
205 | return 0; | 206 | return 0; |
206 | fail: | ||
207 | kfree(tags); | ||
208 | return -ENOMEM; | ||
209 | } | 207 | } |
210 | EXPORT_SYMBOL(blk_queue_init_tags); | 208 | EXPORT_SYMBOL(blk_queue_init_tags); |
211 | 209 | ||
diff --git a/block/blk.h b/block/blk.h
index 2a0ea32d249f..ca51543b248c 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -171,14 +171,13 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) | |||
171 | * | 171 | * |
172 | * a) it's attached to a gendisk, and | 172 | * a) it's attached to a gendisk, and |
173 | * b) the queue had IO stats enabled when this request was started, and | 173 | * b) the queue had IO stats enabled when this request was started, and |
174 | * c) it's a file system request or a discard request | 174 | * c) it's a file system request |
175 | */ | 175 | */ |
176 | static inline int blk_do_io_stat(struct request *rq) | 176 | static inline int blk_do_io_stat(struct request *rq) |
177 | { | 177 | { |
178 | return rq->rq_disk && | 178 | return rq->rq_disk && |
179 | (rq->cmd_flags & REQ_IO_STAT) && | 179 | (rq->cmd_flags & REQ_IO_STAT) && |
180 | (rq->cmd_type == REQ_TYPE_FS || | 180 | (rq->cmd_type == REQ_TYPE_FS); |
181 | (rq->cmd_flags & REQ_DISCARD)); | ||
182 | } | 181 | } |
183 | 182 | ||
184 | /* | 183 | /* |
diff --git a/block/elevator.c b/block/elevator.c
index 6a55d418896f..9b1d42b62f20 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -562,8 +562,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) | |||
562 | 562 | ||
563 | if (rq->cmd_flags & REQ_SOFTBARRIER) { | 563 | if (rq->cmd_flags & REQ_SOFTBARRIER) { |
564 | /* barriers are scheduling boundary, update end_sector */ | 564 | /* barriers are scheduling boundary, update end_sector */ |
565 | if (rq->cmd_type == REQ_TYPE_FS || | 565 | if (rq->cmd_type == REQ_TYPE_FS) { |
566 | (rq->cmd_flags & REQ_DISCARD)) { | ||
567 | q->end_sector = rq_end_sector(rq); | 566 | q->end_sector = rq_end_sector(rq); |
568 | q->boundary_rq = rq; | 567 | q->boundary_rq = rq; |
569 | } | 568 | } |
@@ -605,8 +604,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) | |||
605 | if (elv_attempt_insert_merge(q, rq)) | 604 | if (elv_attempt_insert_merge(q, rq)) |
606 | break; | 605 | break; |
607 | case ELEVATOR_INSERT_SORT: | 606 | case ELEVATOR_INSERT_SORT: |
608 | BUG_ON(rq->cmd_type != REQ_TYPE_FS && | 607 | BUG_ON(rq->cmd_type != REQ_TYPE_FS); |
609 | !(rq->cmd_flags & REQ_DISCARD)); | ||
610 | rq->cmd_flags |= REQ_SORTED; | 608 | rq->cmd_flags |= REQ_SORTED; |
611 | q->nr_sorted++; | 609 | q->nr_sorted++; |
612 | if (rq_mergeable(rq)) { | 610 | if (rq_mergeable(rq)) { |
diff --git a/block/ioctl.c b/block/ioctl.c
index 4a85096f5410..a31d91d9bc5a 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -185,6 +185,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, | |||
185 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); | 185 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); |
186 | } | 186 | } |
187 | 187 | ||
188 | static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start, | ||
189 | uint64_t len) | ||
190 | { | ||
191 | if (start & 511) | ||
192 | return -EINVAL; | ||
193 | if (len & 511) | ||
194 | return -EINVAL; | ||
195 | start >>= 9; | ||
196 | len >>= 9; | ||
197 | |||
198 | if (start + len > (i_size_read(bdev->bd_inode) >> 9)) | ||
199 | return -EINVAL; | ||
200 | |||
201 | return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL); | ||
202 | } | ||
203 | |||
188 | static int put_ushort(unsigned long arg, unsigned short val) | 204 | static int put_ushort(unsigned long arg, unsigned short val) |
189 | { | 205 | { |
190 | return put_user(val, (unsigned short __user *)arg); | 206 | return put_user(val, (unsigned short __user *)arg); |
@@ -300,6 +316,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, | |||
300 | return blk_ioctl_discard(bdev, range[0], range[1], | 316 | return blk_ioctl_discard(bdev, range[0], range[1], |
301 | cmd == BLKSECDISCARD); | 317 | cmd == BLKSECDISCARD); |
302 | } | 318 | } |
319 | case BLKZEROOUT: { | ||
320 | uint64_t range[2]; | ||
321 | |||
322 | if (!(mode & FMODE_WRITE)) | ||
323 | return -EBADF; | ||
324 | |||
325 | if (copy_from_user(range, (void __user *)arg, sizeof(range))) | ||
326 | return -EFAULT; | ||
327 | |||
328 | return blk_ioctl_zeroout(bdev, range[0], range[1]); | ||
329 | } | ||
303 | 330 | ||
304 | case HDIO_GETGEO: { | 331 | case HDIO_GETGEO: { |
305 | struct hd_geometry geo; | 332 | struct hd_geometry geo; |
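The new BLKZEROOUT ioctl above takes a {start, length} pair in bytes, both 512-byte aligned and within the device size, on a descriptor opened for writing. A userspace sketch (illustrative; BLKZEROOUT is assumed to be exported through <linux/fs.h>):

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/fs.h>

    int main(int argc, char **argv)
    {
            uint64_t range[2] = { 0, 1024 * 1024 };  /* start, length -- bytes, 512-aligned */
            int fd;

            if (argc < 2) {
                    fprintf(stderr, "usage: %s <blockdev>\n", argv[0]);
                    return 1;
            }
            fd = open(argv[1], O_WRONLY);            /* BLKZEROOUT requires FMODE_WRITE */
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            if (ioctl(fd, BLKZEROOUT, range))
                    perror("BLKZEROOUT");
            close(fd);
            return 0;
    }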
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index f93a0320e952..f55683ad4ffa 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -162,23 +162,12 @@ static const struct block_device_operations drbd_ops = { | |||
162 | .release = drbd_release, | 162 | .release = drbd_release, |
163 | }; | 163 | }; |
164 | 164 | ||
165 | static void bio_destructor_drbd(struct bio *bio) | ||
166 | { | ||
167 | bio_free(bio, drbd_md_io_bio_set); | ||
168 | } | ||
169 | |||
170 | struct bio *bio_alloc_drbd(gfp_t gfp_mask) | 165 | struct bio *bio_alloc_drbd(gfp_t gfp_mask) |
171 | { | 166 | { |
172 | struct bio *bio; | ||
173 | |||
174 | if (!drbd_md_io_bio_set) | 167 | if (!drbd_md_io_bio_set) |
175 | return bio_alloc(gfp_mask, 1); | 168 | return bio_alloc(gfp_mask, 1); |
176 | 169 | ||
177 | bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); | 170 | return bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); |
178 | if (!bio) | ||
179 | return NULL; | ||
180 | bio->bi_destructor = bio_destructor_drbd; | ||
181 | return bio; | ||
182 | } | 171 | } |
183 | 172 | ||
184 | #ifdef __CHECKER__ | 173 | #ifdef __CHECKER__ |
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c
index 87311ebac0db..1bbc681688e4 100644
--- a/drivers/block/osdblk.c
+++ b/drivers/block/osdblk.c
@@ -266,11 +266,10 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) | |||
266 | struct bio *tmp, *new_chain = NULL, *tail = NULL; | 266 | struct bio *tmp, *new_chain = NULL, *tail = NULL; |
267 | 267 | ||
268 | while (old_chain) { | 268 | while (old_chain) { |
269 | tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); | 269 | tmp = bio_clone_kmalloc(old_chain, gfpmask); |
270 | if (!tmp) | 270 | if (!tmp) |
271 | goto err_out; | 271 | goto err_out; |
272 | 272 | ||
273 | __bio_clone(tmp, old_chain); | ||
274 | tmp->bi_bdev = NULL; | 273 | tmp->bi_bdev = NULL; |
275 | gfpmask &= ~__GFP_WAIT; | 274 | gfpmask &= ~__GFP_WAIT; |
276 | tmp->bi_next = NULL; | 275 | tmp->bi_next = NULL; |
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index ba66e4445f41..2e7de7a59bfc 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -522,38 +522,6 @@ static void pkt_bio_finished(struct pktcdvd_device *pd) | |||
522 | } | 522 | } |
523 | } | 523 | } |
524 | 524 | ||
525 | static void pkt_bio_destructor(struct bio *bio) | ||
526 | { | ||
527 | kfree(bio->bi_io_vec); | ||
528 | kfree(bio); | ||
529 | } | ||
530 | |||
531 | static struct bio *pkt_bio_alloc(int nr_iovecs) | ||
532 | { | ||
533 | struct bio_vec *bvl = NULL; | ||
534 | struct bio *bio; | ||
535 | |||
536 | bio = kmalloc(sizeof(struct bio), GFP_KERNEL); | ||
537 | if (!bio) | ||
538 | goto no_bio; | ||
539 | bio_init(bio); | ||
540 | |||
541 | bvl = kcalloc(nr_iovecs, sizeof(struct bio_vec), GFP_KERNEL); | ||
542 | if (!bvl) | ||
543 | goto no_bvl; | ||
544 | |||
545 | bio->bi_max_vecs = nr_iovecs; | ||
546 | bio->bi_io_vec = bvl; | ||
547 | bio->bi_destructor = pkt_bio_destructor; | ||
548 | |||
549 | return bio; | ||
550 | |||
551 | no_bvl: | ||
552 | kfree(bio); | ||
553 | no_bio: | ||
554 | return NULL; | ||
555 | } | ||
556 | |||
557 | /* | 525 | /* |
558 | * Allocate a packet_data struct | 526 | * Allocate a packet_data struct |
559 | */ | 527 | */ |
@@ -567,7 +535,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames) | |||
567 | goto no_pkt; | 535 | goto no_pkt; |
568 | 536 | ||
569 | pkt->frames = frames; | 537 | pkt->frames = frames; |
570 | pkt->w_bio = pkt_bio_alloc(frames); | 538 | pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames); |
571 | if (!pkt->w_bio) | 539 | if (!pkt->w_bio) |
572 | goto no_bio; | 540 | goto no_bio; |
573 | 541 | ||
@@ -581,9 +549,10 @@ static struct packet_data *pkt_alloc_packet_data(int frames) | |||
581 | bio_list_init(&pkt->orig_bios); | 549 | bio_list_init(&pkt->orig_bios); |
582 | 550 | ||
583 | for (i = 0; i < frames; i++) { | 551 | for (i = 0; i < frames; i++) { |
584 | struct bio *bio = pkt_bio_alloc(1); | 552 | struct bio *bio = bio_kmalloc(GFP_KERNEL, 1); |
585 | if (!bio) | 553 | if (!bio) |
586 | goto no_rd_bio; | 554 | goto no_rd_bio; |
555 | |||
587 | pkt->r_bios[i] = bio; | 556 | pkt->r_bios[i] = bio; |
588 | } | 557 | } |
589 | 558 | ||
@@ -1111,21 +1080,17 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) | |||
1111 | * Schedule reads for missing parts of the packet. | 1080 | * Schedule reads for missing parts of the packet. |
1112 | */ | 1081 | */ |
1113 | for (f = 0; f < pkt->frames; f++) { | 1082 | for (f = 0; f < pkt->frames; f++) { |
1114 | struct bio_vec *vec; | ||
1115 | |||
1116 | int p, offset; | 1083 | int p, offset; |
1084 | |||
1117 | if (written[f]) | 1085 | if (written[f]) |
1118 | continue; | 1086 | continue; |
1087 | |||
1119 | bio = pkt->r_bios[f]; | 1088 | bio = pkt->r_bios[f]; |
1120 | vec = bio->bi_io_vec; | 1089 | bio_reset(bio); |
1121 | bio_init(bio); | ||
1122 | bio->bi_max_vecs = 1; | ||
1123 | bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); | 1090 | bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); |
1124 | bio->bi_bdev = pd->bdev; | 1091 | bio->bi_bdev = pd->bdev; |
1125 | bio->bi_end_io = pkt_end_io_read; | 1092 | bio->bi_end_io = pkt_end_io_read; |
1126 | bio->bi_private = pkt; | 1093 | bio->bi_private = pkt; |
1127 | bio->bi_io_vec = vec; | ||
1128 | bio->bi_destructor = pkt_bio_destructor; | ||
1129 | 1094 | ||
1130 | p = (f * CD_FRAMESIZE) / PAGE_SIZE; | 1095 | p = (f * CD_FRAMESIZE) / PAGE_SIZE; |
1131 | offset = (f * CD_FRAMESIZE) % PAGE_SIZE; | 1096 | offset = (f * CD_FRAMESIZE) % PAGE_SIZE; |
@@ -1418,14 +1383,11 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) | |||
1418 | } | 1383 | } |
1419 | 1384 | ||
1420 | /* Start the write request */ | 1385 | /* Start the write request */ |
1421 | bio_init(pkt->w_bio); | 1386 | bio_reset(pkt->w_bio); |
1422 | pkt->w_bio->bi_max_vecs = PACKET_MAX_SIZE; | ||
1423 | pkt->w_bio->bi_sector = pkt->sector; | 1387 | pkt->w_bio->bi_sector = pkt->sector; |
1424 | pkt->w_bio->bi_bdev = pd->bdev; | 1388 | pkt->w_bio->bi_bdev = pd->bdev; |
1425 | pkt->w_bio->bi_end_io = pkt_end_io_packet_write; | 1389 | pkt->w_bio->bi_end_io = pkt_end_io_packet_write; |
1426 | pkt->w_bio->bi_private = pkt; | 1390 | pkt->w_bio->bi_private = pkt; |
1427 | pkt->w_bio->bi_io_vec = bvec; | ||
1428 | pkt->w_bio->bi_destructor = pkt_bio_destructor; | ||
1429 | for (f = 0; f < pkt->frames; f++) | 1391 | for (f = 0; f < pkt->frames; f++) |
1430 | if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) | 1392 | if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) |
1431 | BUG(); | 1393 | BUG(); |
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index 54a3a6d09819..0bb207eaef2f 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd, | |||
285 | 285 | ||
286 | static const struct file_operations raw_fops = { | 286 | static const struct file_operations raw_fops = { |
287 | .read = do_sync_read, | 287 | .read = do_sync_read, |
288 | .aio_read = generic_file_aio_read, | 288 | .aio_read = blkdev_aio_read, |
289 | .write = do_sync_write, | 289 | .write = do_sync_write, |
290 | .aio_write = blkdev_aio_write, | 290 | .aio_write = blkdev_aio_write, |
291 | .fsync = blkdev_fsync, | 291 | .fsync = blkdev_fsync, |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 664743d6a6cd..bbf459bca61d 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -798,14 +798,6 @@ static int crypt_convert(struct crypt_config *cc, | |||
798 | return 0; | 798 | return 0; |
799 | } | 799 | } |
800 | 800 | ||
801 | static void dm_crypt_bio_destructor(struct bio *bio) | ||
802 | { | ||
803 | struct dm_crypt_io *io = bio->bi_private; | ||
804 | struct crypt_config *cc = io->cc; | ||
805 | |||
806 | bio_free(bio, cc->bs); | ||
807 | } | ||
808 | |||
809 | /* | 801 | /* |
810 | * Generate a new unfragmented bio with the given size | 802 | * Generate a new unfragmented bio with the given size |
811 | * This should never violate the device limitations | 803 | * This should never violate the device limitations |
@@ -974,7 +966,6 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) | |||
974 | clone->bi_end_io = crypt_endio; | 966 | clone->bi_end_io = crypt_endio; |
975 | clone->bi_bdev = cc->dev->bdev; | 967 | clone->bi_bdev = cc->dev->bdev; |
976 | clone->bi_rw = io->base_bio->bi_rw; | 968 | clone->bi_rw = io->base_bio->bi_rw; |
977 | clone->bi_destructor = dm_crypt_bio_destructor; | ||
978 | } | 969 | } |
979 | 970 | ||
980 | static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) | 971 | static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) |
@@ -988,19 +979,14 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) | |||
988 | * copy the required bvecs because we need the original | 979 | * copy the required bvecs because we need the original |
989 | * one in order to decrypt the whole bio data *afterwards*. | 980 | * one in order to decrypt the whole bio data *afterwards*. |
990 | */ | 981 | */ |
991 | clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs); | 982 | clone = bio_clone_bioset(base_bio, gfp, cc->bs); |
992 | if (!clone) | 983 | if (!clone) |
993 | return 1; | 984 | return 1; |
994 | 985 | ||
995 | crypt_inc_pending(io); | 986 | crypt_inc_pending(io); |
996 | 987 | ||
997 | clone_init(io, clone); | 988 | clone_init(io, clone); |
998 | clone->bi_idx = 0; | ||
999 | clone->bi_vcnt = bio_segments(base_bio); | ||
1000 | clone->bi_size = base_bio->bi_size; | ||
1001 | clone->bi_sector = cc->start + io->sector; | 989 | clone->bi_sector = cc->start + io->sector; |
1002 | memcpy(clone->bi_io_vec, bio_iovec(base_bio), | ||
1003 | sizeof(struct bio_vec) * clone->bi_vcnt); | ||
1004 | 990 | ||
1005 | generic_make_request(clone); | 991 | generic_make_request(clone); |
1006 | return 0; | 992 | return 0; |
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index ea5dd289fe2a..1c46f97d6664 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -249,16 +249,6 @@ static void vm_dp_init(struct dpages *dp, void *data) | |||
249 | dp->context_ptr = data; | 249 | dp->context_ptr = data; |
250 | } | 250 | } |
251 | 251 | ||
252 | static void dm_bio_destructor(struct bio *bio) | ||
253 | { | ||
254 | unsigned region; | ||
255 | struct io *io; | ||
256 | |||
257 | retrieve_io_and_region_from_bio(bio, &io, ®ion); | ||
258 | |||
259 | bio_free(bio, io->client->bios); | ||
260 | } | ||
261 | |||
262 | /* | 252 | /* |
263 | * Functions for getting the pages from kernel memory. | 253 | * Functions for getting the pages from kernel memory. |
264 | */ | 254 | */ |
@@ -317,7 +307,6 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, | |||
317 | bio->bi_sector = where->sector + (where->count - remaining); | 307 | bio->bi_sector = where->sector + (where->count - remaining); |
318 | bio->bi_bdev = where->bdev; | 308 | bio->bi_bdev = where->bdev; |
319 | bio->bi_end_io = endio; | 309 | bio->bi_end_io = endio; |
320 | bio->bi_destructor = dm_bio_destructor; | ||
321 | store_io_and_region_in_bio(bio, io, region); | 310 | store_io_and_region_in_bio(bio, io, region); |
322 | 311 | ||
323 | if (rw & REQ_DISCARD) { | 312 | if (rw & REQ_DISCARD) { |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 67ffa391edcf..66ceaff6455c 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -86,12 +86,17 @@ struct dm_rq_target_io { | |||
86 | }; | 86 | }; |
87 | 87 | ||
88 | /* | 88 | /* |
89 | * For request-based dm. | 89 | * For request-based dm - the bio clones we allocate are embedded in these |
90 | * One of these is allocated per bio. | 90 | * structs. |
91 | * | ||
92 | * We allocate these with bio_alloc_bioset, using the front_pad parameter when | ||
93 | * the bioset is created - this means the bio has to come at the end of the | ||
94 | * struct. | ||
91 | */ | 95 | */ |
92 | struct dm_rq_clone_bio_info { | 96 | struct dm_rq_clone_bio_info { |
93 | struct bio *orig; | 97 | struct bio *orig; |
94 | struct dm_rq_target_io *tio; | 98 | struct dm_rq_target_io *tio; |
99 | struct bio clone; | ||
95 | }; | 100 | }; |
96 | 101 | ||
97 | union map_info *dm_get_mapinfo(struct bio *bio) | 102 | union map_info *dm_get_mapinfo(struct bio *bio) |
@@ -211,6 +216,11 @@ struct dm_md_mempools { | |||
211 | static struct kmem_cache *_io_cache; | 216 | static struct kmem_cache *_io_cache; |
212 | static struct kmem_cache *_tio_cache; | 217 | static struct kmem_cache *_tio_cache; |
213 | static struct kmem_cache *_rq_tio_cache; | 218 | static struct kmem_cache *_rq_tio_cache; |
219 | |||
220 | /* | ||
221 | * Unused now, and needs to be deleted. But since io_pool is overloaded and it's | ||
222 | * still used for _io_cache, I'm leaving this for a later cleanup | ||
223 | */ | ||
214 | static struct kmem_cache *_rq_bio_info_cache; | 224 | static struct kmem_cache *_rq_bio_info_cache; |
215 | 225 | ||
216 | static int __init local_init(void) | 226 | static int __init local_init(void) |
@@ -467,16 +477,6 @@ static void free_rq_tio(struct dm_rq_target_io *tio) | |||
467 | mempool_free(tio, tio->md->tio_pool); | 477 | mempool_free(tio, tio->md->tio_pool); |
468 | } | 478 | } |
469 | 479 | ||
470 | static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md) | ||
471 | { | ||
472 | return mempool_alloc(md->io_pool, GFP_ATOMIC); | ||
473 | } | ||
474 | |||
475 | static void free_bio_info(struct dm_rq_clone_bio_info *info) | ||
476 | { | ||
477 | mempool_free(info, info->tio->md->io_pool); | ||
478 | } | ||
479 | |||
480 | static int md_in_flight(struct mapped_device *md) | 480 | static int md_in_flight(struct mapped_device *md) |
481 | { | 481 | { |
482 | return atomic_read(&md->pending[READ]) + | 482 | return atomic_read(&md->pending[READ]) + |
@@ -681,11 +681,6 @@ static void clone_endio(struct bio *bio, int error) | |||
681 | } | 681 | } |
682 | } | 682 | } |
683 | 683 | ||
684 | /* | ||
685 | * Store md for cleanup instead of tio which is about to get freed. | ||
686 | */ | ||
687 | bio->bi_private = md->bs; | ||
688 | |||
689 | free_tio(md, tio); | 684 | free_tio(md, tio); |
690 | bio_put(bio); | 685 | bio_put(bio); |
691 | dec_pending(io, error); | 686 | dec_pending(io, error); |
@@ -1036,11 +1031,6 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, | |||
1036 | /* error the io and bail out, or requeue it if needed */ | 1031 | /* error the io and bail out, or requeue it if needed */ |
1037 | md = tio->io->md; | 1032 | md = tio->io->md; |
1038 | dec_pending(tio->io, r); | 1033 | dec_pending(tio->io, r); |
1039 | /* | ||
1040 | * Store bio_set for cleanup. | ||
1041 | */ | ||
1042 | clone->bi_end_io = NULL; | ||
1043 | clone->bi_private = md->bs; | ||
1044 | bio_put(clone); | 1034 | bio_put(clone); |
1045 | free_tio(md, tio); | 1035 | free_tio(md, tio); |
1046 | } else if (r) { | 1036 | } else if (r) { |
@@ -1059,13 +1049,6 @@ struct clone_info { | |||
1059 | unsigned short idx; | 1049 | unsigned short idx; |
1060 | }; | 1050 | }; |
1061 | 1051 | ||
1062 | static void dm_bio_destructor(struct bio *bio) | ||
1063 | { | ||
1064 | struct bio_set *bs = bio->bi_private; | ||
1065 | |||
1066 | bio_free(bio, bs); | ||
1067 | } | ||
1068 | |||
1069 | /* | 1052 | /* |
1070 | * Creates a little bio that just does part of a bvec. | 1053 | * Creates a little bio that just does part of a bvec. |
1071 | */ | 1054 | */ |
@@ -1077,7 +1060,6 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, | |||
1077 | struct bio_vec *bv = bio->bi_io_vec + idx; | 1060 | struct bio_vec *bv = bio->bi_io_vec + idx; |
1078 | 1061 | ||
1079 | clone = bio_alloc_bioset(GFP_NOIO, 1, bs); | 1062 | clone = bio_alloc_bioset(GFP_NOIO, 1, bs); |
1080 | clone->bi_destructor = dm_bio_destructor; | ||
1081 | *clone->bi_io_vec = *bv; | 1063 | *clone->bi_io_vec = *bv; |
1082 | 1064 | ||
1083 | clone->bi_sector = sector; | 1065 | clone->bi_sector = sector; |
@@ -1090,7 +1072,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, | |||
1090 | clone->bi_flags |= 1 << BIO_CLONED; | 1072 | clone->bi_flags |= 1 << BIO_CLONED; |
1091 | 1073 | ||
1092 | if (bio_integrity(bio)) { | 1074 | if (bio_integrity(bio)) { |
1093 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); | 1075 | bio_integrity_clone(clone, bio, GFP_NOIO); |
1094 | bio_integrity_trim(clone, | 1076 | bio_integrity_trim(clone, |
1095 | bio_sector_offset(bio, idx, offset), len); | 1077 | bio_sector_offset(bio, idx, offset), len); |
1096 | } | 1078 | } |
@@ -1109,7 +1091,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
1109 | 1091 | ||
1110 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); | 1092 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); |
1111 | __bio_clone(clone, bio); | 1093 | __bio_clone(clone, bio); |
1112 | clone->bi_destructor = dm_bio_destructor; | ||
1113 | clone->bi_sector = sector; | 1094 | clone->bi_sector = sector; |
1114 | clone->bi_idx = idx; | 1095 | clone->bi_idx = idx; |
1115 | clone->bi_vcnt = idx + bv_count; | 1096 | clone->bi_vcnt = idx + bv_count; |
@@ -1117,7 +1098,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
1117 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); | 1098 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); |
1118 | 1099 | ||
1119 | if (bio_integrity(bio)) { | 1100 | if (bio_integrity(bio)) { |
1120 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); | 1101 | bio_integrity_clone(clone, bio, GFP_NOIO); |
1121 | 1102 | ||
1122 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) | 1103 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) |
1123 | bio_integrity_trim(clone, | 1104 | bio_integrity_trim(clone, |
@@ -1152,9 +1133,8 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, | |||
1152 | * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush | 1133 | * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush |
1153 | * and discard, so no need for concern about wasted bvec allocations. | 1134 | * and discard, so no need for concern about wasted bvec allocations. |
1154 | */ | 1135 | */ |
1155 | clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs); | 1136 | clone = bio_clone_bioset(ci->bio, GFP_NOIO, ci->md->bs); |
1156 | __bio_clone(clone, ci->bio); | 1137 | |
1157 | clone->bi_destructor = dm_bio_destructor; | ||
1158 | if (len) { | 1138 | if (len) { |
1159 | clone->bi_sector = ci->sector; | 1139 | clone->bi_sector = ci->sector; |
1160 | clone->bi_size = to_bytes(len); | 1140 | clone->bi_size = to_bytes(len); |
@@ -1484,30 +1464,17 @@ void dm_dispatch_request(struct request *rq) | |||
1484 | } | 1464 | } |
1485 | EXPORT_SYMBOL_GPL(dm_dispatch_request); | 1465 | EXPORT_SYMBOL_GPL(dm_dispatch_request); |
1486 | 1466 | ||
1487 | static void dm_rq_bio_destructor(struct bio *bio) | ||
1488 | { | ||
1489 | struct dm_rq_clone_bio_info *info = bio->bi_private; | ||
1490 | struct mapped_device *md = info->tio->md; | ||
1491 | |||
1492 | free_bio_info(info); | ||
1493 | bio_free(bio, md->bs); | ||
1494 | } | ||
1495 | |||
1496 | static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, | 1467 | static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, |
1497 | void *data) | 1468 | void *data) |
1498 | { | 1469 | { |
1499 | struct dm_rq_target_io *tio = data; | 1470 | struct dm_rq_target_io *tio = data; |
1500 | struct mapped_device *md = tio->md; | 1471 | struct dm_rq_clone_bio_info *info = |
1501 | struct dm_rq_clone_bio_info *info = alloc_bio_info(md); | 1472 | container_of(bio, struct dm_rq_clone_bio_info, clone); |
1502 | |||
1503 | if (!info) | ||
1504 | return -ENOMEM; | ||
1505 | 1473 | ||
1506 | info->orig = bio_orig; | 1474 | info->orig = bio_orig; |
1507 | info->tio = tio; | 1475 | info->tio = tio; |
1508 | bio->bi_end_io = end_clone_bio; | 1476 | bio->bi_end_io = end_clone_bio; |
1509 | bio->bi_private = info; | 1477 | bio->bi_private = info; |
1510 | bio->bi_destructor = dm_rq_bio_destructor; | ||
1511 | 1478 | ||
1512 | return 0; | 1479 | return 0; |
1513 | } | 1480 | } |
@@ -2771,7 +2738,10 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) | |||
2771 | if (!pools->tio_pool) | 2738 | if (!pools->tio_pool) |
2772 | goto free_io_pool_and_out; | 2739 | goto free_io_pool_and_out; |
2773 | 2740 | ||
2774 | pools->bs = bioset_create(pool_size, 0); | 2741 | pools->bs = (type == DM_TYPE_BIO_BASED) ? |
2742 | bioset_create(pool_size, 0) : | ||
2743 | bioset_create(pool_size, | ||
2744 | offsetof(struct dm_rq_clone_bio_info, clone)); | ||
2775 | if (!pools->bs) | 2745 | if (!pools->bs) |
2776 | goto free_tio_pool_and_out; | 2746 | goto free_tio_pool_and_out; |
2777 | 2747 | ||
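
For illustration, a minimal sketch of the front-padding pattern the request-based dm path now relies on: the per-clone bookkeeping lives in front of the bio inside the bioset allocation and is recovered with container_of(), so no separate destructor or info mempool is needed. The names below (my_bio_info, my_pool_init, my_endio, my_bs) are hypothetical, not part of the patch.

#include <linux/kernel.h>
#include <linux/bio.h>

struct my_bio_info {
	void		*private_state;		/* hypothetical per-clone state */
	struct bio	clone;			/* must be the last member */
};

static struct bio_set *my_bs;

static int my_pool_init(void)
{
	/* reserve room in front of every bio allocated from this set */
	my_bs = bioset_create(256, offsetof(struct my_bio_info, clone));
	return my_bs ? 0 : -ENOMEM;
}

static void my_endio(struct bio *bio, int error)
{
	/* recover the wrapper without any lookup or extra allocation */
	struct my_bio_info *info = container_of(bio, struct my_bio_info, clone);

	/* ... complete the clone using info->private_state ... */
	bio_put(bio);	/* bi_pool points back at my_bs, so the free is automatic */
}
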
diff --git a/drivers/md/md.c b/drivers/md/md.c index 308e87b417e0..95c88012a3b9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
@@ -155,32 +155,17 @@ static int start_readonly; | |||
155 | * like bio_clone, but with a local bio set | 155 | * like bio_clone, but with a local bio set |
156 | */ | 156 | */ |
157 | 157 | ||
158 | static void mddev_bio_destructor(struct bio *bio) | ||
159 | { | ||
160 | struct mddev *mddev, **mddevp; | ||
161 | |||
162 | mddevp = (void*)bio; | ||
163 | mddev = mddevp[-1]; | ||
164 | |||
165 | bio_free(bio, mddev->bio_set); | ||
166 | } | ||
167 | |||
168 | struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, | 158 | struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, |
169 | struct mddev *mddev) | 159 | struct mddev *mddev) |
170 | { | 160 | { |
171 | struct bio *b; | 161 | struct bio *b; |
172 | struct mddev **mddevp; | ||
173 | 162 | ||
174 | if (!mddev || !mddev->bio_set) | 163 | if (!mddev || !mddev->bio_set) |
175 | return bio_alloc(gfp_mask, nr_iovecs); | 164 | return bio_alloc(gfp_mask, nr_iovecs); |
176 | 165 | ||
177 | b = bio_alloc_bioset(gfp_mask, nr_iovecs, | 166 | b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set); |
178 | mddev->bio_set); | ||
179 | if (!b) | 167 | if (!b) |
180 | return NULL; | 168 | return NULL; |
181 | mddevp = (void*)b; | ||
182 | mddevp[-1] = mddev; | ||
183 | b->bi_destructor = mddev_bio_destructor; | ||
184 | return b; | 169 | return b; |
185 | } | 170 | } |
186 | EXPORT_SYMBOL_GPL(bio_alloc_mddev); | 171 | EXPORT_SYMBOL_GPL(bio_alloc_mddev); |
@@ -188,32 +173,10 @@ EXPORT_SYMBOL_GPL(bio_alloc_mddev); | |||
188 | struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, | 173 | struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, |
189 | struct mddev *mddev) | 174 | struct mddev *mddev) |
190 | { | 175 | { |
191 | struct bio *b; | ||
192 | struct mddev **mddevp; | ||
193 | |||
194 | if (!mddev || !mddev->bio_set) | 176 | if (!mddev || !mddev->bio_set) |
195 | return bio_clone(bio, gfp_mask); | 177 | return bio_clone(bio, gfp_mask); |
196 | 178 | ||
197 | b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, | 179 | return bio_clone_bioset(bio, gfp_mask, mddev->bio_set); |
198 | mddev->bio_set); | ||
199 | if (!b) | ||
200 | return NULL; | ||
201 | mddevp = (void*)b; | ||
202 | mddevp[-1] = mddev; | ||
203 | b->bi_destructor = mddev_bio_destructor; | ||
204 | __bio_clone(b, bio); | ||
205 | if (bio_integrity(bio)) { | ||
206 | int ret; | ||
207 | |||
208 | ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set); | ||
209 | |||
210 | if (ret < 0) { | ||
211 | bio_put(b); | ||
212 | return NULL; | ||
213 | } | ||
214 | } | ||
215 | |||
216 | return b; | ||
217 | } | 180 | } |
218 | EXPORT_SYMBOL_GPL(bio_clone_mddev); | 181 | EXPORT_SYMBOL_GPL(bio_clone_mddev); |
219 | 182 | ||
@@ -5006,8 +4969,7 @@ int md_run(struct mddev *mddev) | |||
5006 | } | 4969 | } |
5007 | 4970 | ||
5008 | if (mddev->bio_set == NULL) | 4971 | if (mddev->bio_set == NULL) |
5009 | mddev->bio_set = bioset_create(BIO_POOL_SIZE, | 4972 | mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0); |
5010 | sizeof(struct mddev *)); | ||
5011 | 4973 | ||
5012 | spin_lock(&pers_lock); | 4974 | spin_lock(&pers_lock); |
5013 | pers = find_pers(mddev->level, mddev->clevel); | 4975 | pers = find_pers(mddev->level, mddev->clevel); |
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index de63a1fc3737..a9e4fa95dfaa 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
@@ -422,6 +422,7 @@ static int raid0_run(struct mddev *mddev) | |||
422 | if (md_check_no_bitmap(mddev)) | 422 | if (md_check_no_bitmap(mddev)) |
423 | return -EINVAL; | 423 | return -EINVAL; |
424 | blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); | 424 | blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); |
425 | blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); | ||
425 | 426 | ||
426 | /* if private is not null, we are here after takeover */ | 427 | /* if private is not null, we are here after takeover */ |
427 | if (mddev->private == NULL) { | 428 | if (mddev->private == NULL) { |
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 29408d46a6d9..57d7674c5013 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c | |||
@@ -553,14 +553,6 @@ static void iblock_complete_cmd(struct se_cmd *cmd) | |||
553 | kfree(ibr); | 553 | kfree(ibr); |
554 | } | 554 | } |
555 | 555 | ||
556 | static void iblock_bio_destructor(struct bio *bio) | ||
557 | { | ||
558 | struct se_cmd *cmd = bio->bi_private; | ||
559 | struct iblock_dev *ib_dev = cmd->se_dev->dev_ptr; | ||
560 | |||
561 | bio_free(bio, ib_dev->ibd_bio_set); | ||
562 | } | ||
563 | |||
564 | static struct bio * | 556 | static struct bio * |
565 | iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num) | 557 | iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num) |
566 | { | 558 | { |
@@ -582,7 +574,6 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num) | |||
582 | 574 | ||
583 | bio->bi_bdev = ib_dev->ibd_bd; | 575 | bio->bi_bdev = ib_dev->ibd_bd; |
584 | bio->bi_private = cmd; | 576 | bio->bi_private = cmd; |
585 | bio->bi_destructor = iblock_bio_destructor; | ||
586 | bio->bi_end_io = &iblock_bio_done; | 577 | bio->bi_end_io = &iblock_bio_done; |
587 | bio->bi_sector = lba; | 578 | bio->bi_sector = lba; |
588 | return bio; | 579 | return bio; |
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index e85c04b9f61c..a3f28f331b2b 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
@@ -70,23 +70,25 @@ static inline int use_bip_pool(unsigned int idx) | |||
70 | } | 70 | } |
71 | 71 | ||
72 | /** | 72 | /** |
73 | * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio | 73 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio |
74 | * @bio: bio to attach integrity metadata to | 74 | * @bio: bio to attach integrity metadata to |
75 | * @gfp_mask: Memory allocation mask | 75 | * @gfp_mask: Memory allocation mask |
76 | * @nr_vecs: Number of integrity metadata scatter-gather elements | 76 | * @nr_vecs: Number of integrity metadata scatter-gather elements |
77 | * @bs: bio_set to allocate from | ||
78 | * | 77 | * |
79 | * Description: This function prepares a bio for attaching integrity | 78 | * Description: This function prepares a bio for attaching integrity |
80 | * metadata. nr_vecs specifies the maximum number of pages containing | 79 | * metadata. nr_vecs specifies the maximum number of pages containing |
81 | * integrity metadata that can be attached. | 80 | * integrity metadata that can be attached. |
82 | */ | 81 | */ |
83 | struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, | 82 | struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, |
84 | gfp_t gfp_mask, | 83 | gfp_t gfp_mask, |
85 | unsigned int nr_vecs, | 84 | unsigned int nr_vecs) |
86 | struct bio_set *bs) | ||
87 | { | 85 | { |
88 | struct bio_integrity_payload *bip; | 86 | struct bio_integrity_payload *bip; |
89 | unsigned int idx = vecs_to_idx(nr_vecs); | 87 | unsigned int idx = vecs_to_idx(nr_vecs); |
88 | struct bio_set *bs = bio->bi_pool; | ||
89 | |||
90 | if (!bs) | ||
91 | bs = fs_bio_set; | ||
90 | 92 | ||
91 | BUG_ON(bio == NULL); | 93 | BUG_ON(bio == NULL); |
92 | bip = NULL; | 94 | bip = NULL; |
@@ -114,37 +116,22 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, | |||
114 | 116 | ||
115 | return bip; | 117 | return bip; |
116 | } | 118 | } |
117 | EXPORT_SYMBOL(bio_integrity_alloc_bioset); | ||
118 | |||
119 | /** | ||
120 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio | ||
121 | * @bio: bio to attach integrity metadata to | ||
122 | * @gfp_mask: Memory allocation mask | ||
123 | * @nr_vecs: Number of integrity metadata scatter-gather elements | ||
124 | * | ||
125 | * Description: This function prepares a bio for attaching integrity | ||
126 | * metadata. nr_vecs specifies the maximum number of pages containing | ||
127 | * integrity metadata that can be attached. | ||
128 | */ | ||
129 | struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, | ||
130 | gfp_t gfp_mask, | ||
131 | unsigned int nr_vecs) | ||
132 | { | ||
133 | return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); | ||
134 | } | ||
135 | EXPORT_SYMBOL(bio_integrity_alloc); | 119 | EXPORT_SYMBOL(bio_integrity_alloc); |
136 | 120 | ||
137 | /** | 121 | /** |
138 | * bio_integrity_free - Free bio integrity payload | 122 | * bio_integrity_free - Free bio integrity payload |
139 | * @bio: bio containing bip to be freed | 123 | * @bio: bio containing bip to be freed |
140 | * @bs: bio_set this bio was allocated from | ||
141 | * | 124 | * |
142 | * Description: Used to free the integrity portion of a bio. Usually | 125 | * Description: Used to free the integrity portion of a bio. Usually |
143 | * called from bio_free(). | 126 | * called from bio_free(). |
144 | */ | 127 | */ |
145 | void bio_integrity_free(struct bio *bio, struct bio_set *bs) | 128 | void bio_integrity_free(struct bio *bio) |
146 | { | 129 | { |
147 | struct bio_integrity_payload *bip = bio->bi_integrity; | 130 | struct bio_integrity_payload *bip = bio->bi_integrity; |
131 | struct bio_set *bs = bio->bi_pool; | ||
132 | |||
133 | if (!bs) | ||
134 | bs = fs_bio_set; | ||
148 | 135 | ||
149 | BUG_ON(bip == NULL); | 136 | BUG_ON(bip == NULL); |
150 | 137 | ||
@@ -730,19 +717,18 @@ EXPORT_SYMBOL(bio_integrity_split); | |||
730 | * @bio: New bio | 717 | * @bio: New bio |
731 | * @bio_src: Original bio | 718 | * @bio_src: Original bio |
732 | * @gfp_mask: Memory allocation mask | 719 | * @gfp_mask: Memory allocation mask |
733 | * @bs: bio_set to allocate bip from | ||
734 | * | 720 | * |
735 | * Description: Called to allocate a bip when cloning a bio | 721 | * Description: Called to allocate a bip when cloning a bio |
736 | */ | 722 | */ |
737 | int bio_integrity_clone(struct bio *bio, struct bio *bio_src, | 723 | int bio_integrity_clone(struct bio *bio, struct bio *bio_src, |
738 | gfp_t gfp_mask, struct bio_set *bs) | 724 | gfp_t gfp_mask) |
739 | { | 725 | { |
740 | struct bio_integrity_payload *bip_src = bio_src->bi_integrity; | 726 | struct bio_integrity_payload *bip_src = bio_src->bi_integrity; |
741 | struct bio_integrity_payload *bip; | 727 | struct bio_integrity_payload *bip; |
742 | 728 | ||
743 | BUG_ON(bip_src == NULL); | 729 | BUG_ON(bip_src == NULL); |
744 | 730 | ||
745 | bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); | 731 | bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); |
746 | 732 | ||
747 | if (bip == NULL) | 733 | if (bip == NULL) |
748 | return -EIO; | 734 | return -EIO; |
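
As a small illustrative sketch (not part of the patch): with the bio_set now taken from bio->bi_pool (falling back to fs_bio_set), a caller attaches integrity metadata without naming a pool. attach_one_integrity_page(), meta_page and meta_len are made-up names, and bio_integrity_add_page() is assumed to return 0 on failure, mirroring bio_add_page().

#include <linux/bio.h>

static int attach_one_integrity_page(struct bio *bio, struct page *meta_page,
				     unsigned int meta_len)
{
	struct bio_integrity_payload *bip;

	/* the bip is carved out of bio->bi_pool (or fs_bio_set) automatically */
	bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
	if (!bip)
		return -ENOMEM;

	if (!bio_integrity_add_page(bio, meta_page, meta_len, 0))
		return -ENOMEM;

	return 0;
}
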
diff --git a/fs/bio.c b/fs/bio.c --- a/fs/bio.c +++ b/fs/bio.c | |||
@@ -55,6 +55,7 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { | |||
55 | * IO code that does not need private memory pools. | 55 | * IO code that does not need private memory pools. |
56 | */ | 56 | */ |
57 | struct bio_set *fs_bio_set; | 57 | struct bio_set *fs_bio_set; |
58 | EXPORT_SYMBOL(fs_bio_set); | ||
58 | 59 | ||
59 | /* | 60 | /* |
60 | * Our slab pool management | 61 | * Our slab pool management |
@@ -233,26 +234,37 @@ fallback: | |||
233 | return bvl; | 234 | return bvl; |
234 | } | 235 | } |
235 | 236 | ||
236 | void bio_free(struct bio *bio, struct bio_set *bs) | 237 | static void __bio_free(struct bio *bio) |
237 | { | 238 | { |
239 | bio_disassociate_task(bio); | ||
240 | |||
241 | if (bio_integrity(bio)) | ||
242 | bio_integrity_free(bio); | ||
243 | } | ||
244 | |||
245 | static void bio_free(struct bio *bio) | ||
246 | { | ||
247 | struct bio_set *bs = bio->bi_pool; | ||
238 | void *p; | 248 | void *p; |
239 | 249 | ||
240 | if (bio_has_allocated_vec(bio)) | 250 | __bio_free(bio); |
241 | bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); | ||
242 | 251 | ||
243 | if (bio_integrity(bio)) | 252 | if (bs) { |
244 | bio_integrity_free(bio, bs); | 253 | if (bio_has_allocated_vec(bio)) |
254 | bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); | ||
245 | 255 | ||
246 | /* | 256 | /* |
247 | * If we have front padding, adjust the bio pointer before freeing | 257 | * If we have front padding, adjust the bio pointer before freeing |
248 | */ | 258 | */ |
249 | p = bio; | 259 | p = bio; |
250 | if (bs->front_pad) | ||
251 | p -= bs->front_pad; | 260 | p -= bs->front_pad; |
252 | 261 | ||
253 | mempool_free(p, bs->bio_pool); | 262 | mempool_free(p, bs->bio_pool); |
263 | } else { | ||
264 | /* Bio was allocated by bio_kmalloc() */ | ||
265 | kfree(bio); | ||
266 | } | ||
254 | } | 267 | } |
255 | EXPORT_SYMBOL(bio_free); | ||
256 | 268 | ||
257 | void bio_init(struct bio *bio) | 269 | void bio_init(struct bio *bio) |
258 | { | 270 | { |
@@ -263,48 +275,85 @@ void bio_init(struct bio *bio) | |||
263 | EXPORT_SYMBOL(bio_init); | 275 | EXPORT_SYMBOL(bio_init); |
264 | 276 | ||
265 | /** | 277 | /** |
278 | * bio_reset - reinitialize a bio | ||
279 | * @bio: bio to reset | ||
280 | * | ||
281 | * Description: | ||
282 | * After calling bio_reset(), @bio will be in the same state as a freshly | ||
283 | * allocated bio returned by bio_alloc_bioset() - the only fields that are | ||
284 | * preserved are the ones that are initialized by bio_alloc_bioset(). See | ||
285 | * comment in struct bio. | ||
286 | */ | ||
287 | void bio_reset(struct bio *bio) | ||
288 | { | ||
289 | unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS); | ||
290 | |||
291 | __bio_free(bio); | ||
292 | |||
293 | memset(bio, 0, BIO_RESET_BYTES); | ||
294 | bio->bi_flags = flags|(1 << BIO_UPTODATE); | ||
295 | } | ||
296 | EXPORT_SYMBOL(bio_reset); | ||
297 | |||
298 | /** | ||
266 | * bio_alloc_bioset - allocate a bio for I/O | 299 | * bio_alloc_bioset - allocate a bio for I/O |
267 | * @gfp_mask: the GFP_ mask given to the slab allocator | 300 | * @gfp_mask: the GFP_ mask given to the slab allocator |
268 | * @nr_iovecs: number of iovecs to pre-allocate | 301 | * @nr_iovecs: number of iovecs to pre-allocate |
269 | * @bs: the bio_set to allocate from. | 302 | * @bs: the bio_set to allocate from. |
270 | * | 303 | * |
271 | * Description: | 304 | * Description: |
272 | * bio_alloc_bioset will try its own mempool to satisfy the allocation. | 305 | * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is |
273 | * If %__GFP_WAIT is set then we will block on the internal pool waiting | 306 | * backed by the @bs's mempool. |
274 | * for a &struct bio to become free. | ||
275 | * | 307 | * |
276 | * Note that the caller must set ->bi_destructor on successful return | 308 | * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be |
277 | * of a bio, to do the appropriate freeing of the bio once the reference | 309 | * able to allocate a bio. This is due to the mempool guarantees. To make this |
278 | * count drops to zero. | 310 | * work, callers must never allocate more than 1 bio at a time from this pool. |
279 | **/ | 311 | * Callers that need to allocate more than 1 bio must always submit the |
312 | * previously allocated bio for IO before attempting to allocate a new one. | ||
313 | * Failure to do so can cause deadlocks under memory pressure. | ||
314 | * | ||
315 | * RETURNS: | ||
316 | * Pointer to new bio on success, NULL on failure. | ||
317 | */ | ||
280 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 318 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
281 | { | 319 | { |
320 | unsigned front_pad; | ||
321 | unsigned inline_vecs; | ||
282 | unsigned long idx = BIO_POOL_NONE; | 322 | unsigned long idx = BIO_POOL_NONE; |
283 | struct bio_vec *bvl = NULL; | 323 | struct bio_vec *bvl = NULL; |
284 | struct bio *bio; | 324 | struct bio *bio; |
285 | void *p; | 325 | void *p; |
286 | 326 | ||
287 | p = mempool_alloc(bs->bio_pool, gfp_mask); | 327 | if (!bs) { |
328 | if (nr_iovecs > UIO_MAXIOV) | ||
329 | return NULL; | ||
330 | |||
331 | p = kmalloc(sizeof(struct bio) + | ||
332 | nr_iovecs * sizeof(struct bio_vec), | ||
333 | gfp_mask); | ||
334 | front_pad = 0; | ||
335 | inline_vecs = nr_iovecs; | ||
336 | } else { | ||
337 | p = mempool_alloc(bs->bio_pool, gfp_mask); | ||
338 | front_pad = bs->front_pad; | ||
339 | inline_vecs = BIO_INLINE_VECS; | ||
340 | } | ||
341 | |||
288 | if (unlikely(!p)) | 342 | if (unlikely(!p)) |
289 | return NULL; | 343 | return NULL; |
290 | bio = p + bs->front_pad; | ||
291 | 344 | ||
345 | bio = p + front_pad; | ||
292 | bio_init(bio); | 346 | bio_init(bio); |
293 | 347 | ||
294 | if (unlikely(!nr_iovecs)) | 348 | if (nr_iovecs > inline_vecs) { |
295 | goto out_set; | ||
296 | |||
297 | if (nr_iovecs <= BIO_INLINE_VECS) { | ||
298 | bvl = bio->bi_inline_vecs; | ||
299 | nr_iovecs = BIO_INLINE_VECS; | ||
300 | } else { | ||
301 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); | 349 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); |
302 | if (unlikely(!bvl)) | 350 | if (unlikely(!bvl)) |
303 | goto err_free; | 351 | goto err_free; |
304 | 352 | } else if (nr_iovecs) { | |
305 | nr_iovecs = bvec_nr_vecs(idx); | 353 | bvl = bio->bi_inline_vecs; |
306 | } | 354 | } |
307 | out_set: | 355 | |
356 | bio->bi_pool = bs; | ||
308 | bio->bi_flags |= idx << BIO_POOL_OFFSET; | 357 | bio->bi_flags |= idx << BIO_POOL_OFFSET; |
309 | bio->bi_max_vecs = nr_iovecs; | 358 | bio->bi_max_vecs = nr_iovecs; |
310 | bio->bi_io_vec = bvl; | 359 | bio->bi_io_vec = bvl; |
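
A hedged sketch of the consolidated allocator and the new bio_reset(): passing a NULL bio_set gives a kmalloc-backed bio (the old bio_kmalloc() behaviour), passing a set gives a mempool-backed one, and bio_reset() returns a bio to its freshly-allocated state while preserving bi_pool, bi_max_vecs and bi_io_vec. alloc_examples() and my_bs are placeholder names.

#include <linux/bio.h>

static void alloc_examples(struct bio_set *my_bs)
{
	/* kmalloc-backed, like the removed bio_kmalloc(): can fail */
	struct bio *b1 = bio_alloc_bioset(GFP_NOIO, 4, NULL);

	/* mempool-backed: with __GFP_WAIT this will not fail, but only one
	 * bio may be held unsubmitted at a time (see the comment above) */
	struct bio *b2 = bio_alloc_bioset(GFP_NOIO, 4, my_bs);

	if (b1) {
		/* ... fill in and submit b1, wait for completion ... */
		bio_reset(b1);	/* back to a clean state, vecs and pool kept */
		/* ... reuse b1 for a retry ... */
		bio_put(b1);
	}
	if (b2)
		bio_put(b2);
}
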
@@ -316,80 +365,6 @@ err_free: | |||
316 | } | 365 | } |
317 | EXPORT_SYMBOL(bio_alloc_bioset); | 366 | EXPORT_SYMBOL(bio_alloc_bioset); |
318 | 367 | ||
319 | static void bio_fs_destructor(struct bio *bio) | ||
320 | { | ||
321 | bio_free(bio, fs_bio_set); | ||
322 | } | ||
323 | |||
324 | /** | ||
325 | * bio_alloc - allocate a new bio, memory pool backed | ||
326 | * @gfp_mask: allocation mask to use | ||
327 | * @nr_iovecs: number of iovecs | ||
328 | * | ||
329 | * bio_alloc will allocate a bio and associated bio_vec array that can hold | ||
330 | * at least @nr_iovecs entries. Allocations will be done from the | ||
331 | * fs_bio_set. Also see @bio_alloc_bioset and @bio_kmalloc. | ||
332 | * | ||
333 | * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate | ||
334 | * a bio. This is due to the mempool guarantees. To make this work, callers | ||
335 | * must never allocate more than 1 bio at a time from this pool. Callers | ||
336 | * that need to allocate more than 1 bio must always submit the previously | ||
337 | * allocated bio for IO before attempting to allocate a new one. Failure to | ||
338 | * do so can cause livelocks under memory pressure. | ||
339 | * | ||
340 | * RETURNS: | ||
341 | * Pointer to new bio on success, NULL on failure. | ||
342 | */ | ||
343 | struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
344 | { | ||
345 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | ||
346 | |||
347 | if (bio) | ||
348 | bio->bi_destructor = bio_fs_destructor; | ||
349 | |||
350 | return bio; | ||
351 | } | ||
352 | EXPORT_SYMBOL(bio_alloc); | ||
353 | |||
354 | static void bio_kmalloc_destructor(struct bio *bio) | ||
355 | { | ||
356 | if (bio_integrity(bio)) | ||
357 | bio_integrity_free(bio, fs_bio_set); | ||
358 | kfree(bio); | ||
359 | } | ||
360 | |||
361 | /** | ||
362 | * bio_kmalloc - allocate a bio for I/O using kmalloc() | ||
363 | * @gfp_mask: the GFP_ mask given to the slab allocator | ||
364 | * @nr_iovecs: number of iovecs to pre-allocate | ||
365 | * | ||
366 | * Description: | ||
367 | * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask contains | ||
368 | * %__GFP_WAIT, the allocation is guaranteed to succeed. | ||
369 | * | ||
370 | **/ | ||
371 | struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
372 | { | ||
373 | struct bio *bio; | ||
374 | |||
375 | if (nr_iovecs > UIO_MAXIOV) | ||
376 | return NULL; | ||
377 | |||
378 | bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), | ||
379 | gfp_mask); | ||
380 | if (unlikely(!bio)) | ||
381 | return NULL; | ||
382 | |||
383 | bio_init(bio); | ||
384 | bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; | ||
385 | bio->bi_max_vecs = nr_iovecs; | ||
386 | bio->bi_io_vec = bio->bi_inline_vecs; | ||
387 | bio->bi_destructor = bio_kmalloc_destructor; | ||
388 | |||
389 | return bio; | ||
390 | } | ||
391 | EXPORT_SYMBOL(bio_kmalloc); | ||
392 | |||
393 | void zero_fill_bio(struct bio *bio) | 368 | void zero_fill_bio(struct bio *bio) |
394 | { | 369 | { |
395 | unsigned long flags; | 370 | unsigned long flags; |
@@ -420,11 +395,8 @@ void bio_put(struct bio *bio) | |||
420 | /* | 395 | /* |
421 | * last put frees it | 396 | * last put frees it |
422 | */ | 397 | */ |
423 | if (atomic_dec_and_test(&bio->bi_cnt)) { | 398 | if (atomic_dec_and_test(&bio->bi_cnt)) |
424 | bio_disassociate_task(bio); | 399 | bio_free(bio); |
425 | bio->bi_next = NULL; | ||
426 | bio->bi_destructor(bio); | ||
427 | } | ||
428 | } | 400 | } |
429 | EXPORT_SYMBOL(bio_put); | 401 | EXPORT_SYMBOL(bio_put); |
430 | 402 | ||
@@ -466,26 +438,28 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) | |||
466 | EXPORT_SYMBOL(__bio_clone); | 438 | EXPORT_SYMBOL(__bio_clone); |
467 | 439 | ||
468 | /** | 440 | /** |
469 | * bio_clone - clone a bio | 441 | * bio_clone_bioset - clone a bio |
470 | * @bio: bio to clone | 442 | * @bio: bio to clone |
471 | * @gfp_mask: allocation priority | 443 | * @gfp_mask: allocation priority |
444 | * @bs: bio_set to allocate from | ||
472 | * | 445 | * |
473 | * Like __bio_clone, only also allocates the returned bio | 446 | * Like __bio_clone, only also allocates the returned bio |
474 | */ | 447 | */ |
475 | struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) | 448 | struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask, |
449 | struct bio_set *bs) | ||
476 | { | 450 | { |
477 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); | 451 | struct bio *b; |
478 | 452 | ||
453 | b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs); | ||
479 | if (!b) | 454 | if (!b) |
480 | return NULL; | 455 | return NULL; |
481 | 456 | ||
482 | b->bi_destructor = bio_fs_destructor; | ||
483 | __bio_clone(b, bio); | 457 | __bio_clone(b, bio); |
484 | 458 | ||
485 | if (bio_integrity(bio)) { | 459 | if (bio_integrity(bio)) { |
486 | int ret; | 460 | int ret; |
487 | 461 | ||
488 | ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); | 462 | ret = bio_integrity_clone(b, bio, gfp_mask); |
489 | 463 | ||
490 | if (ret < 0) { | 464 | if (ret < 0) { |
491 | bio_put(b); | 465 | bio_put(b); |
@@ -495,7 +469,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) | |||
495 | 469 | ||
496 | return b; | 470 | return b; |
497 | } | 471 | } |
498 | EXPORT_SYMBOL(bio_clone); | 472 | EXPORT_SYMBOL(bio_clone_bioset); |
499 | 473 | ||
500 | /** | 474 | /** |
501 | * bio_get_nr_vecs - return approx number of vecs | 475 | * bio_get_nr_vecs - return approx number of vecs |
@@ -1501,7 +1475,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) | |||
1501 | trace_block_split(bdev_get_queue(bi->bi_bdev), bi, | 1475 | trace_block_split(bdev_get_queue(bi->bi_bdev), bi, |
1502 | bi->bi_sector + first_sectors); | 1476 | bi->bi_sector + first_sectors); |
1503 | 1477 | ||
1504 | BUG_ON(bi->bi_vcnt != 1); | 1478 | BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0); |
1505 | BUG_ON(bi->bi_idx != 0); | 1479 | BUG_ON(bi->bi_idx != 0); |
1506 | atomic_set(&bp->cnt, 3); | 1480 | atomic_set(&bp->cnt, 3); |
1507 | bp->error = 0; | 1481 | bp->error = 0; |
@@ -1511,17 +1485,22 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) | |||
1511 | bp->bio2.bi_size -= first_sectors << 9; | 1485 | bp->bio2.bi_size -= first_sectors << 9; |
1512 | bp->bio1.bi_size = first_sectors << 9; | 1486 | bp->bio1.bi_size = first_sectors << 9; |
1513 | 1487 | ||
1514 | bp->bv1 = bi->bi_io_vec[0]; | 1488 | if (bi->bi_vcnt != 0) { |
1515 | bp->bv2 = bi->bi_io_vec[0]; | 1489 | bp->bv1 = bi->bi_io_vec[0]; |
1516 | bp->bv2.bv_offset += first_sectors << 9; | 1490 | bp->bv2 = bi->bi_io_vec[0]; |
1517 | bp->bv2.bv_len -= first_sectors << 9; | 1491 | |
1518 | bp->bv1.bv_len = first_sectors << 9; | 1492 | if (bio_is_rw(bi)) { |
1493 | bp->bv2.bv_offset += first_sectors << 9; | ||
1494 | bp->bv2.bv_len -= first_sectors << 9; | ||
1495 | bp->bv1.bv_len = first_sectors << 9; | ||
1496 | } | ||
1519 | 1497 | ||
1520 | bp->bio1.bi_io_vec = &bp->bv1; | 1498 | bp->bio1.bi_io_vec = &bp->bv1; |
1521 | bp->bio2.bi_io_vec = &bp->bv2; | 1499 | bp->bio2.bi_io_vec = &bp->bv2; |
1522 | 1500 | ||
1523 | bp->bio1.bi_max_vecs = 1; | 1501 | bp->bio1.bi_max_vecs = 1; |
1524 | bp->bio2.bi_max_vecs = 1; | 1502 | bp->bio2.bi_max_vecs = 1; |
1503 | } | ||
1525 | 1504 | ||
1526 | bp->bio1.bi_end_io = bio_pair_end_1; | 1505 | bp->bio1.bi_end_io = bio_pair_end_1; |
1527 | bp->bio2.bi_end_io = bio_pair_end_2; | 1506 | bp->bio2.bi_end_io = bio_pair_end_2; |
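
With bi_vcnt == 0 now accepted, a stacking driver can split a discard at a chunk boundary exactly as it splits a data bio. The sketch below loosely mirrors the md/raid0 boundary math; split_at_chunk() is an illustrative name and chunk_sects is assumed to be a power of two.

#include <linux/bio.h>
#include <linux/blkdev.h>

static void split_at_chunk(struct bio *bio, sector_t chunk_sects)
{
	sector_t into_chunk = bio->bi_sector & (chunk_sects - 1);

	if (into_chunk + bio_sectors(bio) > chunk_sects) {
		/* works for data bios and for data-less discards alike */
		struct bio_pair *bp = bio_split(bio, chunk_sects - into_chunk);

		generic_make_request(&bp->bio1);
		generic_make_request(&bp->bio2);
		bio_pair_release(bp);
		return;
	}

	generic_make_request(bio);
}
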
diff --git a/fs/block_dev.c b/fs/block_dev.c index 38e721b35d45..b3c1d3dae77d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -116,6 +116,8 @@ EXPORT_SYMBOL(invalidate_bdev); | |||
116 | 116 | ||
117 | int set_blocksize(struct block_device *bdev, int size) | 117 | int set_blocksize(struct block_device *bdev, int size) |
118 | { | 118 | { |
119 | struct address_space *mapping; | ||
120 | |||
119 | /* Size must be a power of two, and between 512 and PAGE_SIZE */ | 121 | /* Size must be a power of two, and between 512 and PAGE_SIZE */ |
120 | if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) | 122 | if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) |
121 | return -EINVAL; | 123 | return -EINVAL; |
@@ -124,6 +126,19 @@ int set_blocksize(struct block_device *bdev, int size) | |||
124 | if (size < bdev_logical_block_size(bdev)) | 126 | if (size < bdev_logical_block_size(bdev)) |
125 | return -EINVAL; | 127 | return -EINVAL; |
126 | 128 | ||
129 | /* Prevent starting I/O or mapping the device */ | ||
130 | percpu_down_write(&bdev->bd_block_size_semaphore); | ||
131 | |||
132 | /* Check that the block device is not memory mapped */ | ||
133 | mapping = bdev->bd_inode->i_mapping; | ||
134 | mutex_lock(&mapping->i_mmap_mutex); | ||
135 | if (mapping_mapped(mapping)) { | ||
136 | mutex_unlock(&mapping->i_mmap_mutex); | ||
137 | percpu_up_write(&bdev->bd_block_size_semaphore); | ||
138 | return -EBUSY; | ||
139 | } | ||
140 | mutex_unlock(&mapping->i_mmap_mutex); | ||
141 | |||
127 | /* Don't change the size if it is same as current */ | 142 | /* Don't change the size if it is same as current */ |
128 | if (bdev->bd_block_size != size) { | 143 | if (bdev->bd_block_size != size) { |
129 | sync_blockdev(bdev); | 144 | sync_blockdev(bdev); |
@@ -131,6 +146,9 @@ int set_blocksize(struct block_device *bdev, int size) | |||
131 | bdev->bd_inode->i_blkbits = blksize_bits(size); | 146 | bdev->bd_inode->i_blkbits = blksize_bits(size); |
132 | kill_bdev(bdev); | 147 | kill_bdev(bdev); |
133 | } | 148 | } |
149 | |||
150 | percpu_up_write(&bdev->bd_block_size_semaphore); | ||
151 | |||
134 | return 0; | 152 | return 0; |
135 | } | 153 | } |
136 | 154 | ||
@@ -441,6 +459,12 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) | |||
441 | struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); | 459 | struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); |
442 | if (!ei) | 460 | if (!ei) |
443 | return NULL; | 461 | return NULL; |
462 | |||
463 | if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) { | ||
464 | kmem_cache_free(bdev_cachep, ei); | ||
465 | return NULL; | ||
466 | } | ||
467 | |||
444 | return &ei->vfs_inode; | 468 | return &ei->vfs_inode; |
445 | } | 469 | } |
446 | 470 | ||
@@ -449,6 +473,8 @@ static void bdev_i_callback(struct rcu_head *head) | |||
449 | struct inode *inode = container_of(head, struct inode, i_rcu); | 473 | struct inode *inode = container_of(head, struct inode, i_rcu); |
450 | struct bdev_inode *bdi = BDEV_I(inode); | 474 | struct bdev_inode *bdi = BDEV_I(inode); |
451 | 475 | ||
476 | percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore); | ||
477 | |||
452 | kmem_cache_free(bdev_cachep, bdi); | 478 | kmem_cache_free(bdev_cachep, bdi); |
453 | } | 479 | } |
454 | 480 | ||
@@ -1567,6 +1593,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
1567 | return blkdev_ioctl(bdev, mode, cmd, arg); | 1593 | return blkdev_ioctl(bdev, mode, cmd, arg); |
1568 | } | 1594 | } |
1569 | 1595 | ||
1596 | ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, | ||
1597 | unsigned long nr_segs, loff_t pos) | ||
1598 | { | ||
1599 | ssize_t ret; | ||
1600 | struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); | ||
1601 | |||
1602 | percpu_down_read(&bdev->bd_block_size_semaphore); | ||
1603 | |||
1604 | ret = generic_file_aio_read(iocb, iov, nr_segs, pos); | ||
1605 | |||
1606 | percpu_up_read(&bdev->bd_block_size_semaphore); | ||
1607 | |||
1608 | return ret; | ||
1609 | } | ||
1610 | EXPORT_SYMBOL_GPL(blkdev_aio_read); | ||
1611 | |||
1570 | /* | 1612 | /* |
1571 | * Write data to the block device. Only intended for the block device itself | 1613 | * Write data to the block device. Only intended for the block device itself |
1572 | * and the raw driver which basically is a fake block device. | 1614 | * and the raw driver which basically is a fake block device. |
@@ -1578,12 +1620,16 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1578 | unsigned long nr_segs, loff_t pos) | 1620 | unsigned long nr_segs, loff_t pos) |
1579 | { | 1621 | { |
1580 | struct file *file = iocb->ki_filp; | 1622 | struct file *file = iocb->ki_filp; |
1623 | struct block_device *bdev = I_BDEV(file->f_mapping->host); | ||
1581 | struct blk_plug plug; | 1624 | struct blk_plug plug; |
1582 | ssize_t ret; | 1625 | ssize_t ret; |
1583 | 1626 | ||
1584 | BUG_ON(iocb->ki_pos != pos); | 1627 | BUG_ON(iocb->ki_pos != pos); |
1585 | 1628 | ||
1586 | blk_start_plug(&plug); | 1629 | blk_start_plug(&plug); |
1630 | |||
1631 | percpu_down_read(&bdev->bd_block_size_semaphore); | ||
1632 | |||
1587 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); | 1633 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); |
1588 | if (ret > 0 || ret == -EIOCBQUEUED) { | 1634 | if (ret > 0 || ret == -EIOCBQUEUED) { |
1589 | ssize_t err; | 1635 | ssize_t err; |
@@ -1592,11 +1638,29 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1592 | if (err < 0 && ret > 0) | 1638 | if (err < 0 && ret > 0) |
1593 | ret = err; | 1639 | ret = err; |
1594 | } | 1640 | } |
1641 | |||
1642 | percpu_up_read(&bdev->bd_block_size_semaphore); | ||
1643 | |||
1595 | blk_finish_plug(&plug); | 1644 | blk_finish_plug(&plug); |
1645 | |||
1596 | return ret; | 1646 | return ret; |
1597 | } | 1647 | } |
1598 | EXPORT_SYMBOL_GPL(blkdev_aio_write); | 1648 | EXPORT_SYMBOL_GPL(blkdev_aio_write); |
1599 | 1649 | ||
1650 | static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) | ||
1651 | { | ||
1652 | int ret; | ||
1653 | struct block_device *bdev = I_BDEV(file->f_mapping->host); | ||
1654 | |||
1655 | percpu_down_read(&bdev->bd_block_size_semaphore); | ||
1656 | |||
1657 | ret = generic_file_mmap(file, vma); | ||
1658 | |||
1659 | percpu_up_read(&bdev->bd_block_size_semaphore); | ||
1660 | |||
1661 | return ret; | ||
1662 | } | ||
1663 | |||
1600 | /* | 1664 | /* |
1601 | * Try to release a page associated with block device when the system | 1665 | * Try to release a page associated with block device when the system |
1602 | * is under memory pressure. | 1666 | * is under memory pressure. |
@@ -1627,9 +1691,9 @@ const struct file_operations def_blk_fops = { | |||
1627 | .llseek = block_llseek, | 1691 | .llseek = block_llseek, |
1628 | .read = do_sync_read, | 1692 | .read = do_sync_read, |
1629 | .write = do_sync_write, | 1693 | .write = do_sync_write, |
1630 | .aio_read = generic_file_aio_read, | 1694 | .aio_read = blkdev_aio_read, |
1631 | .aio_write = blkdev_aio_write, | 1695 | .aio_write = blkdev_aio_write, |
1632 | .mmap = generic_file_mmap, | 1696 | .mmap = blkdev_mmap, |
1633 | .fsync = blkdev_fsync, | 1697 | .fsync = blkdev_fsync, |
1634 | .unlocked_ioctl = block_ioctl, | 1698 | .unlocked_ioctl = block_ioctl, |
1635 | #ifdef CONFIG_COMPAT | 1699 | #ifdef CONFIG_COMPAT |
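
The locking protocol above, condensed: every I/O entry point takes the per-cpu read side, which in the common case costs only a per-cpu counter increment, while the rare block-size change takes the write side and waits the readers out. A minimal sketch of that pattern with the new primitive, using hypothetical names:

#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore size_sem;

static int my_setup(void)
{
	return percpu_init_rwsem(&size_sem);	/* can fail: per-cpu allocation */
}

static void my_hot_path(void)
{
	percpu_down_read(&size_sem);
	/* ... issue I/O that depends on the current block size ... */
	percpu_up_read(&size_sem);
}

static void my_rare_reconfig(void)
{
	percpu_down_write(&size_sem);
	/* ... no reader in flight here; safe to change the block size ... */
	percpu_up_write(&size_sem);
}
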
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 1585db1aa365..f936cb50dc0d 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c | |||
@@ -814,8 +814,8 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) | |||
814 | struct bio *bio; | 814 | struct bio *bio; |
815 | 815 | ||
816 | if (per_dev != master_dev) { | 816 | if (per_dev != master_dev) { |
817 | bio = bio_kmalloc(GFP_KERNEL, | 817 | bio = bio_clone_kmalloc(master_dev->bio, |
818 | master_dev->bio->bi_max_vecs); | 818 | GFP_KERNEL); |
819 | if (unlikely(!bio)) { | 819 | if (unlikely(!bio)) { |
820 | ORE_DBGMSG( | 820 | ORE_DBGMSG( |
821 | "Failed to allocate BIO size=%u\n", | 821 | "Failed to allocate BIO size=%u\n", |
@@ -824,7 +824,6 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) | |||
824 | goto out; | 824 | goto out; |
825 | } | 825 | } |
826 | 826 | ||
827 | __bio_clone(bio, master_dev->bio); | ||
828 | bio->bi_bdev = NULL; | 827 | bio->bi_bdev = NULL; |
829 | bio->bi_next = NULL; | 828 | bio->bi_next = NULL; |
830 | per_dev->offset = master_dev->offset; | 829 | per_dev->offset = master_dev->offset; |
diff --git a/include/linux/bio.h b/include/linux/bio.h index 26435890dc87..820e7aaad4fd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
@@ -212,20 +212,41 @@ extern void bio_pair_release(struct bio_pair *dbio); | |||
212 | extern struct bio_set *bioset_create(unsigned int, unsigned int); | 212 | extern struct bio_set *bioset_create(unsigned int, unsigned int); |
213 | extern void bioset_free(struct bio_set *); | 213 | extern void bioset_free(struct bio_set *); |
214 | 214 | ||
215 | extern struct bio *bio_alloc(gfp_t, unsigned int); | ||
216 | extern struct bio *bio_kmalloc(gfp_t, unsigned int); | ||
217 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); | 215 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); |
218 | extern void bio_put(struct bio *); | 216 | extern void bio_put(struct bio *); |
219 | extern void bio_free(struct bio *, struct bio_set *); | 217 | |
218 | extern void __bio_clone(struct bio *, struct bio *); | ||
219 | extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); | ||
220 | |||
221 | extern struct bio_set *fs_bio_set; | ||
222 | |||
223 | static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
224 | { | ||
225 | return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | ||
226 | } | ||
227 | |||
228 | static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) | ||
229 | { | ||
230 | return bio_clone_bioset(bio, gfp_mask, fs_bio_set); | ||
231 | } | ||
232 | |||
233 | static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
234 | { | ||
235 | return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); | ||
236 | } | ||
237 | |||
238 | static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) | ||
239 | { | ||
240 | return bio_clone_bioset(bio, gfp_mask, NULL); | ||
241 | |||
242 | } | ||
220 | 243 | ||
221 | extern void bio_endio(struct bio *, int); | 244 | extern void bio_endio(struct bio *, int); |
222 | struct request_queue; | 245 | struct request_queue; |
223 | extern int bio_phys_segments(struct request_queue *, struct bio *); | 246 | extern int bio_phys_segments(struct request_queue *, struct bio *); |
224 | 247 | ||
225 | extern void __bio_clone(struct bio *, struct bio *); | ||
226 | extern struct bio *bio_clone(struct bio *, gfp_t); | ||
227 | |||
228 | extern void bio_init(struct bio *); | 248 | extern void bio_init(struct bio *); |
249 | extern void bio_reset(struct bio *); | ||
229 | 250 | ||
230 | extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); | 251 | extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); |
231 | extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, | 252 | extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, |
@@ -304,8 +325,6 @@ struct biovec_slab { | |||
304 | struct kmem_cache *slab; | 325 | struct kmem_cache *slab; |
305 | }; | 326 | }; |
306 | 327 | ||
307 | extern struct bio_set *fs_bio_set; | ||
308 | |||
309 | /* | 328 | /* |
310 | * a small number of entries is fine, not going to be performance critical. | 329 | * a small number of entries is fine, not going to be performance critical. |
311 | * basically we just need to survive | 330 | * basically we just need to survive |
@@ -367,9 +386,31 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, | |||
367 | /* | 386 | /* |
368 | * Check whether this bio carries any data or not. A NULL bio is allowed. | 387 | * Check whether this bio carries any data or not. A NULL bio is allowed. |
369 | */ | 388 | */ |
370 | static inline int bio_has_data(struct bio *bio) | 389 | static inline bool bio_has_data(struct bio *bio) |
371 | { | 390 | { |
372 | return bio && bio->bi_io_vec != NULL; | 391 | if (bio && bio->bi_vcnt) |
392 | return true; | ||
393 | |||
394 | return false; | ||
395 | } | ||
396 | |||
397 | static inline bool bio_is_rw(struct bio *bio) | ||
398 | { | ||
399 | if (!bio_has_data(bio)) | ||
400 | return false; | ||
401 | |||
402 | if (bio->bi_rw & REQ_WRITE_SAME) | ||
403 | return false; | ||
404 | |||
405 | return true; | ||
406 | } | ||
407 | |||
408 | static inline bool bio_mergeable(struct bio *bio) | ||
409 | { | ||
410 | if (bio->bi_rw & REQ_NOMERGE_FLAGS) | ||
411 | return false; | ||
412 | |||
413 | return true; | ||
373 | } | 414 | } |
374 | 415 | ||
375 | /* | 416 | /* |
@@ -505,9 +546,8 @@ static inline struct bio *bio_list_get(struct bio_list *bl) | |||
505 | 546 | ||
506 | #define bio_integrity(bio) (bio->bi_integrity != NULL) | 547 | #define bio_integrity(bio) (bio->bi_integrity != NULL) |
507 | 548 | ||
508 | extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); | ||
509 | extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); | 549 | extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); |
510 | extern void bio_integrity_free(struct bio *, struct bio_set *); | 550 | extern void bio_integrity_free(struct bio *); |
511 | extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); | 551 | extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); |
512 | extern int bio_integrity_enabled(struct bio *bio); | 552 | extern int bio_integrity_enabled(struct bio *bio); |
513 | extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); | 553 | extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); |
@@ -517,7 +557,7 @@ extern void bio_integrity_endio(struct bio *, int); | |||
517 | extern void bio_integrity_advance(struct bio *, unsigned int); | 557 | extern void bio_integrity_advance(struct bio *, unsigned int); |
518 | extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); | 558 | extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); |
519 | extern void bio_integrity_split(struct bio *, struct bio_pair *, int); | 559 | extern void bio_integrity_split(struct bio *, struct bio_pair *, int); |
520 | extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *); | 560 | extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); |
521 | extern int bioset_integrity_create(struct bio_set *, int); | 561 | extern int bioset_integrity_create(struct bio_set *, int); |
522 | extern void bioset_integrity_free(struct bio_set *); | 562 | extern void bioset_integrity_free(struct bio_set *); |
523 | extern void bio_integrity_init(void); | 563 | extern void bio_integrity_init(void); |
@@ -549,13 +589,13 @@ static inline int bio_integrity_prep(struct bio *bio) | |||
549 | return 0; | 589 | return 0; |
550 | } | 590 | } |
551 | 591 | ||
552 | static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs) | 592 | static inline void bio_integrity_free(struct bio *bio) |
553 | { | 593 | { |
554 | return; | 594 | return; |
555 | } | 595 | } |
556 | 596 | ||
557 | static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, | 597 | static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, |
558 | gfp_t gfp_mask, struct bio_set *bs) | 598 | gfp_t gfp_mask) |
559 | { | 599 | { |
560 | return 0; | 600 | return 0; |
561 | } | 601 | } |
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 7b7ac9ccec7a..cdf11191e645 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
@@ -59,12 +59,6 @@ struct bio { | |||
59 | unsigned int bi_seg_front_size; | 59 | unsigned int bi_seg_front_size; |
60 | unsigned int bi_seg_back_size; | 60 | unsigned int bi_seg_back_size; |
61 | 61 | ||
62 | unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ | ||
63 | |||
64 | atomic_t bi_cnt; /* pin count */ | ||
65 | |||
66 | struct bio_vec *bi_io_vec; /* the actual vec list */ | ||
67 | |||
68 | bio_end_io_t *bi_end_io; | 62 | bio_end_io_t *bi_end_io; |
69 | 63 | ||
70 | void *bi_private; | 64 | void *bi_private; |
@@ -80,7 +74,17 @@ struct bio { | |||
80 | struct bio_integrity_payload *bi_integrity; /* data integrity */ | 74 | struct bio_integrity_payload *bi_integrity; /* data integrity */ |
81 | #endif | 75 | #endif |
82 | 76 | ||
83 | bio_destructor_t *bi_destructor; /* destructor */ | 77 | /* |
78 | * Everything starting with bi_max_vecs will be preserved by bio_reset() | ||
79 | */ | ||
80 | |||
81 | unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ | ||
82 | |||
83 | atomic_t bi_cnt; /* pin count */ | ||
84 | |||
85 | struct bio_vec *bi_io_vec; /* the actual vec list */ | ||
86 | |||
87 | struct bio_set *bi_pool; | ||
84 | 88 | ||
85 | /* | 89 | /* |
86 | * We can inline a number of vecs at the end of the bio, to avoid | 90 | * We can inline a number of vecs at the end of the bio, to avoid |
@@ -90,6 +94,8 @@ struct bio { | |||
90 | struct bio_vec bi_inline_vecs[0]; | 94 | struct bio_vec bi_inline_vecs[0]; |
91 | }; | 95 | }; |
92 | 96 | ||
97 | #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) | ||
98 | |||
93 | /* | 99 | /* |
94 | * bio flags | 100 | * bio flags |
95 | */ | 101 | */ |
@@ -105,6 +111,13 @@ struct bio { | |||
105 | #define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ | 111 | #define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ |
106 | #define BIO_QUIET 10 /* Make BIO Quiet */ | 112 | #define BIO_QUIET 10 /* Make BIO Quiet */ |
107 | #define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ | 113 | #define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ |
114 | |||
115 | /* | ||
116 | * Flags starting here get preserved by bio_reset() - this includes | ||
117 | * BIO_POOL_IDX() | ||
118 | */ | ||
119 | #define BIO_RESET_BITS 12 | ||
120 | |||
108 | #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) | 121 | #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) |
109 | 122 | ||
110 | /* | 123 | /* |
@@ -134,6 +147,7 @@ enum rq_flag_bits { | |||
134 | __REQ_PRIO, /* boost priority in cfq */ | 147 | __REQ_PRIO, /* boost priority in cfq */ |
135 | __REQ_DISCARD, /* request to discard sectors */ | 148 | __REQ_DISCARD, /* request to discard sectors */ |
136 | __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ | 149 | __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ |
150 | __REQ_WRITE_SAME, /* write same block many times */ | ||
137 | 151 | ||
138 | __REQ_NOIDLE, /* don't anticipate more IO after this one */ | 152 | __REQ_NOIDLE, /* don't anticipate more IO after this one */ |
139 | __REQ_FUA, /* forced unit access */ | 153 | __REQ_FUA, /* forced unit access */ |
@@ -172,15 +186,21 @@ enum rq_flag_bits { | |||
172 | #define REQ_META (1 << __REQ_META) | 186 | #define REQ_META (1 << __REQ_META) |
173 | #define REQ_PRIO (1 << __REQ_PRIO) | 187 | #define REQ_PRIO (1 << __REQ_PRIO) |
174 | #define REQ_DISCARD (1 << __REQ_DISCARD) | 188 | #define REQ_DISCARD (1 << __REQ_DISCARD) |
189 | #define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME) | ||
175 | #define REQ_NOIDLE (1 << __REQ_NOIDLE) | 190 | #define REQ_NOIDLE (1 << __REQ_NOIDLE) |
176 | 191 | ||
177 | #define REQ_FAILFAST_MASK \ | 192 | #define REQ_FAILFAST_MASK \ |
178 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) | 193 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) |
179 | #define REQ_COMMON_MASK \ | 194 | #define REQ_COMMON_MASK \ |
180 | (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ | 195 | (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ |
181 | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) | 196 | REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ |
197 | REQ_SECURE) | ||
182 | #define REQ_CLONE_MASK REQ_COMMON_MASK | 198 | #define REQ_CLONE_MASK REQ_COMMON_MASK |
183 | 199 | ||
200 | /* This mask is used for both bio and request merge checking */ | ||
201 | #define REQ_NOMERGE_FLAGS \ | ||
202 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) | ||
203 | |||
184 | #define REQ_RAHEAD (1 << __REQ_RAHEAD) | 204 | #define REQ_RAHEAD (1 << __REQ_RAHEAD) |
185 | #define REQ_THROTTLED (1 << __REQ_THROTTLED) | 205 | #define REQ_THROTTLED (1 << __REQ_THROTTLED) |
186 | 206 | ||
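
Rough sketch of what a synchronous WRITE SAME submission looks like with the new flag: one bio, one bvec holding the template block, and bi_size covering the whole on-disk range. This approximates what the series' blkdev_issue_write_same() does internally and is illustrative only; submit_write_same() and ws_end_io() are made-up names.

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/completion.h>

static void ws_end_io(struct bio *bio, int error)
{
	complete(bio->bi_private);
}

static int submit_write_same(struct block_device *bdev, sector_t sector,
			     sector_t nr_sects, struct page *page)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct bio *bio = bio_alloc(GFP_KERNEL, 1);
	int ret;

	if (!bio)
		return -ENOMEM;

	bio->bi_bdev		  = bdev;
	bio->bi_sector		  = sector;
	bio->bi_end_io		  = ws_end_io;
	bio->bi_private		  = &done;
	bio->bi_vcnt		  = 1;
	bio->bi_io_vec->bv_page	  = page;
	bio->bi_io_vec->bv_offset = 0;
	bio->bi_io_vec->bv_len	  = bdev_logical_block_size(bdev);
	bio->bi_size		  = nr_sects << 9;	/* replicated range */

	submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
	wait_for_completion(&done);

	ret = bio_flagged(bio, BIO_UPTODATE) ? 0 : -EIO;
	bio_put(bio);
	return ret;
}
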
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a2ab7c85393..1756001210d2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
@@ -270,6 +270,7 @@ struct queue_limits { | |||
270 | unsigned int io_min; | 270 | unsigned int io_min; |
271 | unsigned int io_opt; | 271 | unsigned int io_opt; |
272 | unsigned int max_discard_sectors; | 272 | unsigned int max_discard_sectors; |
273 | unsigned int max_write_same_sectors; | ||
273 | unsigned int discard_granularity; | 274 | unsigned int discard_granularity; |
274 | unsigned int discard_alignment; | 275 | unsigned int discard_alignment; |
275 | 276 | ||
@@ -540,8 +541,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | |||
540 | 541 | ||
541 | #define blk_account_rq(rq) \ | 542 | #define blk_account_rq(rq) \ |
542 | (((rq)->cmd_flags & REQ_STARTED) && \ | 543 | (((rq)->cmd_flags & REQ_STARTED) && \ |
543 | ((rq)->cmd_type == REQ_TYPE_FS || \ | 544 | ((rq)->cmd_type == REQ_TYPE_FS)) |
544 | ((rq)->cmd_flags & REQ_DISCARD))) | ||
545 | 545 | ||
546 | #define blk_pm_request(rq) \ | 546 | #define blk_pm_request(rq) \ |
547 | ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ | 547 | ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ |
@@ -595,17 +595,39 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync) | |||
595 | rl->flags &= ~flag; | 595 | rl->flags &= ~flag; |
596 | } | 596 | } |
597 | 597 | ||
598 | static inline bool rq_mergeable(struct request *rq) | ||
599 | { | ||
600 | if (rq->cmd_type != REQ_TYPE_FS) | ||
601 | return false; | ||
598 | 602 | ||
599 | /* | 603 | if (rq->cmd_flags & REQ_NOMERGE_FLAGS) |
600 | * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may | 604 | return false; |
601 | * it already be started by driver. | 605 | |
602 | */ | 606 | return true; |
603 | #define RQ_NOMERGE_FLAGS \ | 607 | } |
604 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD) | 608 | |
605 | #define rq_mergeable(rq) \ | 609 | static inline bool blk_check_merge_flags(unsigned int flags1, |
606 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ | 610 | unsigned int flags2) |
607 | (((rq)->cmd_flags & REQ_DISCARD) || \ | 611 | { |
608 | (rq)->cmd_type == REQ_TYPE_FS)) | 612 | if ((flags1 & REQ_DISCARD) != (flags2 & REQ_DISCARD)) |
613 | return false; | ||
614 | |||
615 | if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE)) | ||
616 | return false; | ||
617 | |||
618 | if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME)) | ||
619 | return false; | ||
620 | |||
621 | return true; | ||
622 | } | ||
623 | |||
624 | static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) | ||
625 | { | ||
626 | if (bio_data(a) == bio_data(b)) | ||
627 | return true; | ||
628 | |||
629 | return false; | ||
630 | } | ||
609 | 631 | ||
610 | /* | 632 | /* |
611 | * q->prep_rq_fn return values | 633 | * q->prep_rq_fn return values |
@@ -802,6 +824,28 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) | |||
802 | return blk_rq_cur_bytes(rq) >> 9; | 824 | return blk_rq_cur_bytes(rq) >> 9; |
803 | } | 825 | } |
804 | 826 | ||
827 | static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, | ||
828 | unsigned int cmd_flags) | ||
829 | { | ||
830 | if (unlikely(cmd_flags & REQ_DISCARD)) | ||
831 | return q->limits.max_discard_sectors; | ||
832 | |||
833 | if (unlikely(cmd_flags & REQ_WRITE_SAME)) | ||
834 | return q->limits.max_write_same_sectors; | ||
835 | |||
836 | return q->limits.max_sectors; | ||
837 | } | ||
838 | |||
839 | static inline unsigned int blk_rq_get_max_sectors(struct request *rq) | ||
840 | { | ||
841 | struct request_queue *q = rq->q; | ||
842 | |||
843 | if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) | ||
844 | return q->limits.max_hw_sectors; | ||
845 | |||
846 | return blk_queue_get_max_sectors(q, rq->cmd_flags); | ||
847 | } | ||
848 | |||
805 | /* | 849 | /* |
806 | * Request issue related functions. | 850 | * Request issue related functions. |
807 | */ | 851 | */ |
@@ -857,6 +901,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short); | |||
857 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); | 901 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); |
858 | extern void blk_queue_max_discard_sectors(struct request_queue *q, | 902 | extern void blk_queue_max_discard_sectors(struct request_queue *q, |
859 | unsigned int max_discard_sectors); | 903 | unsigned int max_discard_sectors); |
904 | extern void blk_queue_max_write_same_sectors(struct request_queue *q, | ||
905 | unsigned int max_write_same_sectors); | ||
860 | extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); | 906 | extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); |
861 | extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); | 907 | extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); |
862 | extern void blk_queue_alignment_offset(struct request_queue *q, | 908 | extern void blk_queue_alignment_offset(struct request_queue *q, |
@@ -987,6 +1033,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, | |||
987 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); | 1033 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); |
988 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | 1034 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, |
989 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | 1035 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); |
1036 | extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, | ||
1037 | sector_t nr_sects, gfp_t gfp_mask, struct page *page); | ||
990 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 1038 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
991 | sector_t nr_sects, gfp_t gfp_mask); | 1039 | sector_t nr_sects, gfp_t gfp_mask); |
992 | static inline int sb_issue_discard(struct super_block *sb, sector_t block, | 1040 | static inline int sb_issue_discard(struct super_block *sb, sector_t block, |
@@ -1164,6 +1212,16 @@ static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) | |||
1164 | return queue_discard_zeroes_data(bdev_get_queue(bdev)); | 1212 | return queue_discard_zeroes_data(bdev_get_queue(bdev)); |
1165 | } | 1213 | } |
1166 | 1214 | ||
1215 | static inline unsigned int bdev_write_same(struct block_device *bdev) | ||
1216 | { | ||
1217 | struct request_queue *q = bdev_get_queue(bdev); | ||
1218 | |||
1219 | if (q) | ||
1220 | return q->limits.max_write_same_sectors; | ||
1221 | |||
1222 | return 0; | ||
1223 | } | ||
1224 | |||
1167 | static inline int queue_dma_alignment(struct request_queue *q) | 1225 | static inline int queue_dma_alignment(struct request_queue *q) |
1168 | { | 1226 | { |
1169 | return q ? q->dma_alignment : 511; | 1227 | return q ? q->dma_alignment : 511; |
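
A hedged sketch of how a caller might use the new limit helpers: probe bdev_write_same() and fall back when the device cannot do WRITE SAME. zero_range() is an illustrative name; blkdev_issue_zeroout(), as reworked in this series, already performs a similar fallback internally.

#include <linux/blkdev.h>
#include <linux/mm.h>

static int zero_range(struct block_device *bdev, sector_t sector,
		      sector_t nr_sects)
{
	if (bdev_write_same(bdev))
		return blkdev_issue_write_same(bdev, sector, nr_sects,
					       GFP_NOFS, ZERO_PAGE(0));

	return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOFS);
}
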
diff --git a/include/linux/fs.h b/include/linux/fs.h index c617ed024df8..39f3e12ca752 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -335,6 +335,7 @@ struct inodes_stat_t { | |||
335 | #define BLKDISCARDZEROES _IO(0x12,124) | 335 | #define BLKDISCARDZEROES _IO(0x12,124) |
336 | #define BLKSECDISCARD _IO(0x12,125) | 336 | #define BLKSECDISCARD _IO(0x12,125) |
337 | #define BLKROTATIONAL _IO(0x12,126) | 337 | #define BLKROTATIONAL _IO(0x12,126) |
338 | #define BLKZEROOUT _IO(0x12,127) | ||
338 | 339 | ||
339 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ | 340 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ |
340 | #define FIBMAP _IO(0x00,1) /* bmap access */ | 341 | #define FIBMAP _IO(0x00,1) /* bmap access */ |
@@ -415,6 +416,7 @@ struct inodes_stat_t { | |||
415 | #include <linux/migrate_mode.h> | 416 | #include <linux/migrate_mode.h> |
416 | #include <linux/uidgid.h> | 417 | #include <linux/uidgid.h> |
417 | #include <linux/lockdep.h> | 418 | #include <linux/lockdep.h> |
419 | #include <linux/percpu-rwsem.h> | ||
418 | 420 | ||
419 | #include <asm/byteorder.h> | 421 | #include <asm/byteorder.h> |
420 | 422 | ||
@@ -724,6 +726,8 @@ struct block_device { | |||
724 | int bd_fsfreeze_count; | 726 | int bd_fsfreeze_count; |
725 | /* Mutex for freeze */ | 727 | /* Mutex for freeze */ |
726 | struct mutex bd_fsfreeze_mutex; | 728 | struct mutex bd_fsfreeze_mutex; |
729 | /* A semaphore that prevents I/O while block size is being changed */ | ||
730 | struct percpu_rw_semaphore bd_block_size_semaphore; | ||
727 | }; | 731 | }; |
728 | 732 | ||
729 | /* | 733 | /* |
@@ -2570,6 +2574,8 @@ extern int generic_segment_checks(const struct iovec *iov, | |||
2570 | unsigned long *nr_segs, size_t *count, int access_flags); | 2574 | unsigned long *nr_segs, size_t *count, int access_flags); |
2571 | 2575 | ||
2572 | /* fs/block_dev.c */ | 2576 | /* fs/block_dev.c */ |
2577 | extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, | ||
2578 | unsigned long nr_segs, loff_t pos); | ||
2573 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2579 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, |
2574 | unsigned long nr_segs, loff_t pos); | 2580 | unsigned long nr_segs, loff_t pos); |
2575 | extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, | 2581 | extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, |
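Editor's note: the new BLKZEROOUT ioctl is driven from userspace. A sketch of a caller, assuming it takes the same argument convention as BLKDISCARD (a uint64_t[2] of {start, length} in bytes, 512-byte aligned); the program below is illustrative only:

    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>
    #include <linux/fs.h>

    int main(int argc, char **argv)
    {
            /* {start, length} in bytes; assumed to require 512-byte alignment. */
            uint64_t range[2] = { 0, 1024 * 1024 };
            int fd;

            if (argc < 2)
                    return EXIT_FAILURE;

            fd = open(argv[1], O_WRONLY);
            if (fd < 0 || ioctl(fd, BLKZEROOUT, &range) < 0) {
                    perror("BLKZEROOUT");
                    return EXIT_FAILURE;
            }
            return 0;
    }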
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h new file mode 100644 index 000000000000..cf80f7e5277f --- /dev/null +++ b/include/linux/percpu-rwsem.h | |||
@@ -0,0 +1,89 @@ | |||
1 | #ifndef _LINUX_PERCPU_RWSEM_H | ||
2 | #define _LINUX_PERCPU_RWSEM_H | ||
3 | |||
4 | #include <linux/mutex.h> | ||
5 | #include <linux/percpu.h> | ||
6 | #include <linux/rcupdate.h> | ||
7 | #include <linux/delay.h> | ||
8 | |||
9 | struct percpu_rw_semaphore { | ||
10 | unsigned __percpu *counters; | ||
11 | bool locked; | ||
12 | struct mutex mtx; | ||
13 | }; | ||
14 | |||
15 | static inline void percpu_down_read(struct percpu_rw_semaphore *p) | ||
16 | { | ||
17 | rcu_read_lock(); | ||
18 | if (unlikely(p->locked)) { | ||
19 | rcu_read_unlock(); | ||
20 | mutex_lock(&p->mtx); | ||
21 | this_cpu_inc(*p->counters); | ||
22 | mutex_unlock(&p->mtx); | ||
23 | return; | ||
24 | } | ||
25 | this_cpu_inc(*p->counters); | ||
26 | rcu_read_unlock(); | ||
27 | } | ||
28 | |||
29 | static inline void percpu_up_read(struct percpu_rw_semaphore *p) | ||
30 | { | ||
31 | /* | ||
32 | * On x86, the write performed by this_cpu_dec() acts as a memory | ||
33 | * unlock barrier (i.e. memory accesses may be moved before the write, | ||
34 | * but no memory accesses are moved past it). | ||
35 | * On other architectures this is not guaranteed, so smp_mb() is | ||
36 | * needed there. | ||
37 | */ | ||
38 | #if defined(CONFIG_X86) && (!defined(CONFIG_X86_PPRO_FENCE) && !defined(CONFIG_X86_OOSTORE)) | ||
39 | barrier(); | ||
40 | #else | ||
41 | smp_mb(); | ||
42 | #endif | ||
43 | this_cpu_dec(*p->counters); | ||
44 | } | ||
45 | |||
46 | static inline unsigned __percpu_count(unsigned __percpu *counters) | ||
47 | { | ||
48 | unsigned total = 0; | ||
49 | int cpu; | ||
50 | |||
51 | for_each_possible_cpu(cpu) | ||
52 | total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu)); | ||
53 | |||
54 | return total; | ||
55 | } | ||
56 | |||
57 | static inline void percpu_down_write(struct percpu_rw_semaphore *p) | ||
58 | { | ||
59 | mutex_lock(&p->mtx); | ||
60 | p->locked = true; | ||
61 | synchronize_rcu(); | ||
62 | while (__percpu_count(p->counters)) | ||
63 | msleep(1); | ||
64 | smp_rmb(); /* paired with the barrier in percpu_up_read() */ | ||
65 | } | ||
66 | |||
67 | static inline void percpu_up_write(struct percpu_rw_semaphore *p) | ||
68 | { | ||
69 | p->locked = false; | ||
70 | mutex_unlock(&p->mtx); | ||
71 | } | ||
72 | |||
73 | static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p) | ||
74 | { | ||
75 | p->counters = alloc_percpu(unsigned); | ||
76 | if (unlikely(!p->counters)) | ||
77 | return -ENOMEM; | ||
78 | p->locked = false; | ||
79 | mutex_init(&p->mtx); | ||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p) | ||
84 | { | ||
85 | free_percpu(p->counters); | ||
86 | p->counters = NULL; /* catch use after free bugs */ | ||
87 | } | ||
88 | |||
89 | #endif | ||
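Editor's note: the semaphore is biased heavily toward readers, which matters here because every block-device read takes it while a block-size change is rare. A usage sketch under that assumption; the function names are illustrative, the real user in this series is the bd_block_size_semaphore added to struct block_device above:

    #include <linux/percpu-rwsem.h>

    static struct percpu_rw_semaphore bs_sem;   /* percpu_init_rwsem(&bs_sem) at setup */

    static void do_buffered_io(void)
    {
            percpu_down_read(&bs_sem);      /* fast path: per-cpu counter bump under RCU */
            /* ... I/O that must see a stable block size ... */
            percpu_up_read(&bs_sem);
    }

    static void change_block_size(void)
    {
            percpu_down_write(&bs_sem);     /* sets 'locked', waits out RCU and readers */
            /* ... the block size may be changed safely here ... */
            percpu_up_write(&bs_sem);
    }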
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 7b600da9a635..4bd6c06eb28e 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h | |||
@@ -201,6 +201,7 @@ static inline void *sg_virt(struct scatterlist *sg) | |||
201 | return page_address(sg_page(sg)) + sg->offset; | 201 | return page_address(sg_page(sg)) + sg->offset; |
202 | } | 202 | } |
203 | 203 | ||
204 | int sg_nents(struct scatterlist *sg); | ||
204 | struct scatterlist *sg_next(struct scatterlist *); | 205 | struct scatterlist *sg_next(struct scatterlist *); |
205 | struct scatterlist *sg_last(struct scatterlist *s, unsigned int); | 206 | struct scatterlist *sg_last(struct scatterlist *s, unsigned int); |
206 | void sg_init_table(struct scatterlist *, unsigned int); | 207 | void sg_init_table(struct scatterlist *, unsigned int); |
diff --git a/lib/scatterlist.c b/lib/scatterlist.c index e76d85cf3175..3675452b23ca 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c | |||
@@ -39,6 +39,25 @@ struct scatterlist *sg_next(struct scatterlist *sg) | |||
39 | EXPORT_SYMBOL(sg_next); | 39 | EXPORT_SYMBOL(sg_next); |
40 | 40 | ||
41 | /** | 41 | /** |
42 | * sg_nents - return total count of entries in scatterlist | ||
43 | * @sg: The scatterlist | ||
44 | * | ||
45 | * Description: | ||
46 | * Returns the total number of entries in sg, taking chaining into | ||
47 | * account. | ||
48 | * | ||
49 | **/ | ||
50 | int sg_nents(struct scatterlist *sg) | ||
51 | { | ||
52 | int nents; | ||
53 | for (nents = 0; sg; sg = sg_next(sg)) | ||
54 | nents++; | ||
55 | return nents; | ||
56 | } | ||
57 | EXPORT_SYMBOL(sg_nents); | ||
58 | |||
59 | |||
60 | /** | ||
42 | * sg_last - return the last scatterlist entry in a list | 61 | * sg_last - return the last scatterlist entry in a list |
43 | * @sgl: First entry in the scatterlist | 62 | * @sgl: First entry in the scatterlist |
44 | * @nents: Number of entries in the scatterlist | 63 | * @nents: Number of entries in the scatterlist |
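Editor's note: because sg_nents() walks the list via sg_next(), chained scatterlists are counted in full. A sketch of a typical consumer, assuming a caller that wants the entry count for a DMA mapping; map_whole_list() is a hypothetical helper, not part of this series:

    #include <linux/dma-mapping.h>
    #include <linux/scatterlist.h>

    /* Count every entry across chained chunks, then hand that count to the
     * DMA API, which expects the number of list entries to map. */
    static int map_whole_list(struct device *dev, struct scatterlist *sgl)
    {
            int nents = sg_nents(sgl);

            return dma_map_sg(dev, sgl, nents, DMA_TO_DEVICE);
    }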