33 files changed, 770 insertions, 464 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index c1eb41cb9876..279da08f7541 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block | |||
| @@ -206,3 +206,17 @@ Description: | |||
| 206 | when a discarded area is read the discard_zeroes_data | 206 | when a discarded area is read the discard_zeroes_data |
| 207 | parameter will be set to one. Otherwise it will be 0 and | 207 | parameter will be set to one. Otherwise it will be 0 and |
| 208 | the result of reading a discarded area is undefined. | 208 | the result of reading a discarded area is undefined. |
| 209 | |||
| 210 | What: /sys/block/<disk>/queue/write_same_max_bytes | ||
| 211 | Date: January 2012 | ||
| 212 | Contact: Martin K. Petersen <martin.petersen@oracle.com> | ||
| 213 | Description: | ||
| 214 | Some devices support a write same operation in which a | ||
| 215 | single data block can be written to a range of several | ||
| 216 | contiguous blocks on storage. This can be used to wipe | ||
| 217 | areas on disk or to initialize drives in a RAID | ||
| 218 | configuration. write_same_max_bytes indicates how many | ||
| 219 | bytes can be written in a single write same command. If | ||
| 220 | write_same_max_bytes is 0, write same is not supported | ||
| 221 | by the device. | ||
| 222 | |||
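The new attribute is read-only and reports a byte count, so userspace can probe for WRITE SAME support before relying on it. A minimal sketch in C; the device name "sda" is only an example, not something mandated by the patch:

#include <stdio.h>

int main(void)
{
	unsigned long long max_bytes = 0;
	/* Example path: substitute the disk actually being probed. */
	FILE *f = fopen("/sys/block/sda/queue/write_same_max_bytes", "r");

	if (!f) {
		perror("fopen");
		return 1;
	}
	if (fscanf(f, "%llu", &max_bytes) != 1)
		max_bytes = 0;
	fclose(f);

	if (max_bytes)
		printf("WRITE SAME supported, up to %llu bytes per command\n", max_bytes);
	else
		printf("WRITE SAME not supported\n");
	return 0;
}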
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index e418dc0a7086..8df5e8e6dceb 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt | |||
| @@ -465,7 +465,6 @@ struct bio { | |||
| 465 | bio_end_io_t *bi_end_io; /* bi_end_io (bio) */ | 465 | bio_end_io_t *bi_end_io; /* bi_end_io (bio) */ |
| 466 | atomic_t bi_cnt; /* pin count: free when it hits zero */ | 466 | atomic_t bi_cnt; /* pin count: free when it hits zero */ |
| 467 | void *bi_private; | 467 | void *bi_private; |
| 468 | bio_destructor_t *bi_destructor; /* bi_destructor (bio) */ | ||
| 469 | }; | 468 | }; |
| 470 | 469 | ||
| 471 | With this multipage bio design: | 470 | With this multipage bio design: |
| @@ -647,10 +646,6 @@ for a non-clone bio. There are the 6 pools setup for different size biovecs, | |||
| 647 | so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the | 646 | so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the |
| 648 | given size from these slabs. | 647 | given size from these slabs. |
| 649 | 648 | ||
| 650 | The bi_destructor() routine takes into account the possibility of the bio | ||
| 651 | having originated from a different source (see later discussions on | ||
| 652 | n/w to block transfers and kvec_cb) | ||
| 653 | |||
| 654 | The bio_get() routine may be used to hold an extra reference on a bio prior | 649 | The bio_get() routine may be used to hold an extra reference on a bio prior |
| 655 | to i/o submission, if the bio fields are likely to be accessed after the | 650 | to i/o submission, if the bio fields are likely to be accessed after the |
| 656 | i/o is issued (since the bio may otherwise get freed in case i/o completion | 651 | i/o is issued (since the bio may otherwise get freed in case i/o completion |
diff --git a/Documentation/percpu-rw-semaphore.txt b/Documentation/percpu-rw-semaphore.txt new file mode 100644 index 000000000000..7d3c82431909 --- /dev/null +++ b/Documentation/percpu-rw-semaphore.txt | |||
| @@ -0,0 +1,27 @@ | |||
| 1 | Percpu rw semaphores | ||
| 2 | -------------------- | ||
| 3 | |||
| 4 | Percpu rw semaphores are a new read-write semaphore design that is | ||
| 5 | optimized for locking for reading. | ||
| 6 | |||
| 7 | The problem with traditional read-write semaphores is that when multiple | ||
| 8 | cores take the lock for reading, the cache line containing the semaphore | ||
| 9 | is bouncing between L1 caches of the cores, causing performance | ||
| 10 | degradation. | ||
| 11 | |||
| 12 | Locking for reading is very fast: it uses RCU and avoids any atomic | ||
| 13 | instruction in the lock and unlock path. On the other hand, locking for | ||
| 14 | writing is very expensive: it calls synchronize_rcu(), which can take | ||
| 15 | hundreds of milliseconds. | ||
| 16 | |||
| 17 | The lock is declared with the "struct percpu_rw_semaphore" type. | ||
| 18 | The lock is initialized with percpu_init_rwsem, which returns 0 on | ||
| 19 | success and -ENOMEM on allocation failure. | ||
| 20 | The lock must be freed with percpu_free_rwsem to avoid a memory leak. | ||
| 21 | |||
| 22 | The lock is taken for read with percpu_down_read and released with | ||
| 23 | percpu_up_read; for write, use percpu_down_write and percpu_up_write. | ||
| 24 | |||
| 25 | The idea of using RCU for an optimized rw-lock was introduced by | ||
| 26 | Eric Dumazet <eric.dumazet@gmail.com>. | ||
| 27 | The code was written by Mikulas Patocka <mpatocka@redhat.com>. | ||
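A minimal usage sketch of the API described above, assuming the declarations live in <linux/percpu-rwsem.h>; the my_* names and the protected state are illustrative only:

#include <linux/percpu-rwsem.h>

/* Hypothetical subsystem state protected by a percpu rw semaphore. */
static struct percpu_rw_semaphore my_rwsem;

static int my_init(void)
{
	/* Allocates the per-cpu part; returns 0 or -ENOMEM. */
	return percpu_init_rwsem(&my_rwsem);
}

static void my_exit(void)
{
	percpu_free_rwsem(&my_rwsem);
}

static void my_read_path(void)
{
	percpu_down_read(&my_rwsem);	/* fast: no atomics, no cache line bouncing */
	/* ... read the shared state ... */
	percpu_up_read(&my_rwsem);
}

static void my_update_path(void)
{
	percpu_down_write(&my_rwsem);	/* slow: may wait for synchronize_rcu() */
	/* ... modify the shared state ... */
	percpu_up_write(&my_rwsem);
}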
diff --git a/block/blk-core.c b/block/blk-core.c index d2da64170513..a33870b1847b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
| @@ -606,8 +606,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) | |||
| 606 | /* | 606 | /* |
| 607 | * A queue starts its life with bypass turned on to avoid | 607 | * A queue starts its life with bypass turned on to avoid |
| 608 | * unnecessary bypass on/off overhead and nasty surprises during | 608 | * unnecessary bypass on/off overhead and nasty surprises during |
| 609 | * init. The initial bypass will be finished at the end of | 609 | * init. The initial bypass will be finished when the queue is |
| 610 | * blk_init_allocated_queue(). | 610 | * registered by blk_register_queue(). |
| 611 | */ | 611 | */ |
| 612 | q->bypass_depth = 1; | 612 | q->bypass_depth = 1; |
| 613 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); | 613 | __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); |
| @@ -694,7 +694,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | |||
| 694 | q->request_fn = rfn; | 694 | q->request_fn = rfn; |
| 695 | q->prep_rq_fn = NULL; | 695 | q->prep_rq_fn = NULL; |
| 696 | q->unprep_rq_fn = NULL; | 696 | q->unprep_rq_fn = NULL; |
| 697 | q->queue_flags = QUEUE_FLAG_DEFAULT; | 697 | q->queue_flags |= QUEUE_FLAG_DEFAULT; |
| 698 | 698 | ||
| 699 | /* Override internal queue lock with supplied lock pointer */ | 699 | /* Override internal queue lock with supplied lock pointer */ |
| 700 | if (lock) | 700 | if (lock) |
| @@ -710,11 +710,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, | |||
| 710 | /* init elevator */ | 710 | /* init elevator */ |
| 711 | if (elevator_init(q, NULL)) | 711 | if (elevator_init(q, NULL)) |
| 712 | return NULL; | 712 | return NULL; |
| 713 | |||
| 714 | blk_queue_congestion_threshold(q); | ||
| 715 | |||
| 716 | /* all done, end the initial bypass */ | ||
| 717 | blk_queue_bypass_end(q); | ||
| 718 | return q; | 713 | return q; |
| 719 | } | 714 | } |
| 720 | EXPORT_SYMBOL(blk_init_allocated_queue); | 715 | EXPORT_SYMBOL(blk_init_allocated_queue); |
| @@ -1657,8 +1652,8 @@ generic_make_request_checks(struct bio *bio) | |||
| 1657 | goto end_io; | 1652 | goto end_io; |
| 1658 | } | 1653 | } |
| 1659 | 1654 | ||
| 1660 | if (unlikely(!(bio->bi_rw & REQ_DISCARD) && | 1655 | if (likely(bio_is_rw(bio) && |
| 1661 | nr_sectors > queue_max_hw_sectors(q))) { | 1656 | nr_sectors > queue_max_hw_sectors(q))) { |
| 1662 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", | 1657 | printk(KERN_ERR "bio too big device %s (%u > %u)\n", |
| 1663 | bdevname(bio->bi_bdev, b), | 1658 | bdevname(bio->bi_bdev, b), |
| 1664 | bio_sectors(bio), | 1659 | bio_sectors(bio), |
| @@ -1699,8 +1694,12 @@ generic_make_request_checks(struct bio *bio) | |||
| 1699 | 1694 | ||
| 1700 | if ((bio->bi_rw & REQ_DISCARD) && | 1695 | if ((bio->bi_rw & REQ_DISCARD) && |
| 1701 | (!blk_queue_discard(q) || | 1696 | (!blk_queue_discard(q) || |
| 1702 | ((bio->bi_rw & REQ_SECURE) && | 1697 | ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { |
| 1703 | !blk_queue_secdiscard(q)))) { | 1698 | err = -EOPNOTSUPP; |
| 1699 | goto end_io; | ||
| 1700 | } | ||
| 1701 | |||
| 1702 | if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { | ||
| 1704 | err = -EOPNOTSUPP; | 1703 | err = -EOPNOTSUPP; |
| 1705 | goto end_io; | 1704 | goto end_io; |
| 1706 | } | 1705 | } |
| @@ -1810,15 +1809,20 @@ EXPORT_SYMBOL(generic_make_request); | |||
| 1810 | */ | 1809 | */ |
| 1811 | void submit_bio(int rw, struct bio *bio) | 1810 | void submit_bio(int rw, struct bio *bio) |
| 1812 | { | 1811 | { |
| 1813 | int count = bio_sectors(bio); | ||
| 1814 | |||
| 1815 | bio->bi_rw |= rw; | 1812 | bio->bi_rw |= rw; |
| 1816 | 1813 | ||
| 1817 | /* | 1814 | /* |
| 1818 | * If it's a regular read/write or a barrier with data attached, | 1815 | * If it's a regular read/write or a barrier with data attached, |
| 1819 | * go through the normal accounting stuff before submission. | 1816 | * go through the normal accounting stuff before submission. |
| 1820 | */ | 1817 | */ |
| 1821 | if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { | 1818 | if (bio_has_data(bio)) { |
| 1819 | unsigned int count; | ||
| 1820 | |||
| 1821 | if (unlikely(rw & REQ_WRITE_SAME)) | ||
| 1822 | count = bdev_logical_block_size(bio->bi_bdev) >> 9; | ||
| 1823 | else | ||
| 1824 | count = bio_sectors(bio); | ||
| 1825 | |||
| 1822 | if (rw & WRITE) { | 1826 | if (rw & WRITE) { |
| 1823 | count_vm_events(PGPGOUT, count); | 1827 | count_vm_events(PGPGOUT, count); |
| 1824 | } else { | 1828 | } else { |
| @@ -1864,11 +1868,10 @@ EXPORT_SYMBOL(submit_bio); | |||
| 1864 | */ | 1868 | */ |
| 1865 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) | 1869 | int blk_rq_check_limits(struct request_queue *q, struct request *rq) |
| 1866 | { | 1870 | { |
| 1867 | if (rq->cmd_flags & REQ_DISCARD) | 1871 | if (!rq_mergeable(rq)) |
| 1868 | return 0; | 1872 | return 0; |
| 1869 | 1873 | ||
| 1870 | if (blk_rq_sectors(rq) > queue_max_sectors(q) || | 1874 | if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { |
| 1871 | blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { | ||
| 1872 | printk(KERN_ERR "%s: over max size limit.\n", __func__); | 1875 | printk(KERN_ERR "%s: over max size limit.\n", __func__); |
| 1873 | return -EIO; | 1876 | return -EIO; |
| 1874 | } | 1877 | } |
| @@ -2340,7 +2343,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) | |||
| 2340 | req->buffer = bio_data(req->bio); | 2343 | req->buffer = bio_data(req->bio); |
| 2341 | 2344 | ||
| 2342 | /* update sector only for requests with clear definition of sector */ | 2345 | /* update sector only for requests with clear definition of sector */ |
| 2343 | if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) | 2346 | if (req->cmd_type == REQ_TYPE_FS) |
| 2344 | req->__sector += total_bytes >> 9; | 2347 | req->__sector += total_bytes >> 9; |
| 2345 | 2348 | ||
| 2346 | /* mixed attributes always follow the first bio */ | 2349 | /* mixed attributes always follow the first bio */ |
| @@ -2781,16 +2784,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | |||
| 2781 | blk_rq_init(NULL, rq); | 2784 | blk_rq_init(NULL, rq); |
| 2782 | 2785 | ||
| 2783 | __rq_for_each_bio(bio_src, rq_src) { | 2786 | __rq_for_each_bio(bio_src, rq_src) { |
| 2784 | bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); | 2787 | bio = bio_clone_bioset(bio_src, gfp_mask, bs); |
| 2785 | if (!bio) | 2788 | if (!bio) |
| 2786 | goto free_and_out; | 2789 | goto free_and_out; |
| 2787 | 2790 | ||
| 2788 | __bio_clone(bio, bio_src); | ||
| 2789 | |||
| 2790 | if (bio_integrity(bio_src) && | ||
| 2791 | bio_integrity_clone(bio, bio_src, gfp_mask, bs)) | ||
| 2792 | goto free_and_out; | ||
| 2793 | |||
| 2794 | if (bio_ctr && bio_ctr(bio, bio_src, data)) | 2791 | if (bio_ctr && bio_ctr(bio, bio_src, data)) |
| 2795 | goto free_and_out; | 2792 | goto free_and_out; |
| 2796 | 2793 | ||
| @@ -2807,7 +2804,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, | |||
| 2807 | 2804 | ||
| 2808 | free_and_out: | 2805 | free_and_out: |
| 2809 | if (bio) | 2806 | if (bio) |
| 2810 | bio_free(bio, bs); | 2807 | bio_put(bio); |
| 2811 | blk_rq_unprep_clone(rq); | 2808 | blk_rq_unprep_clone(rq); |
| 2812 | 2809 | ||
| 2813 | return -ENOMEM; | 2810 | return -ENOMEM; |
diff --git a/block/blk-lib.c b/block/blk-lib.c index 19cc761cacb2..9373b58dfab1 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c | |||
| @@ -130,6 +130,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
| 130 | EXPORT_SYMBOL(blkdev_issue_discard); | 130 | EXPORT_SYMBOL(blkdev_issue_discard); |
| 131 | 131 | ||
| 132 | /** | 132 | /** |
| 133 | * blkdev_issue_write_same - queue a write same operation | ||
| 134 | * @bdev: target blockdev | ||
| 135 | * @sector: start sector | ||
| 136 | * @nr_sects: number of sectors to write | ||
| 137 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
| 138 | * @page: page containing data to write | ||
| 139 | * | ||
| 140 | * Description: | ||
| 141 | * Issue a write same request for the sectors in question. | ||
| 142 | */ | ||
| 143 | int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, | ||
| 144 | sector_t nr_sects, gfp_t gfp_mask, | ||
| 145 | struct page *page) | ||
| 146 | { | ||
| 147 | DECLARE_COMPLETION_ONSTACK(wait); | ||
| 148 | struct request_queue *q = bdev_get_queue(bdev); | ||
| 149 | unsigned int max_write_same_sectors; | ||
| 150 | struct bio_batch bb; | ||
| 151 | struct bio *bio; | ||
| 152 | int ret = 0; | ||
| 153 | |||
| 154 | if (!q) | ||
| 155 | return -ENXIO; | ||
| 156 | |||
| 157 | max_write_same_sectors = q->limits.max_write_same_sectors; | ||
| 158 | |||
| 159 | if (max_write_same_sectors == 0) | ||
| 160 | return -EOPNOTSUPP; | ||
| 161 | |||
| 162 | atomic_set(&bb.done, 1); | ||
| 163 | bb.flags = 1 << BIO_UPTODATE; | ||
| 164 | bb.wait = &wait; | ||
| 165 | |||
| 166 | while (nr_sects) { | ||
| 167 | bio = bio_alloc(gfp_mask, 1); | ||
| 168 | if (!bio) { | ||
| 169 | ret = -ENOMEM; | ||
| 170 | break; | ||
| 171 | } | ||
| 172 | |||
| 173 | bio->bi_sector = sector; | ||
| 174 | bio->bi_end_io = bio_batch_end_io; | ||
| 175 | bio->bi_bdev = bdev; | ||
| 176 | bio->bi_private = &bb; | ||
| 177 | bio->bi_vcnt = 1; | ||
| 178 | bio->bi_io_vec->bv_page = page; | ||
| 179 | bio->bi_io_vec->bv_offset = 0; | ||
| 180 | bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); | ||
| 181 | |||
| 182 | if (nr_sects > max_write_same_sectors) { | ||
| 183 | bio->bi_size = max_write_same_sectors << 9; | ||
| 184 | nr_sects -= max_write_same_sectors; | ||
| 185 | sector += max_write_same_sectors; | ||
| 186 | } else { | ||
| 187 | bio->bi_size = nr_sects << 9; | ||
| 188 | nr_sects = 0; | ||
| 189 | } | ||
| 190 | |||
| 191 | atomic_inc(&bb.done); | ||
| 192 | submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio); | ||
| 193 | } | ||
| 194 | |||
| 195 | /* Wait for bios in-flight */ | ||
| 196 | if (!atomic_dec_and_test(&bb.done)) | ||
| 197 | wait_for_completion(&wait); | ||
| 198 | |||
| 199 | if (!test_bit(BIO_UPTODATE, &bb.flags)) | ||
| 200 | ret = -ENOTSUPP; | ||
| 201 | |||
| 202 | return ret; | ||
| 203 | } | ||
| 204 | EXPORT_SYMBOL(blkdev_issue_write_same); | ||
| 205 | |||
| 206 | /** | ||
| 133 | * blkdev_issue_zeroout - generate number of zero filed write bios | 207 | * blkdev_issue_zeroout - generate number of zero filed write bios |
| 134 | * @bdev: blockdev to issue | 208 | * @bdev: blockdev to issue |
| 135 | * @sector: start sector | 209 | * @sector: start sector |
| @@ -140,7 +214,7 @@ EXPORT_SYMBOL(blkdev_issue_discard); | |||
| 140 | * Generate and issue number of bios with zerofiled pages. | 214 | * Generate and issue number of bios with zerofiled pages. |
| 141 | */ | 215 | */ |
| 142 | 216 | ||
| 143 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 217 | int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
| 144 | sector_t nr_sects, gfp_t gfp_mask) | 218 | sector_t nr_sects, gfp_t gfp_mask) |
| 145 | { | 219 | { |
| 146 | int ret; | 220 | int ret; |
| @@ -190,4 +264,32 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | |||
| 190 | 264 | ||
| 191 | return ret; | 265 | return ret; |
| 192 | } | 266 | } |
| 267 | |||
| 268 | /** | ||
| 269 | * blkdev_issue_zeroout - zero-fill a block range | ||
| 270 | * @bdev: blockdev to write | ||
| 271 | * @sector: start sector | ||
| 272 | * @nr_sects: number of sectors to write | ||
| 273 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
| 274 | * | ||
| 275 | * Description: | ||
| 276 | * Generate and issue number of bios with zerofiled pages. | ||
| 277 | */ | ||
| 278 | |||
| 279 | int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | ||
| 280 | sector_t nr_sects, gfp_t gfp_mask) | ||
| 281 | { | ||
| 282 | if (bdev_write_same(bdev)) { | ||
| 283 | unsigned char bdn[BDEVNAME_SIZE]; | ||
| 284 | |||
| 285 | if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, | ||
| 286 | ZERO_PAGE(0))) | ||
| 287 | return 0; | ||
| 288 | |||
| 289 | bdevname(bdev, bdn); | ||
| 290 | pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn); | ||
| 291 | } | ||
| 292 | |||
| 293 | return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask); | ||
| 294 | } | ||
| 193 | EXPORT_SYMBOL(blkdev_issue_zeroout); | 295 | EXPORT_SYMBOL(blkdev_issue_zeroout); |
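For in-kernel callers, the point of the split above is that blkdev_issue_zeroout() now tries WRITE SAME with the zero page and only then falls back to __blkdev_issue_zeroout(). A hedged sketch of how a filesystem or stacking driver might use the exported helpers; wipe_range() is a hypothetical caller, not part of this series:

#include <linux/blkdev.h>
#include <linux/mm.h>		/* ZERO_PAGE() */

/* Hypothetical helper: wipe a sector range, preferring WRITE SAME. */
static int wipe_range(struct block_device *bdev, sector_t start, sector_t nr_sects)
{
	/* Use a single-buffer WRITE SAME when the device advertises it... */
	if (bdev_write_same(bdev))
		return blkdev_issue_write_same(bdev, start, nr_sects,
					       GFP_KERNEL, ZERO_PAGE(0));

	/* ...otherwise let the zeroout path generate zero-filled writes. */
	return blkdev_issue_zeroout(bdev, start, nr_sects, GFP_KERNEL);
}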
diff --git a/block/blk-merge.c b/block/blk-merge.c index e76279e41162..936a110de0b9 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
| @@ -275,14 +275,8 @@ no_merge: | |||
| 275 | int ll_back_merge_fn(struct request_queue *q, struct request *req, | 275 | int ll_back_merge_fn(struct request_queue *q, struct request *req, |
| 276 | struct bio *bio) | 276 | struct bio *bio) |
| 277 | { | 277 | { |
| 278 | unsigned short max_sectors; | 278 | if (blk_rq_sectors(req) + bio_sectors(bio) > |
| 279 | 279 | blk_rq_get_max_sectors(req)) { | |
| 280 | if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) | ||
| 281 | max_sectors = queue_max_hw_sectors(q); | ||
| 282 | else | ||
| 283 | max_sectors = queue_max_sectors(q); | ||
| 284 | |||
| 285 | if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { | ||
| 286 | req->cmd_flags |= REQ_NOMERGE; | 280 | req->cmd_flags |= REQ_NOMERGE; |
| 287 | if (req == q->last_merge) | 281 | if (req == q->last_merge) |
| 288 | q->last_merge = NULL; | 282 | q->last_merge = NULL; |
| @@ -299,15 +293,8 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, | |||
| 299 | int ll_front_merge_fn(struct request_queue *q, struct request *req, | 293 | int ll_front_merge_fn(struct request_queue *q, struct request *req, |
| 300 | struct bio *bio) | 294 | struct bio *bio) |
| 301 | { | 295 | { |
| 302 | unsigned short max_sectors; | 296 | if (blk_rq_sectors(req) + bio_sectors(bio) > |
| 303 | 297 | blk_rq_get_max_sectors(req)) { | |
| 304 | if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) | ||
| 305 | max_sectors = queue_max_hw_sectors(q); | ||
| 306 | else | ||
| 307 | max_sectors = queue_max_sectors(q); | ||
| 308 | |||
| 309 | |||
| 310 | if (blk_rq_sectors(req) + bio_sectors(bio) > max_sectors) { | ||
| 311 | req->cmd_flags |= REQ_NOMERGE; | 298 | req->cmd_flags |= REQ_NOMERGE; |
| 312 | if (req == q->last_merge) | 299 | if (req == q->last_merge) |
| 313 | q->last_merge = NULL; | 300 | q->last_merge = NULL; |
| @@ -338,7 +325,8 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, | |||
| 338 | /* | 325 | /* |
| 339 | * Will it become too large? | 326 | * Will it become too large? |
| 340 | */ | 327 | */ |
| 341 | if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > queue_max_sectors(q)) | 328 | if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > |
| 329 | blk_rq_get_max_sectors(req)) | ||
| 342 | return 0; | 330 | return 0; |
| 343 | 331 | ||
| 344 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; | 332 | total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; |
| @@ -417,16 +405,7 @@ static int attempt_merge(struct request_queue *q, struct request *req, | |||
| 417 | if (!rq_mergeable(req) || !rq_mergeable(next)) | 405 | if (!rq_mergeable(req) || !rq_mergeable(next)) |
| 418 | return 0; | 406 | return 0; |
| 419 | 407 | ||
| 420 | /* | 408 | if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) |
| 421 | * Don't merge file system requests and discard requests | ||
| 422 | */ | ||
| 423 | if ((req->cmd_flags & REQ_DISCARD) != (next->cmd_flags & REQ_DISCARD)) | ||
| 424 | return 0; | ||
| 425 | |||
| 426 | /* | ||
| 427 | * Don't merge discard requests and secure discard requests | ||
| 428 | */ | ||
| 429 | if ((req->cmd_flags & REQ_SECURE) != (next->cmd_flags & REQ_SECURE)) | ||
| 430 | return 0; | 409 | return 0; |
| 431 | 410 | ||
| 432 | /* | 411 | /* |
| @@ -440,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, | |||
| 440 | || next->special) | 419 | || next->special) |
| 441 | return 0; | 420 | return 0; |
| 442 | 421 | ||
| 422 | if (req->cmd_flags & REQ_WRITE_SAME && | ||
| 423 | !blk_write_same_mergeable(req->bio, next->bio)) | ||
| 424 | return 0; | ||
| 425 | |||
| 443 | /* | 426 | /* |
| 444 | * If we are allowed to merge, then append bio list | 427 | * If we are allowed to merge, then append bio list |
| 445 | * from next to rq and release next. merge_requests_fn | 428 | * from next to rq and release next. merge_requests_fn |
| @@ -521,15 +504,10 @@ int blk_attempt_req_merge(struct request_queue *q, struct request *rq, | |||
| 521 | 504 | ||
| 522 | bool blk_rq_merge_ok(struct request *rq, struct bio *bio) | 505 | bool blk_rq_merge_ok(struct request *rq, struct bio *bio) |
| 523 | { | 506 | { |
| 524 | if (!rq_mergeable(rq)) | 507 | if (!rq_mergeable(rq) || !bio_mergeable(bio)) |
| 525 | return false; | 508 | return false; |
| 526 | 509 | ||
| 527 | /* don't merge file system requests and discard requests */ | 510 | if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) |
| 528 | if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) | ||
| 529 | return false; | ||
| 530 | |||
| 531 | /* don't merge discard requests and secure discard requests */ | ||
| 532 | if ((bio->bi_rw & REQ_SECURE) != (rq->bio->bi_rw & REQ_SECURE)) | ||
| 533 | return false; | 511 | return false; |
| 534 | 512 | ||
| 535 | /* different data direction or already started, don't merge */ | 513 | /* different data direction or already started, don't merge */ |
| @@ -544,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) | |||
| 544 | if (bio_integrity(bio) != blk_integrity_rq(rq)) | 522 | if (bio_integrity(bio) != blk_integrity_rq(rq)) |
| 545 | return false; | 523 | return false; |
| 546 | 524 | ||
| 525 | /* must be using the same buffer */ | ||
| 526 | if (rq->cmd_flags & REQ_WRITE_SAME && | ||
| 527 | !blk_write_same_mergeable(rq->bio, bio)) | ||
| 528 | return false; | ||
| 529 | |||
| 547 | return true; | 530 | return true; |
| 548 | } | 531 | } |
| 549 | 532 | ||
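The "must be using the same buffer" checks call blk_check_merge_flags() and blk_write_same_mergeable(), neither of which appears in this hunk. A hedged sketch of what the WRITE SAME check presumably reduces to, on the assumption that two WRITE SAME bios may only merge when they replicate the very same payload:

/* Sketch only: the real helper is added to the block headers by this series. */
static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b)
{
	/* Both bios must point at the same data buffer to be mergeable. */
	return bio_data(a) == bio_data(b);
}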
diff --git a/block/blk-settings.c b/block/blk-settings.c index 565a6786032f..779bb7646bcd 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c | |||
| @@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim) | |||
| 113 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; | 113 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; |
| 114 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; | 114 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; |
| 115 | lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; | 115 | lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; |
| 116 | lim->max_write_same_sectors = 0; | ||
| 116 | lim->max_discard_sectors = 0; | 117 | lim->max_discard_sectors = 0; |
| 117 | lim->discard_granularity = 0; | 118 | lim->discard_granularity = 0; |
| 118 | lim->discard_alignment = 0; | 119 | lim->discard_alignment = 0; |
| @@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) | |||
| 144 | lim->max_segments = USHRT_MAX; | 145 | lim->max_segments = USHRT_MAX; |
| 145 | lim->max_hw_sectors = UINT_MAX; | 146 | lim->max_hw_sectors = UINT_MAX; |
| 146 | lim->max_sectors = UINT_MAX; | 147 | lim->max_sectors = UINT_MAX; |
| 148 | lim->max_write_same_sectors = UINT_MAX; | ||
| 147 | } | 149 | } |
| 148 | EXPORT_SYMBOL(blk_set_stacking_limits); | 150 | EXPORT_SYMBOL(blk_set_stacking_limits); |
| 149 | 151 | ||
| @@ -286,6 +288,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q, | |||
| 286 | EXPORT_SYMBOL(blk_queue_max_discard_sectors); | 288 | EXPORT_SYMBOL(blk_queue_max_discard_sectors); |
| 287 | 289 | ||
| 288 | /** | 290 | /** |
| 291 | * blk_queue_max_write_same_sectors - set max sectors for a single write same | ||
| 292 | * @q: the request queue for the device | ||
| 293 | * @max_write_same_sectors: maximum number of sectors to write per command | ||
| 294 | **/ | ||
| 295 | void blk_queue_max_write_same_sectors(struct request_queue *q, | ||
| 296 | unsigned int max_write_same_sectors) | ||
| 297 | { | ||
| 298 | q->limits.max_write_same_sectors = max_write_same_sectors; | ||
| 299 | } | ||
| 300 | EXPORT_SYMBOL(blk_queue_max_write_same_sectors); | ||
| 301 | |||
| 302 | /** | ||
| 289 | * blk_queue_max_segments - set max hw segments for a request for this queue | 303 | * blk_queue_max_segments - set max hw segments for a request for this queue |
| 290 | * @q: the request queue for the device | 304 | * @q: the request queue for the device |
| 291 | * @max_segments: max number of segments | 305 | * @max_segments: max number of segments |
| @@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, | |||
| 510 | 524 | ||
| 511 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); | 525 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); |
| 512 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); | 526 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); |
| 527 | t->max_write_same_sectors = min(t->max_write_same_sectors, | ||
| 528 | b->max_write_same_sectors); | ||
| 513 | t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); | 529 | t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); |
| 514 | 530 | ||
| 515 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, | 531 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, |
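Low-level drivers opt in through the new limit helper during queue setup, and stacking drivers inherit the limit via blk_stack_limits() as shown above. A sketch of a hypothetical driver (the function name and the 32 MiB hardware limit are illustrative assumptions):

#include <linux/blkdev.h>

static void mydrv_configure_queue(struct request_queue *q)
{
	/* Assume the hardware accepts up to 32 MiB (65536 sectors) per WRITE SAME. */
	blk_queue_max_write_same_sectors(q, 65536);
}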
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 9628b291f960..ce6204608822 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
| @@ -26,9 +26,15 @@ queue_var_show(unsigned long var, char *page) | |||
| 26 | static ssize_t | 26 | static ssize_t |
| 27 | queue_var_store(unsigned long *var, const char *page, size_t count) | 27 | queue_var_store(unsigned long *var, const char *page, size_t count) |
| 28 | { | 28 | { |
| 29 | char *p = (char *) page; | 29 | int err; |
| 30 | unsigned long v; | ||
| 31 | |||
| 32 | err = strict_strtoul(page, 10, &v); | ||
| 33 | if (err || v > UINT_MAX) | ||
| 34 | return -EINVAL; | ||
| 35 | |||
| 36 | *var = v; | ||
| 30 | 37 | ||
| 31 | *var = simple_strtoul(p, &p, 10); | ||
| 32 | return count; | 38 | return count; |
| 33 | } | 39 | } |
| 34 | 40 | ||
| @@ -48,6 +54,9 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) | |||
| 48 | return -EINVAL; | 54 | return -EINVAL; |
| 49 | 55 | ||
| 50 | ret = queue_var_store(&nr, page, count); | 56 | ret = queue_var_store(&nr, page, count); |
| 57 | if (ret < 0) | ||
| 58 | return ret; | ||
| 59 | |||
| 51 | if (nr < BLKDEV_MIN_RQ) | 60 | if (nr < BLKDEV_MIN_RQ) |
| 52 | nr = BLKDEV_MIN_RQ; | 61 | nr = BLKDEV_MIN_RQ; |
| 53 | 62 | ||
| @@ -102,6 +111,9 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count) | |||
| 102 | unsigned long ra_kb; | 111 | unsigned long ra_kb; |
| 103 | ssize_t ret = queue_var_store(&ra_kb, page, count); | 112 | ssize_t ret = queue_var_store(&ra_kb, page, count); |
| 104 | 113 | ||
| 114 | if (ret < 0) | ||
| 115 | return ret; | ||
| 116 | |||
| 105 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); | 117 | q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10); |
| 106 | 118 | ||
| 107 | return ret; | 119 | return ret; |
| @@ -168,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag | |||
| 168 | return queue_var_show(queue_discard_zeroes_data(q), page); | 180 | return queue_var_show(queue_discard_zeroes_data(q), page); |
| 169 | } | 181 | } |
| 170 | 182 | ||
| 183 | static ssize_t queue_write_same_max_show(struct request_queue *q, char *page) | ||
| 184 | { | ||
| 185 | return sprintf(page, "%llu\n", | ||
| 186 | (unsigned long long)q->limits.max_write_same_sectors << 9); | ||
| 187 | } | ||
| 188 | |||
| 189 | |||
| 171 | static ssize_t | 190 | static ssize_t |
| 172 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | 191 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) |
| 173 | { | 192 | { |
| @@ -176,6 +195,9 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | |||
| 176 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); | 195 | page_kb = 1 << (PAGE_CACHE_SHIFT - 10); |
| 177 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); | 196 | ssize_t ret = queue_var_store(&max_sectors_kb, page, count); |
| 178 | 197 | ||
| 198 | if (ret < 0) | ||
| 199 | return ret; | ||
| 200 | |||
| 179 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) | 201 | if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb) |
| 180 | return -EINVAL; | 202 | return -EINVAL; |
| 181 | 203 | ||
| @@ -236,6 +258,9 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page, | |||
| 236 | unsigned long nm; | 258 | unsigned long nm; |
| 237 | ssize_t ret = queue_var_store(&nm, page, count); | 259 | ssize_t ret = queue_var_store(&nm, page, count); |
| 238 | 260 | ||
| 261 | if (ret < 0) | ||
| 262 | return ret; | ||
| 263 | |||
| 239 | spin_lock_irq(q->queue_lock); | 264 | spin_lock_irq(q->queue_lock); |
| 240 | queue_flag_clear(QUEUE_FLAG_NOMERGES, q); | 265 | queue_flag_clear(QUEUE_FLAG_NOMERGES, q); |
| 241 | queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); | 266 | queue_flag_clear(QUEUE_FLAG_NOXMERGES, q); |
| @@ -264,6 +289,9 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) | |||
| 264 | unsigned long val; | 289 | unsigned long val; |
| 265 | 290 | ||
| 266 | ret = queue_var_store(&val, page, count); | 291 | ret = queue_var_store(&val, page, count); |
| 292 | if (ret < 0) | ||
| 293 | return ret; | ||
| 294 | |||
| 267 | spin_lock_irq(q->queue_lock); | 295 | spin_lock_irq(q->queue_lock); |
| 268 | if (val == 2) { | 296 | if (val == 2) { |
| 269 | queue_flag_set(QUEUE_FLAG_SAME_COMP, q); | 297 | queue_flag_set(QUEUE_FLAG_SAME_COMP, q); |
| @@ -364,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { | |||
| 364 | .show = queue_discard_zeroes_data_show, | 392 | .show = queue_discard_zeroes_data_show, |
| 365 | }; | 393 | }; |
| 366 | 394 | ||
| 395 | static struct queue_sysfs_entry queue_write_same_max_entry = { | ||
| 396 | .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO }, | ||
| 397 | .show = queue_write_same_max_show, | ||
| 398 | }; | ||
| 399 | |||
| 367 | static struct queue_sysfs_entry queue_nonrot_entry = { | 400 | static struct queue_sysfs_entry queue_nonrot_entry = { |
| 368 | .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, | 401 | .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, |
| 369 | .show = queue_show_nonrot, | 402 | .show = queue_show_nonrot, |
| @@ -411,6 +444,7 @@ static struct attribute *default_attrs[] = { | |||
| 411 | &queue_discard_granularity_entry.attr, | 444 | &queue_discard_granularity_entry.attr, |
| 412 | &queue_discard_max_entry.attr, | 445 | &queue_discard_max_entry.attr, |
| 413 | &queue_discard_zeroes_data_entry.attr, | 446 | &queue_discard_zeroes_data_entry.attr, |
| 447 | &queue_write_same_max_entry.attr, | ||
| 414 | &queue_nonrot_entry.attr, | 448 | &queue_nonrot_entry.attr, |
| 415 | &queue_nomerges_entry.attr, | 449 | &queue_nomerges_entry.attr, |
| 416 | &queue_rq_affinity_entry.attr, | 450 | &queue_rq_affinity_entry.attr, |
| @@ -527,6 +561,12 @@ int blk_register_queue(struct gendisk *disk) | |||
| 527 | if (WARN_ON(!q)) | 561 | if (WARN_ON(!q)) |
| 528 | return -ENXIO; | 562 | return -ENXIO; |
| 529 | 563 | ||
| 564 | /* | ||
| 565 | * Initialization must be complete by now. Finish the initial | ||
| 566 | * bypass from queue allocation. | ||
| 567 | */ | ||
| 568 | blk_queue_bypass_end(q); | ||
| 569 | |||
| 530 | ret = blk_trace_init_sysfs(dev); | 570 | ret = blk_trace_init_sysfs(dev); |
| 531 | if (ret) | 571 | if (ret) |
| 532 | return ret; | 572 | return ret; |
diff --git a/block/blk-tag.c b/block/blk-tag.c index 4af6f5cc1167..cc345e1d8d4e 100644 --- a/block/blk-tag.c +++ b/block/blk-tag.c | |||
| @@ -186,7 +186,8 @@ int blk_queue_init_tags(struct request_queue *q, int depth, | |||
| 186 | tags = __blk_queue_init_tags(q, depth); | 186 | tags = __blk_queue_init_tags(q, depth); |
| 187 | 187 | ||
| 188 | if (!tags) | 188 | if (!tags) |
| 189 | goto fail; | 189 | return -ENOMEM; |
| 190 | |||
| 190 | } else if (q->queue_tags) { | 191 | } else if (q->queue_tags) { |
| 191 | rc = blk_queue_resize_tags(q, depth); | 192 | rc = blk_queue_resize_tags(q, depth); |
| 192 | if (rc) | 193 | if (rc) |
| @@ -203,9 +204,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth, | |||
| 203 | queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); | 204 | queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); |
| 204 | INIT_LIST_HEAD(&q->tag_busy_list); | 205 | INIT_LIST_HEAD(&q->tag_busy_list); |
| 205 | return 0; | 206 | return 0; |
| 206 | fail: | ||
| 207 | kfree(tags); | ||
| 208 | return -ENOMEM; | ||
| 209 | } | 207 | } |
| 210 | EXPORT_SYMBOL(blk_queue_init_tags); | 208 | EXPORT_SYMBOL(blk_queue_init_tags); |
| 211 | 209 | ||
diff --git a/block/blk.h b/block/blk.h index 2a0ea32d249f..ca51543b248c 100644 --- a/block/blk.h +++ b/block/blk.h | |||
| @@ -171,14 +171,13 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) | |||
| 171 | * | 171 | * |
| 172 | * a) it's attached to a gendisk, and | 172 | * a) it's attached to a gendisk, and |
| 173 | * b) the queue had IO stats enabled when this request was started, and | 173 | * b) the queue had IO stats enabled when this request was started, and |
| 174 | * c) it's a file system request or a discard request | 174 | * c) it's a file system request |
| 175 | */ | 175 | */ |
| 176 | static inline int blk_do_io_stat(struct request *rq) | 176 | static inline int blk_do_io_stat(struct request *rq) |
| 177 | { | 177 | { |
| 178 | return rq->rq_disk && | 178 | return rq->rq_disk && |
| 179 | (rq->cmd_flags & REQ_IO_STAT) && | 179 | (rq->cmd_flags & REQ_IO_STAT) && |
| 180 | (rq->cmd_type == REQ_TYPE_FS || | 180 | (rq->cmd_type == REQ_TYPE_FS); |
| 181 | (rq->cmd_flags & REQ_DISCARD)); | ||
| 182 | } | 181 | } |
| 183 | 182 | ||
| 184 | /* | 183 | /* |
diff --git a/block/elevator.c b/block/elevator.c index 6a55d418896f..9b1d42b62f20 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
| @@ -562,8 +562,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) | |||
| 562 | 562 | ||
| 563 | if (rq->cmd_flags & REQ_SOFTBARRIER) { | 563 | if (rq->cmd_flags & REQ_SOFTBARRIER) { |
| 564 | /* barriers are scheduling boundary, update end_sector */ | 564 | /* barriers are scheduling boundary, update end_sector */ |
| 565 | if (rq->cmd_type == REQ_TYPE_FS || | 565 | if (rq->cmd_type == REQ_TYPE_FS) { |
| 566 | (rq->cmd_flags & REQ_DISCARD)) { | ||
| 567 | q->end_sector = rq_end_sector(rq); | 566 | q->end_sector = rq_end_sector(rq); |
| 568 | q->boundary_rq = rq; | 567 | q->boundary_rq = rq; |
| 569 | } | 568 | } |
| @@ -605,8 +604,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where) | |||
| 605 | if (elv_attempt_insert_merge(q, rq)) | 604 | if (elv_attempt_insert_merge(q, rq)) |
| 606 | break; | 605 | break; |
| 607 | case ELEVATOR_INSERT_SORT: | 606 | case ELEVATOR_INSERT_SORT: |
| 608 | BUG_ON(rq->cmd_type != REQ_TYPE_FS && | 607 | BUG_ON(rq->cmd_type != REQ_TYPE_FS); |
| 609 | !(rq->cmd_flags & REQ_DISCARD)); | ||
| 610 | rq->cmd_flags |= REQ_SORTED; | 608 | rq->cmd_flags |= REQ_SORTED; |
| 611 | q->nr_sorted++; | 609 | q->nr_sorted++; |
| 612 | if (rq_mergeable(rq)) { | 610 | if (rq_mergeable(rq)) { |
diff --git a/block/ioctl.c b/block/ioctl.c index 4a85096f5410..a31d91d9bc5a 100644 --- a/block/ioctl.c +++ b/block/ioctl.c | |||
| @@ -185,6 +185,22 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, | |||
| 185 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); | 185 | return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); |
| 186 | } | 186 | } |
| 187 | 187 | ||
| 188 | static int blk_ioctl_zeroout(struct block_device *bdev, uint64_t start, | ||
| 189 | uint64_t len) | ||
| 190 | { | ||
| 191 | if (start & 511) | ||
| 192 | return -EINVAL; | ||
| 193 | if (len & 511) | ||
| 194 | return -EINVAL; | ||
| 195 | start >>= 9; | ||
| 196 | len >>= 9; | ||
| 197 | |||
| 198 | if (start + len > (i_size_read(bdev->bd_inode) >> 9)) | ||
| 199 | return -EINVAL; | ||
| 200 | |||
| 201 | return blkdev_issue_zeroout(bdev, start, len, GFP_KERNEL); | ||
| 202 | } | ||
| 203 | |||
| 188 | static int put_ushort(unsigned long arg, unsigned short val) | 204 | static int put_ushort(unsigned long arg, unsigned short val) |
| 189 | { | 205 | { |
| 190 | return put_user(val, (unsigned short __user *)arg); | 206 | return put_user(val, (unsigned short __user *)arg); |
| @@ -300,6 +316,17 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, | |||
| 300 | return blk_ioctl_discard(bdev, range[0], range[1], | 316 | return blk_ioctl_discard(bdev, range[0], range[1], |
| 301 | cmd == BLKSECDISCARD); | 317 | cmd == BLKSECDISCARD); |
| 302 | } | 318 | } |
| 319 | case BLKZEROOUT: { | ||
| 320 | uint64_t range[2]; | ||
| 321 | |||
| 322 | if (!(mode & FMODE_WRITE)) | ||
| 323 | return -EBADF; | ||
| 324 | |||
| 325 | if (copy_from_user(range, (void __user *)arg, sizeof(range))) | ||
| 326 | return -EFAULT; | ||
| 327 | |||
| 328 | return blk_ioctl_zeroout(bdev, range[0], range[1]); | ||
| 329 | } | ||
| 303 | 330 | ||
| 304 | case HDIO_GETGEO: { | 331 | case HDIO_GETGEO: { |
| 305 | struct hd_geometry geo; | 332 | struct hd_geometry geo; |
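BLKZEROOUT takes a {byte offset, byte length} pair, both of which must be 512-byte aligned per blk_ioctl_zeroout() above. A minimal userspace example; the device path and range are illustrative:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>		/* BLKZEROOUT */

int main(void)
{
	/* Zero the first MiB of the device: { byte offset, byte length }. */
	uint64_t range[2] = { 0, 1024 * 1024 };
	int fd = open("/dev/sdb", O_WRONLY);	/* example device */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (ioctl(fd, BLKZEROOUT, &range) < 0)
		perror("BLKZEROOUT");
	close(fd);
	return 0;
}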
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index f93a0320e952..f55683ad4ffa 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
| @@ -162,23 +162,12 @@ static const struct block_device_operations drbd_ops = { | |||
| 162 | .release = drbd_release, | 162 | .release = drbd_release, |
| 163 | }; | 163 | }; |
| 164 | 164 | ||
| 165 | static void bio_destructor_drbd(struct bio *bio) | ||
| 166 | { | ||
| 167 | bio_free(bio, drbd_md_io_bio_set); | ||
| 168 | } | ||
| 169 | |||
| 170 | struct bio *bio_alloc_drbd(gfp_t gfp_mask) | 165 | struct bio *bio_alloc_drbd(gfp_t gfp_mask) |
| 171 | { | 166 | { |
| 172 | struct bio *bio; | ||
| 173 | |||
| 174 | if (!drbd_md_io_bio_set) | 167 | if (!drbd_md_io_bio_set) |
| 175 | return bio_alloc(gfp_mask, 1); | 168 | return bio_alloc(gfp_mask, 1); |
| 176 | 169 | ||
| 177 | bio = bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); | 170 | return bio_alloc_bioset(gfp_mask, 1, drbd_md_io_bio_set); |
| 178 | if (!bio) | ||
| 179 | return NULL; | ||
| 180 | bio->bi_destructor = bio_destructor_drbd; | ||
| 181 | return bio; | ||
| 182 | } | 171 | } |
| 183 | 172 | ||
| 184 | #ifdef __CHECKER__ | 173 | #ifdef __CHECKER__ |
diff --git a/drivers/block/osdblk.c b/drivers/block/osdblk.c index 87311ebac0db..1bbc681688e4 100644 --- a/drivers/block/osdblk.c +++ b/drivers/block/osdblk.c | |||
| @@ -266,11 +266,10 @@ static struct bio *bio_chain_clone(struct bio *old_chain, gfp_t gfpmask) | |||
| 266 | struct bio *tmp, *new_chain = NULL, *tail = NULL; | 266 | struct bio *tmp, *new_chain = NULL, *tail = NULL; |
| 267 | 267 | ||
| 268 | while (old_chain) { | 268 | while (old_chain) { |
| 269 | tmp = bio_kmalloc(gfpmask, old_chain->bi_max_vecs); | 269 | tmp = bio_clone_kmalloc(old_chain, gfpmask); |
| 270 | if (!tmp) | 270 | if (!tmp) |
| 271 | goto err_out; | 271 | goto err_out; |
| 272 | 272 | ||
| 273 | __bio_clone(tmp, old_chain); | ||
| 274 | tmp->bi_bdev = NULL; | 273 | tmp->bi_bdev = NULL; |
| 275 | gfpmask &= ~__GFP_WAIT; | 274 | gfpmask &= ~__GFP_WAIT; |
| 276 | tmp->bi_next = NULL; | 275 | tmp->bi_next = NULL; |
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c index ba66e4445f41..2e7de7a59bfc 100644 --- a/drivers/block/pktcdvd.c +++ b/drivers/block/pktcdvd.c | |||
| @@ -522,38 +522,6 @@ static void pkt_bio_finished(struct pktcdvd_device *pd) | |||
| 522 | } | 522 | } |
| 523 | } | 523 | } |
| 524 | 524 | ||
| 525 | static void pkt_bio_destructor(struct bio *bio) | ||
| 526 | { | ||
| 527 | kfree(bio->bi_io_vec); | ||
| 528 | kfree(bio); | ||
| 529 | } | ||
| 530 | |||
| 531 | static struct bio *pkt_bio_alloc(int nr_iovecs) | ||
| 532 | { | ||
| 533 | struct bio_vec *bvl = NULL; | ||
| 534 | struct bio *bio; | ||
| 535 | |||
| 536 | bio = kmalloc(sizeof(struct bio), GFP_KERNEL); | ||
| 537 | if (!bio) | ||
| 538 | goto no_bio; | ||
| 539 | bio_init(bio); | ||
| 540 | |||
| 541 | bvl = kcalloc(nr_iovecs, sizeof(struct bio_vec), GFP_KERNEL); | ||
| 542 | if (!bvl) | ||
| 543 | goto no_bvl; | ||
| 544 | |||
| 545 | bio->bi_max_vecs = nr_iovecs; | ||
| 546 | bio->bi_io_vec = bvl; | ||
| 547 | bio->bi_destructor = pkt_bio_destructor; | ||
| 548 | |||
| 549 | return bio; | ||
| 550 | |||
| 551 | no_bvl: | ||
| 552 | kfree(bio); | ||
| 553 | no_bio: | ||
| 554 | return NULL; | ||
| 555 | } | ||
| 556 | |||
| 557 | /* | 525 | /* |
| 558 | * Allocate a packet_data struct | 526 | * Allocate a packet_data struct |
| 559 | */ | 527 | */ |
| @@ -567,7 +535,7 @@ static struct packet_data *pkt_alloc_packet_data(int frames) | |||
| 567 | goto no_pkt; | 535 | goto no_pkt; |
| 568 | 536 | ||
| 569 | pkt->frames = frames; | 537 | pkt->frames = frames; |
| 570 | pkt->w_bio = pkt_bio_alloc(frames); | 538 | pkt->w_bio = bio_kmalloc(GFP_KERNEL, frames); |
| 571 | if (!pkt->w_bio) | 539 | if (!pkt->w_bio) |
| 572 | goto no_bio; | 540 | goto no_bio; |
| 573 | 541 | ||
| @@ -581,9 +549,10 @@ static struct packet_data *pkt_alloc_packet_data(int frames) | |||
| 581 | bio_list_init(&pkt->orig_bios); | 549 | bio_list_init(&pkt->orig_bios); |
| 582 | 550 | ||
| 583 | for (i = 0; i < frames; i++) { | 551 | for (i = 0; i < frames; i++) { |
| 584 | struct bio *bio = pkt_bio_alloc(1); | 552 | struct bio *bio = bio_kmalloc(GFP_KERNEL, 1); |
| 585 | if (!bio) | 553 | if (!bio) |
| 586 | goto no_rd_bio; | 554 | goto no_rd_bio; |
| 555 | |||
| 587 | pkt->r_bios[i] = bio; | 556 | pkt->r_bios[i] = bio; |
| 588 | } | 557 | } |
| 589 | 558 | ||
| @@ -1111,21 +1080,17 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) | |||
| 1111 | * Schedule reads for missing parts of the packet. | 1080 | * Schedule reads for missing parts of the packet. |
| 1112 | */ | 1081 | */ |
| 1113 | for (f = 0; f < pkt->frames; f++) { | 1082 | for (f = 0; f < pkt->frames; f++) { |
| 1114 | struct bio_vec *vec; | ||
| 1115 | |||
| 1116 | int p, offset; | 1083 | int p, offset; |
| 1084 | |||
| 1117 | if (written[f]) | 1085 | if (written[f]) |
| 1118 | continue; | 1086 | continue; |
| 1087 | |||
| 1119 | bio = pkt->r_bios[f]; | 1088 | bio = pkt->r_bios[f]; |
| 1120 | vec = bio->bi_io_vec; | 1089 | bio_reset(bio); |
| 1121 | bio_init(bio); | ||
| 1122 | bio->bi_max_vecs = 1; | ||
| 1123 | bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); | 1090 | bio->bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); |
| 1124 | bio->bi_bdev = pd->bdev; | 1091 | bio->bi_bdev = pd->bdev; |
| 1125 | bio->bi_end_io = pkt_end_io_read; | 1092 | bio->bi_end_io = pkt_end_io_read; |
| 1126 | bio->bi_private = pkt; | 1093 | bio->bi_private = pkt; |
| 1127 | bio->bi_io_vec = vec; | ||
| 1128 | bio->bi_destructor = pkt_bio_destructor; | ||
| 1129 | 1094 | ||
| 1130 | p = (f * CD_FRAMESIZE) / PAGE_SIZE; | 1095 | p = (f * CD_FRAMESIZE) / PAGE_SIZE; |
| 1131 | offset = (f * CD_FRAMESIZE) % PAGE_SIZE; | 1096 | offset = (f * CD_FRAMESIZE) % PAGE_SIZE; |
| @@ -1418,14 +1383,11 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) | |||
| 1418 | } | 1383 | } |
| 1419 | 1384 | ||
| 1420 | /* Start the write request */ | 1385 | /* Start the write request */ |
| 1421 | bio_init(pkt->w_bio); | 1386 | bio_reset(pkt->w_bio); |
| 1422 | pkt->w_bio->bi_max_vecs = PACKET_MAX_SIZE; | ||
| 1423 | pkt->w_bio->bi_sector = pkt->sector; | 1387 | pkt->w_bio->bi_sector = pkt->sector; |
| 1424 | pkt->w_bio->bi_bdev = pd->bdev; | 1388 | pkt->w_bio->bi_bdev = pd->bdev; |
| 1425 | pkt->w_bio->bi_end_io = pkt_end_io_packet_write; | 1389 | pkt->w_bio->bi_end_io = pkt_end_io_packet_write; |
| 1426 | pkt->w_bio->bi_private = pkt; | 1390 | pkt->w_bio->bi_private = pkt; |
| 1427 | pkt->w_bio->bi_io_vec = bvec; | ||
| 1428 | pkt->w_bio->bi_destructor = pkt_bio_destructor; | ||
| 1429 | for (f = 0; f < pkt->frames; f++) | 1391 | for (f = 0; f < pkt->frames; f++) |
| 1430 | if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) | 1392 | if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset)) |
| 1431 | BUG(); | 1393 | BUG(); |
diff --git a/drivers/char/raw.c b/drivers/char/raw.c index 54a3a6d09819..0bb207eaef2f 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c | |||
| @@ -285,7 +285,7 @@ static long raw_ctl_compat_ioctl(struct file *file, unsigned int cmd, | |||
| 285 | 285 | ||
| 286 | static const struct file_operations raw_fops = { | 286 | static const struct file_operations raw_fops = { |
| 287 | .read = do_sync_read, | 287 | .read = do_sync_read, |
| 288 | .aio_read = generic_file_aio_read, | 288 | .aio_read = blkdev_aio_read, |
| 289 | .write = do_sync_write, | 289 | .write = do_sync_write, |
| 290 | .aio_write = blkdev_aio_write, | 290 | .aio_write = blkdev_aio_write, |
| 291 | .fsync = blkdev_fsync, | 291 | .fsync = blkdev_fsync, |
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 664743d6a6cd..bbf459bca61d 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c | |||
| @@ -798,14 +798,6 @@ static int crypt_convert(struct crypt_config *cc, | |||
| 798 | return 0; | 798 | return 0; |
| 799 | } | 799 | } |
| 800 | 800 | ||
| 801 | static void dm_crypt_bio_destructor(struct bio *bio) | ||
| 802 | { | ||
| 803 | struct dm_crypt_io *io = bio->bi_private; | ||
| 804 | struct crypt_config *cc = io->cc; | ||
| 805 | |||
| 806 | bio_free(bio, cc->bs); | ||
| 807 | } | ||
| 808 | |||
| 809 | /* | 801 | /* |
| 810 | * Generate a new unfragmented bio with the given size | 802 | * Generate a new unfragmented bio with the given size |
| 811 | * This should never violate the device limitations | 803 | * This should never violate the device limitations |
| @@ -974,7 +966,6 @@ static void clone_init(struct dm_crypt_io *io, struct bio *clone) | |||
| 974 | clone->bi_end_io = crypt_endio; | 966 | clone->bi_end_io = crypt_endio; |
| 975 | clone->bi_bdev = cc->dev->bdev; | 967 | clone->bi_bdev = cc->dev->bdev; |
| 976 | clone->bi_rw = io->base_bio->bi_rw; | 968 | clone->bi_rw = io->base_bio->bi_rw; |
| 977 | clone->bi_destructor = dm_crypt_bio_destructor; | ||
| 978 | } | 969 | } |
| 979 | 970 | ||
| 980 | static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) | 971 | static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) |
| @@ -988,19 +979,14 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp) | |||
| 988 | * copy the required bvecs because we need the original | 979 | * copy the required bvecs because we need the original |
| 989 | * one in order to decrypt the whole bio data *afterwards*. | 980 | * one in order to decrypt the whole bio data *afterwards*. |
| 990 | */ | 981 | */ |
| 991 | clone = bio_alloc_bioset(gfp, bio_segments(base_bio), cc->bs); | 982 | clone = bio_clone_bioset(base_bio, gfp, cc->bs); |
| 992 | if (!clone) | 983 | if (!clone) |
| 993 | return 1; | 984 | return 1; |
| 994 | 985 | ||
| 995 | crypt_inc_pending(io); | 986 | crypt_inc_pending(io); |
| 996 | 987 | ||
| 997 | clone_init(io, clone); | 988 | clone_init(io, clone); |
| 998 | clone->bi_idx = 0; | ||
| 999 | clone->bi_vcnt = bio_segments(base_bio); | ||
| 1000 | clone->bi_size = base_bio->bi_size; | ||
| 1001 | clone->bi_sector = cc->start + io->sector; | 989 | clone->bi_sector = cc->start + io->sector; |
| 1002 | memcpy(clone->bi_io_vec, bio_iovec(base_bio), | ||
| 1003 | sizeof(struct bio_vec) * clone->bi_vcnt); | ||
| 1004 | 990 | ||
| 1005 | generic_make_request(clone); | 991 | generic_make_request(clone); |
| 1006 | return 0; | 992 | return 0; |
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index ea5dd289fe2a..1c46f97d6664 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c | |||
| @@ -249,16 +249,6 @@ static void vm_dp_init(struct dpages *dp, void *data) | |||
| 249 | dp->context_ptr = data; | 249 | dp->context_ptr = data; |
| 250 | } | 250 | } |
| 251 | 251 | ||
| 252 | static void dm_bio_destructor(struct bio *bio) | ||
| 253 | { | ||
| 254 | unsigned region; | ||
| 255 | struct io *io; | ||
| 256 | |||
| 257 | retrieve_io_and_region_from_bio(bio, &io, ®ion); | ||
| 258 | |||
| 259 | bio_free(bio, io->client->bios); | ||
| 260 | } | ||
| 261 | |||
| 262 | /* | 252 | /* |
| 263 | * Functions for getting the pages from kernel memory. | 253 | * Functions for getting the pages from kernel memory. |
| 264 | */ | 254 | */ |
| @@ -317,7 +307,6 @@ static void do_region(int rw, unsigned region, struct dm_io_region *where, | |||
| 317 | bio->bi_sector = where->sector + (where->count - remaining); | 307 | bio->bi_sector = where->sector + (where->count - remaining); |
| 318 | bio->bi_bdev = where->bdev; | 308 | bio->bi_bdev = where->bdev; |
| 319 | bio->bi_end_io = endio; | 309 | bio->bi_end_io = endio; |
| 320 | bio->bi_destructor = dm_bio_destructor; | ||
| 321 | store_io_and_region_in_bio(bio, io, region); | 310 | store_io_and_region_in_bio(bio, io, region); |
| 322 | 311 | ||
| 323 | if (rw & REQ_DISCARD) { | 312 | if (rw & REQ_DISCARD) { |
diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 67ffa391edcf..66ceaff6455c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c | |||
| @@ -86,12 +86,17 @@ struct dm_rq_target_io { | |||
| 86 | }; | 86 | }; |
| 87 | 87 | ||
| 88 | /* | 88 | /* |
| 89 | * For request-based dm. | 89 | * For request-based dm - the bio clones we allocate are embedded in these |
| 90 | * One of these is allocated per bio. | 90 | * structs. |
| 91 | * | ||
| 92 | * We allocate these with bio_alloc_bioset, using the front_pad parameter when | ||
| 93 | * the bioset is created - this means the bio has to come at the end of the | ||
| 94 | * struct. | ||
| 91 | */ | 95 | */ |
| 92 | struct dm_rq_clone_bio_info { | 96 | struct dm_rq_clone_bio_info { |
| 93 | struct bio *orig; | 97 | struct bio *orig; |
| 94 | struct dm_rq_target_io *tio; | 98 | struct dm_rq_target_io *tio; |
| 99 | struct bio clone; | ||
| 95 | }; | 100 | }; |
| 96 | 101 | ||
| 97 | union map_info *dm_get_mapinfo(struct bio *bio) | 102 | union map_info *dm_get_mapinfo(struct bio *bio) |
| @@ -211,6 +216,11 @@ struct dm_md_mempools { | |||
| 211 | static struct kmem_cache *_io_cache; | 216 | static struct kmem_cache *_io_cache; |
| 212 | static struct kmem_cache *_tio_cache; | 217 | static struct kmem_cache *_tio_cache; |
| 213 | static struct kmem_cache *_rq_tio_cache; | 218 | static struct kmem_cache *_rq_tio_cache; |
| 219 | |||
| 220 | /* | ||
| 221 | * Unused now, and needs to be deleted. But since io_pool is overloaded and it's | ||
| 222 | * still used for _io_cache, I'm leaving this for a later cleanup | ||
| 223 | */ | ||
| 214 | static struct kmem_cache *_rq_bio_info_cache; | 224 | static struct kmem_cache *_rq_bio_info_cache; |
| 215 | 225 | ||
| 216 | static int __init local_init(void) | 226 | static int __init local_init(void) |
| @@ -467,16 +477,6 @@ static void free_rq_tio(struct dm_rq_target_io *tio) | |||
| 467 | mempool_free(tio, tio->md->tio_pool); | 477 | mempool_free(tio, tio->md->tio_pool); |
| 468 | } | 478 | } |
| 469 | 479 | ||
| 470 | static struct dm_rq_clone_bio_info *alloc_bio_info(struct mapped_device *md) | ||
| 471 | { | ||
| 472 | return mempool_alloc(md->io_pool, GFP_ATOMIC); | ||
| 473 | } | ||
| 474 | |||
| 475 | static void free_bio_info(struct dm_rq_clone_bio_info *info) | ||
| 476 | { | ||
| 477 | mempool_free(info, info->tio->md->io_pool); | ||
| 478 | } | ||
| 479 | |||
| 480 | static int md_in_flight(struct mapped_device *md) | 480 | static int md_in_flight(struct mapped_device *md) |
| 481 | { | 481 | { |
| 482 | return atomic_read(&md->pending[READ]) + | 482 | return atomic_read(&md->pending[READ]) + |
| @@ -681,11 +681,6 @@ static void clone_endio(struct bio *bio, int error) | |||
| 681 | } | 681 | } |
| 682 | } | 682 | } |
| 683 | 683 | ||
| 684 | /* | ||
| 685 | * Store md for cleanup instead of tio which is about to get freed. | ||
| 686 | */ | ||
| 687 | bio->bi_private = md->bs; | ||
| 688 | |||
| 689 | free_tio(md, tio); | 684 | free_tio(md, tio); |
| 690 | bio_put(bio); | 685 | bio_put(bio); |
| 691 | dec_pending(io, error); | 686 | dec_pending(io, error); |
| @@ -1036,11 +1031,6 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, | |||
| 1036 | /* error the io and bail out, or requeue it if needed */ | 1031 | /* error the io and bail out, or requeue it if needed */ |
| 1037 | md = tio->io->md; | 1032 | md = tio->io->md; |
| 1038 | dec_pending(tio->io, r); | 1033 | dec_pending(tio->io, r); |
| 1039 | /* | ||
| 1040 | * Store bio_set for cleanup. | ||
| 1041 | */ | ||
| 1042 | clone->bi_end_io = NULL; | ||
| 1043 | clone->bi_private = md->bs; | ||
| 1044 | bio_put(clone); | 1034 | bio_put(clone); |
| 1045 | free_tio(md, tio); | 1035 | free_tio(md, tio); |
| 1046 | } else if (r) { | 1036 | } else if (r) { |
| @@ -1059,13 +1049,6 @@ struct clone_info { | |||
| 1059 | unsigned short idx; | 1049 | unsigned short idx; |
| 1060 | }; | 1050 | }; |
| 1061 | 1051 | ||
| 1062 | static void dm_bio_destructor(struct bio *bio) | ||
| 1063 | { | ||
| 1064 | struct bio_set *bs = bio->bi_private; | ||
| 1065 | |||
| 1066 | bio_free(bio, bs); | ||
| 1067 | } | ||
| 1068 | |||
| 1069 | /* | 1052 | /* |
| 1070 | * Creates a little bio that just does part of a bvec. | 1053 | * Creates a little bio that just does part of a bvec. |
| 1071 | */ | 1054 | */ |
| @@ -1077,7 +1060,6 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, | |||
| 1077 | struct bio_vec *bv = bio->bi_io_vec + idx; | 1060 | struct bio_vec *bv = bio->bi_io_vec + idx; |
| 1078 | 1061 | ||
| 1079 | clone = bio_alloc_bioset(GFP_NOIO, 1, bs); | 1062 | clone = bio_alloc_bioset(GFP_NOIO, 1, bs); |
| 1080 | clone->bi_destructor = dm_bio_destructor; | ||
| 1081 | *clone->bi_io_vec = *bv; | 1063 | *clone->bi_io_vec = *bv; |
| 1082 | 1064 | ||
| 1083 | clone->bi_sector = sector; | 1065 | clone->bi_sector = sector; |
| @@ -1090,7 +1072,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, | |||
| 1090 | clone->bi_flags |= 1 << BIO_CLONED; | 1072 | clone->bi_flags |= 1 << BIO_CLONED; |
| 1091 | 1073 | ||
| 1092 | if (bio_integrity(bio)) { | 1074 | if (bio_integrity(bio)) { |
| 1093 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); | 1075 | bio_integrity_clone(clone, bio, GFP_NOIO); |
| 1094 | bio_integrity_trim(clone, | 1076 | bio_integrity_trim(clone, |
| 1095 | bio_sector_offset(bio, idx, offset), len); | 1077 | bio_sector_offset(bio, idx, offset), len); |
| 1096 | } | 1078 | } |
| @@ -1109,7 +1091,6 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
| 1109 | 1091 | ||
| 1110 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); | 1092 | clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); |
| 1111 | __bio_clone(clone, bio); | 1093 | __bio_clone(clone, bio); |
| 1112 | clone->bi_destructor = dm_bio_destructor; | ||
| 1113 | clone->bi_sector = sector; | 1094 | clone->bi_sector = sector; |
| 1114 | clone->bi_idx = idx; | 1095 | clone->bi_idx = idx; |
| 1115 | clone->bi_vcnt = idx + bv_count; | 1096 | clone->bi_vcnt = idx + bv_count; |
| @@ -1117,7 +1098,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, | |||
| 1117 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); | 1098 | clone->bi_flags &= ~(1 << BIO_SEG_VALID); |
| 1118 | 1099 | ||
| 1119 | if (bio_integrity(bio)) { | 1100 | if (bio_integrity(bio)) { |
| 1120 | bio_integrity_clone(clone, bio, GFP_NOIO, bs); | 1101 | bio_integrity_clone(clone, bio, GFP_NOIO); |
| 1121 | 1102 | ||
| 1122 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) | 1103 | if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) |
| 1123 | bio_integrity_trim(clone, | 1104 | bio_integrity_trim(clone, |
| @@ -1152,9 +1133,8 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, | |||
| 1152 | * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush | 1133 | * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush |
| 1153 | * and discard, so no need for concern about wasted bvec allocations. | 1134 | * and discard, so no need for concern about wasted bvec allocations. |
| 1154 | */ | 1135 | */ |
| 1155 | clone = bio_alloc_bioset(GFP_NOIO, ci->bio->bi_max_vecs, ci->md->bs); | 1136 | clone = bio_clone_bioset(ci->bio, GFP_NOIO, ci->md->bs); |
| 1156 | __bio_clone(clone, ci->bio); | 1137 | |
| 1157 | clone->bi_destructor = dm_bio_destructor; | ||
| 1158 | if (len) { | 1138 | if (len) { |
| 1159 | clone->bi_sector = ci->sector; | 1139 | clone->bi_sector = ci->sector; |
| 1160 | clone->bi_size = to_bytes(len); | 1140 | clone->bi_size = to_bytes(len); |
| @@ -1484,30 +1464,17 @@ void dm_dispatch_request(struct request *rq) | |||
| 1484 | } | 1464 | } |
| 1485 | EXPORT_SYMBOL_GPL(dm_dispatch_request); | 1465 | EXPORT_SYMBOL_GPL(dm_dispatch_request); |
| 1486 | 1466 | ||
| 1487 | static void dm_rq_bio_destructor(struct bio *bio) | ||
| 1488 | { | ||
| 1489 | struct dm_rq_clone_bio_info *info = bio->bi_private; | ||
| 1490 | struct mapped_device *md = info->tio->md; | ||
| 1491 | |||
| 1492 | free_bio_info(info); | ||
| 1493 | bio_free(bio, md->bs); | ||
| 1494 | } | ||
| 1495 | |||
| 1496 | static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, | 1467 | static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig, |
| 1497 | void *data) | 1468 | void *data) |
| 1498 | { | 1469 | { |
| 1499 | struct dm_rq_target_io *tio = data; | 1470 | struct dm_rq_target_io *tio = data; |
| 1500 | struct mapped_device *md = tio->md; | 1471 | struct dm_rq_clone_bio_info *info = |
| 1501 | struct dm_rq_clone_bio_info *info = alloc_bio_info(md); | 1472 | container_of(bio, struct dm_rq_clone_bio_info, clone); |
| 1502 | |||
| 1503 | if (!info) | ||
| 1504 | return -ENOMEM; | ||
| 1505 | 1473 | ||
| 1506 | info->orig = bio_orig; | 1474 | info->orig = bio_orig; |
| 1507 | info->tio = tio; | 1475 | info->tio = tio; |
| 1508 | bio->bi_end_io = end_clone_bio; | 1476 | bio->bi_end_io = end_clone_bio; |
| 1509 | bio->bi_private = info; | 1477 | bio->bi_private = info; |
| 1510 | bio->bi_destructor = dm_rq_bio_destructor; | ||
| 1511 | 1478 | ||
| 1512 | return 0; | 1479 | return 0; |
| 1513 | } | 1480 | } |
| @@ -2771,7 +2738,10 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity) | |||
| 2771 | if (!pools->tio_pool) | 2738 | if (!pools->tio_pool) |
| 2772 | goto free_io_pool_and_out; | 2739 | goto free_io_pool_and_out; |
| 2773 | 2740 | ||
| 2774 | pools->bs = bioset_create(pool_size, 0); | 2741 | pools->bs = (type == DM_TYPE_BIO_BASED) ? |
| 2742 | bioset_create(pool_size, 0) : | ||
| 2743 | bioset_create(pool_size, | ||
| 2744 | offsetof(struct dm_rq_clone_bio_info, clone)); | ||
| 2775 | if (!pools->bs) | 2745 | if (!pools->bs) |
| 2776 | goto free_tio_pool_and_out; | 2746 | goto free_tio_pool_and_out; |
| 2777 | 2747 | ||
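Note: with bi_destructor gone, request-based dm no longer kmallocs a separate dm_rq_clone_bio_info and frees it from a destructor. The per-clone info is instead laid out in front of the bio itself: the bioset is created with a front pad of offsetof(struct dm_rq_clone_bio_info, clone), and the constructor recovers the container with container_of(). A minimal sketch of that pattern, with the struct abbreviated to the fields visible in the hunks above (pool_size and bs stand in for the driver's own variables):

        struct dm_rq_clone_bio_info {
                struct bio *orig;
                struct dm_rq_target_io *tio;
                struct bio clone;       /* must stay the last member */
        };

        /* every bio allocated from this set is preceded by the info struct */
        bs = bioset_create(pool_size,
                           offsetof(struct dm_rq_clone_bio_info, clone));

        /* later, given a bio allocated from bs, no extra allocation is needed */
        struct dm_rq_clone_bio_info *info =
                container_of(bio, struct dm_rq_clone_bio_info, clone);

Freeing needs no special handling either: bio_put() returns the whole padded allocation to the bioset recorded in bio->bi_pool.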
diff --git a/drivers/md/md.c b/drivers/md/md.c index 308e87b417e0..95c88012a3b9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c | |||
| @@ -155,32 +155,17 @@ static int start_readonly; | |||
| 155 | * like bio_clone, but with a local bio set | 155 | * like bio_clone, but with a local bio set |
| 156 | */ | 156 | */ |
| 157 | 157 | ||
| 158 | static void mddev_bio_destructor(struct bio *bio) | ||
| 159 | { | ||
| 160 | struct mddev *mddev, **mddevp; | ||
| 161 | |||
| 162 | mddevp = (void*)bio; | ||
| 163 | mddev = mddevp[-1]; | ||
| 164 | |||
| 165 | bio_free(bio, mddev->bio_set); | ||
| 166 | } | ||
| 167 | |||
| 168 | struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, | 158 | struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, |
| 169 | struct mddev *mddev) | 159 | struct mddev *mddev) |
| 170 | { | 160 | { |
| 171 | struct bio *b; | 161 | struct bio *b; |
| 172 | struct mddev **mddevp; | ||
| 173 | 162 | ||
| 174 | if (!mddev || !mddev->bio_set) | 163 | if (!mddev || !mddev->bio_set) |
| 175 | return bio_alloc(gfp_mask, nr_iovecs); | 164 | return bio_alloc(gfp_mask, nr_iovecs); |
| 176 | 165 | ||
| 177 | b = bio_alloc_bioset(gfp_mask, nr_iovecs, | 166 | b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set); |
| 178 | mddev->bio_set); | ||
| 179 | if (!b) | 167 | if (!b) |
| 180 | return NULL; | 168 | return NULL; |
| 181 | mddevp = (void*)b; | ||
| 182 | mddevp[-1] = mddev; | ||
| 183 | b->bi_destructor = mddev_bio_destructor; | ||
| 184 | return b; | 169 | return b; |
| 185 | } | 170 | } |
| 186 | EXPORT_SYMBOL_GPL(bio_alloc_mddev); | 171 | EXPORT_SYMBOL_GPL(bio_alloc_mddev); |
| @@ -188,32 +173,10 @@ EXPORT_SYMBOL_GPL(bio_alloc_mddev); | |||
| 188 | struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, | 173 | struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, |
| 189 | struct mddev *mddev) | 174 | struct mddev *mddev) |
| 190 | { | 175 | { |
| 191 | struct bio *b; | ||
| 192 | struct mddev **mddevp; | ||
| 193 | |||
| 194 | if (!mddev || !mddev->bio_set) | 176 | if (!mddev || !mddev->bio_set) |
| 195 | return bio_clone(bio, gfp_mask); | 177 | return bio_clone(bio, gfp_mask); |
| 196 | 178 | ||
| 197 | b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, | 179 | return bio_clone_bioset(bio, gfp_mask, mddev->bio_set); |
| 198 | mddev->bio_set); | ||
| 199 | if (!b) | ||
| 200 | return NULL; | ||
| 201 | mddevp = (void*)b; | ||
| 202 | mddevp[-1] = mddev; | ||
| 203 | b->bi_destructor = mddev_bio_destructor; | ||
| 204 | __bio_clone(b, bio); | ||
| 205 | if (bio_integrity(bio)) { | ||
| 206 | int ret; | ||
| 207 | |||
| 208 | ret = bio_integrity_clone(b, bio, gfp_mask, mddev->bio_set); | ||
| 209 | |||
| 210 | if (ret < 0) { | ||
| 211 | bio_put(b); | ||
| 212 | return NULL; | ||
| 213 | } | ||
| 214 | } | ||
| 215 | |||
| 216 | return b; | ||
| 217 | } | 180 | } |
| 218 | EXPORT_SYMBOL_GPL(bio_clone_mddev); | 181 | EXPORT_SYMBOL_GPL(bio_clone_mddev); |
| 219 | 182 | ||
| @@ -5006,8 +4969,7 @@ int md_run(struct mddev *mddev) | |||
| 5006 | } | 4969 | } |
| 5007 | 4970 | ||
| 5008 | if (mddev->bio_set == NULL) | 4971 | if (mddev->bio_set == NULL) |
| 5009 | mddev->bio_set = bioset_create(BIO_POOL_SIZE, | 4972 | mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0); |
| 5010 | sizeof(struct mddev *)); | ||
| 5011 | 4973 | ||
| 5012 | spin_lock(&pers_lock); | 4974 | spin_lock(&pers_lock); |
| 5013 | pers = find_pers(mddev->level, mddev->clevel); | 4975 | pers = find_pers(mddev->level, mddev->clevel); |
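The md helpers relied on the same destructor trick: the bioset was created with sizeof(struct mddev *) of front pad so mddev_bio_destructor() could dig the owning bio_set back out. With the pool recorded in bio->bi_pool, both helpers reduce to plain bioset calls; roughly (a sketch, error handling omitted):

        mddev->bio_set = bioset_create(BIO_POOL_SIZE, 0);      /* no front pad */

        b = bio_alloc_bioset(gfp_mask, nr_iovecs, mddev->bio_set);
        /* fill in and submit the bio */
        bio_put(b);             /* freed back into mddev->bio_set via bi_pool */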
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index de63a1fc3737..a9e4fa95dfaa 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c | |||
| @@ -422,6 +422,7 @@ static int raid0_run(struct mddev *mddev) | |||
| 422 | if (md_check_no_bitmap(mddev)) | 422 | if (md_check_no_bitmap(mddev)) |
| 423 | return -EINVAL; | 423 | return -EINVAL; |
| 424 | blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); | 424 | blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); |
| 425 | blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors); | ||
| 425 | 426 | ||
| 426 | /* if private is not null, we are here after takeover */ | 427 | /* if private is not null, we are here after takeover */ |
| 427 | if (mddev->private == NULL) { | 428 | if (mddev->private == NULL) { |
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 29408d46a6d9..57d7674c5013 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c | |||
| @@ -553,14 +553,6 @@ static void iblock_complete_cmd(struct se_cmd *cmd) | |||
| 553 | kfree(ibr); | 553 | kfree(ibr); |
| 554 | } | 554 | } |
| 555 | 555 | ||
| 556 | static void iblock_bio_destructor(struct bio *bio) | ||
| 557 | { | ||
| 558 | struct se_cmd *cmd = bio->bi_private; | ||
| 559 | struct iblock_dev *ib_dev = cmd->se_dev->dev_ptr; | ||
| 560 | |||
| 561 | bio_free(bio, ib_dev->ibd_bio_set); | ||
| 562 | } | ||
| 563 | |||
| 564 | static struct bio * | 556 | static struct bio * |
| 565 | iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num) | 557 | iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num) |
| 566 | { | 558 | { |
| @@ -582,7 +574,6 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num) | |||
| 582 | 574 | ||
| 583 | bio->bi_bdev = ib_dev->ibd_bd; | 575 | bio->bi_bdev = ib_dev->ibd_bd; |
| 584 | bio->bi_private = cmd; | 576 | bio->bi_private = cmd; |
| 585 | bio->bi_destructor = iblock_bio_destructor; | ||
| 586 | bio->bi_end_io = &iblock_bio_done; | 577 | bio->bi_end_io = &iblock_bio_done; |
| 587 | bio->bi_sector = lba; | 578 | bio->bi_sector = lba; |
| 588 | return bio; | 579 | return bio; |
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index e85c04b9f61c..a3f28f331b2b 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
| @@ -70,23 +70,25 @@ static inline int use_bip_pool(unsigned int idx) | |||
| 70 | } | 70 | } |
| 71 | 71 | ||
| 72 | /** | 72 | /** |
| 73 | * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio | 73 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio |
| 74 | * @bio: bio to attach integrity metadata to | 74 | * @bio: bio to attach integrity metadata to |
| 75 | * @gfp_mask: Memory allocation mask | 75 | * @gfp_mask: Memory allocation mask |
| 76 | * @nr_vecs: Number of integrity metadata scatter-gather elements | 76 | * @nr_vecs: Number of integrity metadata scatter-gather elements |
| 77 | * @bs: bio_set to allocate from | ||
| 78 | * | 77 | * |
| 79 | * Description: This function prepares a bio for attaching integrity | 78 | * Description: This function prepares a bio for attaching integrity |
| 80 | * metadata. nr_vecs specifies the maximum number of pages containing | 79 | * metadata. nr_vecs specifies the maximum number of pages containing |
| 81 | * integrity metadata that can be attached. | 80 | * integrity metadata that can be attached. |
| 82 | */ | 81 | */ |
| 83 | struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, | 82 | struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, |
| 84 | gfp_t gfp_mask, | 83 | gfp_t gfp_mask, |
| 85 | unsigned int nr_vecs, | 84 | unsigned int nr_vecs) |
| 86 | struct bio_set *bs) | ||
| 87 | { | 85 | { |
| 88 | struct bio_integrity_payload *bip; | 86 | struct bio_integrity_payload *bip; |
| 89 | unsigned int idx = vecs_to_idx(nr_vecs); | 87 | unsigned int idx = vecs_to_idx(nr_vecs); |
| 88 | struct bio_set *bs = bio->bi_pool; | ||
| 89 | |||
| 90 | if (!bs) | ||
| 91 | bs = fs_bio_set; | ||
| 90 | 92 | ||
| 91 | BUG_ON(bio == NULL); | 93 | BUG_ON(bio == NULL); |
| 92 | bip = NULL; | 94 | bip = NULL; |
| @@ -114,37 +116,22 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio, | |||
| 114 | 116 | ||
| 115 | return bip; | 117 | return bip; |
| 116 | } | 118 | } |
| 117 | EXPORT_SYMBOL(bio_integrity_alloc_bioset); | ||
| 118 | |||
| 119 | /** | ||
| 120 | * bio_integrity_alloc - Allocate integrity payload and attach it to bio | ||
| 121 | * @bio: bio to attach integrity metadata to | ||
| 122 | * @gfp_mask: Memory allocation mask | ||
| 123 | * @nr_vecs: Number of integrity metadata scatter-gather elements | ||
| 124 | * | ||
| 125 | * Description: This function prepares a bio for attaching integrity | ||
| 126 | * metadata. nr_vecs specifies the maximum number of pages containing | ||
| 127 | * integrity metadata that can be attached. | ||
| 128 | */ | ||
| 129 | struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, | ||
| 130 | gfp_t gfp_mask, | ||
| 131 | unsigned int nr_vecs) | ||
| 132 | { | ||
| 133 | return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set); | ||
| 134 | } | ||
| 135 | EXPORT_SYMBOL(bio_integrity_alloc); | 119 | EXPORT_SYMBOL(bio_integrity_alloc); |
| 136 | 120 | ||
| 137 | /** | 121 | /** |
| 138 | * bio_integrity_free - Free bio integrity payload | 122 | * bio_integrity_free - Free bio integrity payload |
| 139 | * @bio: bio containing bip to be freed | 123 | * @bio: bio containing bip to be freed |
| 140 | * @bs: bio_set this bio was allocated from | ||
| 141 | * | 124 | * |
| 142 | * Description: Used to free the integrity portion of a bio. Usually | 125 | * Description: Used to free the integrity portion of a bio. Usually |
| 143 | * called from bio_free(). | 126 | * called from bio_free(). |
| 144 | */ | 127 | */ |
| 145 | void bio_integrity_free(struct bio *bio, struct bio_set *bs) | 128 | void bio_integrity_free(struct bio *bio) |
| 146 | { | 129 | { |
| 147 | struct bio_integrity_payload *bip = bio->bi_integrity; | 130 | struct bio_integrity_payload *bip = bio->bi_integrity; |
| 131 | struct bio_set *bs = bio->bi_pool; | ||
| 132 | |||
| 133 | if (!bs) | ||
| 134 | bs = fs_bio_set; | ||
| 148 | 135 | ||
| 149 | BUG_ON(bip == NULL); | 136 | BUG_ON(bip == NULL); |
| 150 | 137 | ||
| @@ -730,19 +717,18 @@ EXPORT_SYMBOL(bio_integrity_split); | |||
| 730 | * @bio: New bio | 717 | * @bio: New bio |
| 731 | * @bio_src: Original bio | 718 | * @bio_src: Original bio |
| 732 | * @gfp_mask: Memory allocation mask | 719 | * @gfp_mask: Memory allocation mask |
| 733 | * @bs: bio_set to allocate bip from | ||
| 734 | * | 720 | * |
| 735 | * Description: Called to allocate a bip when cloning a bio | 721 | * Description: Called to allocate a bip when cloning a bio |
| 736 | */ | 722 | */ |
| 737 | int bio_integrity_clone(struct bio *bio, struct bio *bio_src, | 723 | int bio_integrity_clone(struct bio *bio, struct bio *bio_src, |
| 738 | gfp_t gfp_mask, struct bio_set *bs) | 724 | gfp_t gfp_mask) |
| 739 | { | 725 | { |
| 740 | struct bio_integrity_payload *bip_src = bio_src->bi_integrity; | 726 | struct bio_integrity_payload *bip_src = bio_src->bi_integrity; |
| 741 | struct bio_integrity_payload *bip; | 727 | struct bio_integrity_payload *bip; |
| 742 | 728 | ||
| 743 | BUG_ON(bip_src == NULL); | 729 | BUG_ON(bip_src == NULL); |
| 744 | 730 | ||
| 745 | bip = bio_integrity_alloc_bioset(bio, gfp_mask, bip_src->bip_vcnt, bs); | 731 | bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt); |
| 746 | 732 | ||
| 747 | if (bip == NULL) | 733 | if (bip == NULL) |
| 748 | return -EIO; | 734 | return -EIO; |
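On the caller side the integrity API loses its bio_set argument as well: the payload is allocated from, and freed back to, whatever pool backs the bio (bio->bi_pool), falling back to fs_bio_set for kmalloc-backed bios. A clone path now looks roughly like this, mirroring the new bio_clone_bioset() body:

        if (bio_integrity(bio)) {
                if (bio_integrity_clone(clone, bio, GFP_NOIO) < 0) {
                        bio_put(clone);
                        return NULL;
                }
        }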
| @@ -55,6 +55,7 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = { | |||
| 55 | * IO code that does not need private memory pools. | 55 | * IO code that does not need private memory pools. |
| 56 | */ | 56 | */ |
| 57 | struct bio_set *fs_bio_set; | 57 | struct bio_set *fs_bio_set; |
| 58 | EXPORT_SYMBOL(fs_bio_set); | ||
| 58 | 59 | ||
| 59 | /* | 60 | /* |
| 60 | * Our slab pool management | 61 | * Our slab pool management |
| @@ -233,26 +234,37 @@ fallback: | |||
| 233 | return bvl; | 234 | return bvl; |
| 234 | } | 235 | } |
| 235 | 236 | ||
| 236 | void bio_free(struct bio *bio, struct bio_set *bs) | 237 | static void __bio_free(struct bio *bio) |
| 237 | { | 238 | { |
| 239 | bio_disassociate_task(bio); | ||
| 240 | |||
| 241 | if (bio_integrity(bio)) | ||
| 242 | bio_integrity_free(bio); | ||
| 243 | } | ||
| 244 | |||
| 245 | static void bio_free(struct bio *bio) | ||
| 246 | { | ||
| 247 | struct bio_set *bs = bio->bi_pool; | ||
| 238 | void *p; | 248 | void *p; |
| 239 | 249 | ||
| 240 | if (bio_has_allocated_vec(bio)) | 250 | __bio_free(bio); |
| 241 | bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); | ||
| 242 | 251 | ||
| 243 | if (bio_integrity(bio)) | 252 | if (bs) { |
| 244 | bio_integrity_free(bio, bs); | 253 | if (bio_has_allocated_vec(bio)) |
| 254 | bvec_free_bs(bs, bio->bi_io_vec, BIO_POOL_IDX(bio)); | ||
| 245 | 255 | ||
| 246 | /* | 256 | /* |
| 247 | * If we have front padding, adjust the bio pointer before freeing | 257 | * If we have front padding, adjust the bio pointer before freeing |
| 248 | */ | 258 | */ |
| 249 | p = bio; | 259 | p = bio; |
| 250 | if (bs->front_pad) | ||
| 251 | p -= bs->front_pad; | 260 | p -= bs->front_pad; |
| 252 | 261 | ||
| 253 | mempool_free(p, bs->bio_pool); | 262 | mempool_free(p, bs->bio_pool); |
| 263 | } else { | ||
| 264 | /* Bio was allocated by bio_kmalloc() */ | ||
| 265 | kfree(bio); | ||
| 266 | } | ||
| 254 | } | 267 | } |
| 255 | EXPORT_SYMBOL(bio_free); | ||
| 256 | 268 | ||
| 257 | void bio_init(struct bio *bio) | 269 | void bio_init(struct bio *bio) |
| 258 | { | 270 | { |
| @@ -263,48 +275,85 @@ void bio_init(struct bio *bio) | |||
| 263 | EXPORT_SYMBOL(bio_init); | 275 | EXPORT_SYMBOL(bio_init); |
| 264 | 276 | ||
| 265 | /** | 277 | /** |
| 278 | * bio_reset - reinitialize a bio | ||
| 279 | * @bio: bio to reset | ||
| 280 | * | ||
| 281 | * Description: | ||
| 282 | * After calling bio_reset(), @bio will be in the same state as a freshly | ||
| 283 | * allocated bio returned by bio_alloc_bioset() - the only fields that are | ||
| 284 | * preserved are the ones that are initialized by bio_alloc_bioset(). See | ||
| 285 | * comment in struct bio. | ||
| 286 | */ | ||
| 287 | void bio_reset(struct bio *bio) | ||
| 288 | { | ||
| 289 | unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS); | ||
| 290 | |||
| 291 | __bio_free(bio); | ||
| 292 | |||
| 293 | memset(bio, 0, BIO_RESET_BYTES); | ||
| 294 | bio->bi_flags = flags|(1 << BIO_UPTODATE); | ||
| 295 | } | ||
| 296 | EXPORT_SYMBOL(bio_reset); | ||
| 297 | |||
| 298 | /** | ||
| 266 | * bio_alloc_bioset - allocate a bio for I/O | 299 | * bio_alloc_bioset - allocate a bio for I/O |
| 267 | * @gfp_mask: the GFP_ mask given to the slab allocator | 300 | * @gfp_mask: the GFP_ mask given to the slab allocator |
| 268 | * @nr_iovecs: number of iovecs to pre-allocate | 301 | * @nr_iovecs: number of iovecs to pre-allocate |
| 269 | * @bs: the bio_set to allocate from. | 302 | * @bs: the bio_set to allocate from. |
| 270 | * | 303 | * |
| 271 | * Description: | 304 | * Description: |
| 272 | * bio_alloc_bioset will try its own mempool to satisfy the allocation. | 305 | * If @bs is NULL, uses kmalloc() to allocate the bio; else the allocation is |
| 273 | * If %__GFP_WAIT is set then we will block on the internal pool waiting | 306 | * backed by the @bs's mempool. |
| 274 | * for a &struct bio to become free. | ||
| 275 | * | 307 | * |
| 276 | * Note that the caller must set ->bi_destructor on successful return | 308 | * When @bs is not NULL, if %__GFP_WAIT is set then bio_alloc will always be |
| 277 | * of a bio, to do the appropriate freeing of the bio once the reference | 309 | * able to allocate a bio. This is due to the mempool guarantees. To make this |
| 278 | * count drops to zero. | 310 | * work, callers must never allocate more than 1 bio at a time from this pool. |
| 279 | **/ | 311 | * Callers that need to allocate more than 1 bio must always submit the |
| 312 | * previously allocated bio for IO before attempting to allocate a new one. | ||
| 313 | * Failure to do so can cause deadlocks under memory pressure. | ||
| 314 | * | ||
| 315 | * RETURNS: | ||
| 316 | * Pointer to new bio on success, NULL on failure. | ||
| 317 | */ | ||
| 280 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 318 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
| 281 | { | 319 | { |
| 320 | unsigned front_pad; | ||
| 321 | unsigned inline_vecs; | ||
| 282 | unsigned long idx = BIO_POOL_NONE; | 322 | unsigned long idx = BIO_POOL_NONE; |
| 283 | struct bio_vec *bvl = NULL; | 323 | struct bio_vec *bvl = NULL; |
| 284 | struct bio *bio; | 324 | struct bio *bio; |
| 285 | void *p; | 325 | void *p; |
| 286 | 326 | ||
| 287 | p = mempool_alloc(bs->bio_pool, gfp_mask); | 327 | if (!bs) { |
| 328 | if (nr_iovecs > UIO_MAXIOV) | ||
| 329 | return NULL; | ||
| 330 | |||
| 331 | p = kmalloc(sizeof(struct bio) + | ||
| 332 | nr_iovecs * sizeof(struct bio_vec), | ||
| 333 | gfp_mask); | ||
| 334 | front_pad = 0; | ||
| 335 | inline_vecs = nr_iovecs; | ||
| 336 | } else { | ||
| 337 | p = mempool_alloc(bs->bio_pool, gfp_mask); | ||
| 338 | front_pad = bs->front_pad; | ||
| 339 | inline_vecs = BIO_INLINE_VECS; | ||
| 340 | } | ||
| 341 | |||
| 288 | if (unlikely(!p)) | 342 | if (unlikely(!p)) |
| 289 | return NULL; | 343 | return NULL; |
| 290 | bio = p + bs->front_pad; | ||
| 291 | 344 | ||
| 345 | bio = p + front_pad; | ||
| 292 | bio_init(bio); | 346 | bio_init(bio); |
| 293 | 347 | ||
| 294 | if (unlikely(!nr_iovecs)) | 348 | if (nr_iovecs > inline_vecs) { |
| 295 | goto out_set; | ||
| 296 | |||
| 297 | if (nr_iovecs <= BIO_INLINE_VECS) { | ||
| 298 | bvl = bio->bi_inline_vecs; | ||
| 299 | nr_iovecs = BIO_INLINE_VECS; | ||
| 300 | } else { | ||
| 301 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); | 349 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); |
| 302 | if (unlikely(!bvl)) | 350 | if (unlikely(!bvl)) |
| 303 | goto err_free; | 351 | goto err_free; |
| 304 | 352 | } else if (nr_iovecs) { | |
| 305 | nr_iovecs = bvec_nr_vecs(idx); | 353 | bvl = bio->bi_inline_vecs; |
| 306 | } | 354 | } |
| 307 | out_set: | 355 | |
| 356 | bio->bi_pool = bs; | ||
| 308 | bio->bi_flags |= idx << BIO_POOL_OFFSET; | 357 | bio->bi_flags |= idx << BIO_POOL_OFFSET; |
| 309 | bio->bi_max_vecs = nr_iovecs; | 358 | bio->bi_max_vecs = nr_iovecs; |
| 310 | bio->bi_io_vec = bvl; | 359 | bio->bi_io_vec = bvl; |
| @@ -316,80 +365,6 @@ err_free: | |||
| 316 | } | 365 | } |
| 317 | EXPORT_SYMBOL(bio_alloc_bioset); | 366 | EXPORT_SYMBOL(bio_alloc_bioset); |
| 318 | 367 | ||
| 319 | static void bio_fs_destructor(struct bio *bio) | ||
| 320 | { | ||
| 321 | bio_free(bio, fs_bio_set); | ||
| 322 | } | ||
| 323 | |||
| 324 | /** | ||
| 325 | * bio_alloc - allocate a new bio, memory pool backed | ||
| 326 | * @gfp_mask: allocation mask to use | ||
| 327 | * @nr_iovecs: number of iovecs | ||
| 328 | * | ||
| 329 | * bio_alloc will allocate a bio and associated bio_vec array that can hold | ||
| 330 | * at least @nr_iovecs entries. Allocations will be done from the | ||
| 331 | * fs_bio_set. Also see @bio_alloc_bioset and @bio_kmalloc. | ||
| 332 | * | ||
| 333 | * If %__GFP_WAIT is set, then bio_alloc will always be able to allocate | ||
| 334 | * a bio. This is due to the mempool guarantees. To make this work, callers | ||
| 335 | * must never allocate more than 1 bio at a time from this pool. Callers | ||
| 336 | * that need to allocate more than 1 bio must always submit the previously | ||
| 337 | * allocated bio for IO before attempting to allocate a new one. Failure to | ||
| 338 | * do so can cause livelocks under memory pressure. | ||
| 339 | * | ||
| 340 | * RETURNS: | ||
| 341 | * Pointer to new bio on success, NULL on failure. | ||
| 342 | */ | ||
| 343 | struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
| 344 | { | ||
| 345 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | ||
| 346 | |||
| 347 | if (bio) | ||
| 348 | bio->bi_destructor = bio_fs_destructor; | ||
| 349 | |||
| 350 | return bio; | ||
| 351 | } | ||
| 352 | EXPORT_SYMBOL(bio_alloc); | ||
| 353 | |||
| 354 | static void bio_kmalloc_destructor(struct bio *bio) | ||
| 355 | { | ||
| 356 | if (bio_integrity(bio)) | ||
| 357 | bio_integrity_free(bio, fs_bio_set); | ||
| 358 | kfree(bio); | ||
| 359 | } | ||
| 360 | |||
| 361 | /** | ||
| 362 | * bio_kmalloc - allocate a bio for I/O using kmalloc() | ||
| 363 | * @gfp_mask: the GFP_ mask given to the slab allocator | ||
| 364 | * @nr_iovecs: number of iovecs to pre-allocate | ||
| 365 | * | ||
| 366 | * Description: | ||
| 367 | * Allocate a new bio with @nr_iovecs bvecs. If @gfp_mask contains | ||
| 368 | * %__GFP_WAIT, the allocation is guaranteed to succeed. | ||
| 369 | * | ||
| 370 | **/ | ||
| 371 | struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
| 372 | { | ||
| 373 | struct bio *bio; | ||
| 374 | |||
| 375 | if (nr_iovecs > UIO_MAXIOV) | ||
| 376 | return NULL; | ||
| 377 | |||
| 378 | bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec), | ||
| 379 | gfp_mask); | ||
| 380 | if (unlikely(!bio)) | ||
| 381 | return NULL; | ||
| 382 | |||
| 383 | bio_init(bio); | ||
| 384 | bio->bi_flags |= BIO_POOL_NONE << BIO_POOL_OFFSET; | ||
| 385 | bio->bi_max_vecs = nr_iovecs; | ||
| 386 | bio->bi_io_vec = bio->bi_inline_vecs; | ||
| 387 | bio->bi_destructor = bio_kmalloc_destructor; | ||
| 388 | |||
| 389 | return bio; | ||
| 390 | } | ||
| 391 | EXPORT_SYMBOL(bio_kmalloc); | ||
| 392 | |||
| 393 | void zero_fill_bio(struct bio *bio) | 368 | void zero_fill_bio(struct bio *bio) |
| 394 | { | 369 | { |
| 395 | unsigned long flags; | 370 | unsigned long flags; |
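bio_alloc_bioset() is now the single allocation entry point. Passing a NULL bio_set gives the old bio_kmalloc() behaviour (plain kmalloc of the bio plus inline vecs, capped at UIO_MAXIOV, no mempool guarantee); passing a real bio_set gives the mempool-backed allocation that cannot fail when __GFP_WAIT is set. Either way bi_pool records how the bio must eventually be freed. A hedged caller-side sketch (my_bio_set is illustrative):

        /* mempool-backed: may sleep, but will not fail with __GFP_WAIT */
        bio = bio_alloc_bioset(GFP_NOIO, 4, my_bio_set);

        /* or kmalloc-backed, the old bio_kmalloc() semantics: */
        bio = bio_alloc_bioset(GFP_KERNEL, 4, NULL);

        /* reuse instead of reallocating: only fields from bi_max_vecs on survive */
        bio_reset(bio);

        /* both flavours are released the same way; bio_free() consults bi_pool */
        bio_put(bio);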
| @@ -420,11 +395,8 @@ void bio_put(struct bio *bio) | |||
| 420 | /* | 395 | /* |
| 421 | * last put frees it | 396 | * last put frees it |
| 422 | */ | 397 | */ |
| 423 | if (atomic_dec_and_test(&bio->bi_cnt)) { | 398 | if (atomic_dec_and_test(&bio->bi_cnt)) |
| 424 | bio_disassociate_task(bio); | 399 | bio_free(bio); |
| 425 | bio->bi_next = NULL; | ||
| 426 | bio->bi_destructor(bio); | ||
| 427 | } | ||
| 428 | } | 400 | } |
| 429 | EXPORT_SYMBOL(bio_put); | 401 | EXPORT_SYMBOL(bio_put); |
| 430 | 402 | ||
| @@ -466,26 +438,28 @@ void __bio_clone(struct bio *bio, struct bio *bio_src) | |||
| 466 | EXPORT_SYMBOL(__bio_clone); | 438 | EXPORT_SYMBOL(__bio_clone); |
| 467 | 439 | ||
| 468 | /** | 440 | /** |
| 469 | * bio_clone - clone a bio | 441 | * bio_clone_bioset - clone a bio |
| 470 | * @bio: bio to clone | 442 | * @bio: bio to clone |
| 471 | * @gfp_mask: allocation priority | 443 | * @gfp_mask: allocation priority |
| 444 | * @bs: bio_set to allocate from | ||
| 472 | * | 445 | * |
| 473 | * Like __bio_clone, only also allocates the returned bio | 446 | * Like __bio_clone, only also allocates the returned bio |
| 474 | */ | 447 | */ |
| 475 | struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) | 448 | struct bio *bio_clone_bioset(struct bio *bio, gfp_t gfp_mask, |
| 449 | struct bio_set *bs) | ||
| 476 | { | 450 | { |
| 477 | struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); | 451 | struct bio *b; |
| 478 | 452 | ||
| 453 | b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, bs); | ||
| 479 | if (!b) | 454 | if (!b) |
| 480 | return NULL; | 455 | return NULL; |
| 481 | 456 | ||
| 482 | b->bi_destructor = bio_fs_destructor; | ||
| 483 | __bio_clone(b, bio); | 457 | __bio_clone(b, bio); |
| 484 | 458 | ||
| 485 | if (bio_integrity(bio)) { | 459 | if (bio_integrity(bio)) { |
| 486 | int ret; | 460 | int ret; |
| 487 | 461 | ||
| 488 | ret = bio_integrity_clone(b, bio, gfp_mask, fs_bio_set); | 462 | ret = bio_integrity_clone(b, bio, gfp_mask); |
| 489 | 463 | ||
| 490 | if (ret < 0) { | 464 | if (ret < 0) { |
| 491 | bio_put(b); | 465 | bio_put(b); |
| @@ -495,7 +469,7 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) | |||
| 495 | 469 | ||
| 496 | return b; | 470 | return b; |
| 497 | } | 471 | } |
| 498 | EXPORT_SYMBOL(bio_clone); | 472 | EXPORT_SYMBOL(bio_clone_bioset); |
| 499 | 473 | ||
| 500 | /** | 474 | /** |
| 501 | * bio_get_nr_vecs - return approx number of vecs | 475 | * bio_get_nr_vecs - return approx number of vecs |
| @@ -1501,7 +1475,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) | |||
| 1501 | trace_block_split(bdev_get_queue(bi->bi_bdev), bi, | 1475 | trace_block_split(bdev_get_queue(bi->bi_bdev), bi, |
| 1502 | bi->bi_sector + first_sectors); | 1476 | bi->bi_sector + first_sectors); |
| 1503 | 1477 | ||
| 1504 | BUG_ON(bi->bi_vcnt != 1); | 1478 | BUG_ON(bi->bi_vcnt != 1 && bi->bi_vcnt != 0); |
| 1505 | BUG_ON(bi->bi_idx != 0); | 1479 | BUG_ON(bi->bi_idx != 0); |
| 1506 | atomic_set(&bp->cnt, 3); | 1480 | atomic_set(&bp->cnt, 3); |
| 1507 | bp->error = 0; | 1481 | bp->error = 0; |
| @@ -1511,17 +1485,22 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) | |||
| 1511 | bp->bio2.bi_size -= first_sectors << 9; | 1485 | bp->bio2.bi_size -= first_sectors << 9; |
| 1512 | bp->bio1.bi_size = first_sectors << 9; | 1486 | bp->bio1.bi_size = first_sectors << 9; |
| 1513 | 1487 | ||
| 1514 | bp->bv1 = bi->bi_io_vec[0]; | 1488 | if (bi->bi_vcnt != 0) { |
| 1515 | bp->bv2 = bi->bi_io_vec[0]; | 1489 | bp->bv1 = bi->bi_io_vec[0]; |
| 1516 | bp->bv2.bv_offset += first_sectors << 9; | 1490 | bp->bv2 = bi->bi_io_vec[0]; |
| 1517 | bp->bv2.bv_len -= first_sectors << 9; | 1491 | |
| 1518 | bp->bv1.bv_len = first_sectors << 9; | 1492 | if (bio_is_rw(bi)) { |
| 1493 | bp->bv2.bv_offset += first_sectors << 9; | ||
| 1494 | bp->bv2.bv_len -= first_sectors << 9; | ||
| 1495 | bp->bv1.bv_len = first_sectors << 9; | ||
| 1496 | } | ||
| 1519 | 1497 | ||
| 1520 | bp->bio1.bi_io_vec = &bp->bv1; | 1498 | bp->bio1.bi_io_vec = &bp->bv1; |
| 1521 | bp->bio2.bi_io_vec = &bp->bv2; | 1499 | bp->bio2.bi_io_vec = &bp->bv2; |
| 1522 | 1500 | ||
| 1523 | bp->bio1.bi_max_vecs = 1; | 1501 | bp->bio1.bi_max_vecs = 1; |
| 1524 | bp->bio2.bi_max_vecs = 1; | 1502 | bp->bio2.bi_max_vecs = 1; |
| 1503 | } | ||
| 1525 | 1504 | ||
| 1526 | bp->bio1.bi_end_io = bio_pair_end_1; | 1505 | bp->bio1.bi_end_io = bio_pair_end_1; |
| 1527 | bp->bio2.bi_end_io = bio_pair_end_2; | 1506 | bp->bio2.bi_end_io = bio_pair_end_2; |
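The bio_split() relaxation above exists because discard and WRITE SAME bios describe their sector range through bi_size without a normal data payload (bi_vcnt may be 0, and bio_is_rw() is false), so only bi_sector and bi_size should be divided, never the bvec. From the caller's point of view nothing changes:

        struct bio_pair *bp = bio_split(bio, first_sectors);

        /*
         * bp->bio1 covers the first first_sectors sectors, bp->bio2 the rest.
         * For data-less or WRITE SAME bios the shared payload (if any) is left
         * untouched; only the sector ranges differ.  Submit both halves, then
         * drop this reference on the pair:
         */
        bio_pair_release(bp);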
diff --git a/fs/block_dev.c b/fs/block_dev.c index 38e721b35d45..b3c1d3dae77d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -116,6 +116,8 @@ EXPORT_SYMBOL(invalidate_bdev); | |||
| 116 | 116 | ||
| 117 | int set_blocksize(struct block_device *bdev, int size) | 117 | int set_blocksize(struct block_device *bdev, int size) |
| 118 | { | 118 | { |
| 119 | struct address_space *mapping; | ||
| 120 | |||
| 119 | /* Size must be a power of two, and between 512 and PAGE_SIZE */ | 121 | /* Size must be a power of two, and between 512 and PAGE_SIZE */ |
| 120 | if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) | 122 | if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size)) |
| 121 | return -EINVAL; | 123 | return -EINVAL; |
| @@ -124,6 +126,19 @@ int set_blocksize(struct block_device *bdev, int size) | |||
| 124 | if (size < bdev_logical_block_size(bdev)) | 126 | if (size < bdev_logical_block_size(bdev)) |
| 125 | return -EINVAL; | 127 | return -EINVAL; |
| 126 | 128 | ||
| 129 | /* Prevent starting I/O or mapping the device */ | ||
| 130 | percpu_down_write(&bdev->bd_block_size_semaphore); | ||
| 131 | |||
| 132 | /* Check that the block device is not memory mapped */ | ||
| 133 | mapping = bdev->bd_inode->i_mapping; | ||
| 134 | mutex_lock(&mapping->i_mmap_mutex); | ||
| 135 | if (mapping_mapped(mapping)) { | ||
| 136 | mutex_unlock(&mapping->i_mmap_mutex); | ||
| 137 | percpu_up_write(&bdev->bd_block_size_semaphore); | ||
| 138 | return -EBUSY; | ||
| 139 | } | ||
| 140 | mutex_unlock(&mapping->i_mmap_mutex); | ||
| 141 | |||
| 127 | /* Don't change the size if it is same as current */ | 142 | /* Don't change the size if it is same as current */ |
| 128 | if (bdev->bd_block_size != size) { | 143 | if (bdev->bd_block_size != size) { |
| 129 | sync_blockdev(bdev); | 144 | sync_blockdev(bdev); |
| @@ -131,6 +146,9 @@ int set_blocksize(struct block_device *bdev, int size) | |||
| 131 | bdev->bd_inode->i_blkbits = blksize_bits(size); | 146 | bdev->bd_inode->i_blkbits = blksize_bits(size); |
| 132 | kill_bdev(bdev); | 147 | kill_bdev(bdev); |
| 133 | } | 148 | } |
| 149 | |||
| 150 | percpu_up_write(&bdev->bd_block_size_semaphore); | ||
| 151 | |||
| 134 | return 0; | 152 | return 0; |
| 135 | } | 153 | } |
| 136 | 154 | ||
| @@ -441,6 +459,12 @@ static struct inode *bdev_alloc_inode(struct super_block *sb) | |||
| 441 | struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); | 459 | struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL); |
| 442 | if (!ei) | 460 | if (!ei) |
| 443 | return NULL; | 461 | return NULL; |
| 462 | |||
| 463 | if (unlikely(percpu_init_rwsem(&ei->bdev.bd_block_size_semaphore))) { | ||
| 464 | kmem_cache_free(bdev_cachep, ei); | ||
| 465 | return NULL; | ||
| 466 | } | ||
| 467 | |||
| 444 | return &ei->vfs_inode; | 468 | return &ei->vfs_inode; |
| 445 | } | 469 | } |
| 446 | 470 | ||
| @@ -449,6 +473,8 @@ static void bdev_i_callback(struct rcu_head *head) | |||
| 449 | struct inode *inode = container_of(head, struct inode, i_rcu); | 473 | struct inode *inode = container_of(head, struct inode, i_rcu); |
| 450 | struct bdev_inode *bdi = BDEV_I(inode); | 474 | struct bdev_inode *bdi = BDEV_I(inode); |
| 451 | 475 | ||
| 476 | percpu_free_rwsem(&bdi->bdev.bd_block_size_semaphore); | ||
| 477 | |||
| 452 | kmem_cache_free(bdev_cachep, bdi); | 478 | kmem_cache_free(bdev_cachep, bdi); |
| 453 | } | 479 | } |
| 454 | 480 | ||
| @@ -1567,6 +1593,22 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
| 1567 | return blkdev_ioctl(bdev, mode, cmd, arg); | 1593 | return blkdev_ioctl(bdev, mode, cmd, arg); |
| 1568 | } | 1594 | } |
| 1569 | 1595 | ||
| 1596 | ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, | ||
| 1597 | unsigned long nr_segs, loff_t pos) | ||
| 1598 | { | ||
| 1599 | ssize_t ret; | ||
| 1600 | struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host); | ||
| 1601 | |||
| 1602 | percpu_down_read(&bdev->bd_block_size_semaphore); | ||
| 1603 | |||
| 1604 | ret = generic_file_aio_read(iocb, iov, nr_segs, pos); | ||
| 1605 | |||
| 1606 | percpu_up_read(&bdev->bd_block_size_semaphore); | ||
| 1607 | |||
| 1608 | return ret; | ||
| 1609 | } | ||
| 1610 | EXPORT_SYMBOL_GPL(blkdev_aio_read); | ||
| 1611 | |||
| 1570 | /* | 1612 | /* |
| 1571 | * Write data to the block device. Only intended for the block device itself | 1613 | * Write data to the block device. Only intended for the block device itself |
| 1572 | * and the raw driver which basically is a fake block device. | 1614 | * and the raw driver which basically is a fake block device. |
| @@ -1578,12 +1620,16 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 1578 | unsigned long nr_segs, loff_t pos) | 1620 | unsigned long nr_segs, loff_t pos) |
| 1579 | { | 1621 | { |
| 1580 | struct file *file = iocb->ki_filp; | 1622 | struct file *file = iocb->ki_filp; |
| 1623 | struct block_device *bdev = I_BDEV(file->f_mapping->host); | ||
| 1581 | struct blk_plug plug; | 1624 | struct blk_plug plug; |
| 1582 | ssize_t ret; | 1625 | ssize_t ret; |
| 1583 | 1626 | ||
| 1584 | BUG_ON(iocb->ki_pos != pos); | 1627 | BUG_ON(iocb->ki_pos != pos); |
| 1585 | 1628 | ||
| 1586 | blk_start_plug(&plug); | 1629 | blk_start_plug(&plug); |
| 1630 | |||
| 1631 | percpu_down_read(&bdev->bd_block_size_semaphore); | ||
| 1632 | |||
| 1587 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); | 1633 | ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos); |
| 1588 | if (ret > 0 || ret == -EIOCBQUEUED) { | 1634 | if (ret > 0 || ret == -EIOCBQUEUED) { |
| 1589 | ssize_t err; | 1635 | ssize_t err; |
| @@ -1592,11 +1638,29 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 1592 | if (err < 0 && ret > 0) | 1638 | if (err < 0 && ret > 0) |
| 1593 | ret = err; | 1639 | ret = err; |
| 1594 | } | 1640 | } |
| 1641 | |||
| 1642 | percpu_up_read(&bdev->bd_block_size_semaphore); | ||
| 1643 | |||
| 1595 | blk_finish_plug(&plug); | 1644 | blk_finish_plug(&plug); |
| 1645 | |||
| 1596 | return ret; | 1646 | return ret; |
| 1597 | } | 1647 | } |
| 1598 | EXPORT_SYMBOL_GPL(blkdev_aio_write); | 1648 | EXPORT_SYMBOL_GPL(blkdev_aio_write); |
| 1599 | 1649 | ||
| 1650 | static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) | ||
| 1651 | { | ||
| 1652 | int ret; | ||
| 1653 | struct block_device *bdev = I_BDEV(file->f_mapping->host); | ||
| 1654 | |||
| 1655 | percpu_down_read(&bdev->bd_block_size_semaphore); | ||
| 1656 | |||
| 1657 | ret = generic_file_mmap(file, vma); | ||
| 1658 | |||
| 1659 | percpu_up_read(&bdev->bd_block_size_semaphore); | ||
| 1660 | |||
| 1661 | return ret; | ||
| 1662 | } | ||
| 1663 | |||
| 1600 | /* | 1664 | /* |
| 1601 | * Try to release a page associated with block device when the system | 1665 | * Try to release a page associated with block device when the system |
| 1602 | * is under memory pressure. | 1666 | * is under memory pressure. |
| @@ -1627,9 +1691,9 @@ const struct file_operations def_blk_fops = { | |||
| 1627 | .llseek = block_llseek, | 1691 | .llseek = block_llseek, |
| 1628 | .read = do_sync_read, | 1692 | .read = do_sync_read, |
| 1629 | .write = do_sync_write, | 1693 | .write = do_sync_write, |
| 1630 | .aio_read = generic_file_aio_read, | 1694 | .aio_read = blkdev_aio_read, |
| 1631 | .aio_write = blkdev_aio_write, | 1695 | .aio_write = blkdev_aio_write, |
| 1632 | .mmap = generic_file_mmap, | 1696 | .mmap = blkdev_mmap, |
| 1633 | .fsync = blkdev_fsync, | 1697 | .fsync = blkdev_fsync, |
| 1634 | .unlocked_ioctl = block_ioctl, | 1698 | .unlocked_ioctl = block_ioctl, |
| 1635 | #ifdef CONFIG_COMPAT | 1699 | #ifdef CONFIG_COMPAT |
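The new bd_block_size_semaphore gives every I/O entry point of the block device node (aio read, aio write, mmap) the read side of a per-device percpu rw-semaphore, so set_blocksize() can take the write side and know no I/O is in flight, and can refuse outright if the device is already memory-mapped. The locking pattern, reduced to its shape:

        /* I/O paths: cheap, per-cpu, taken concurrently on many CPUs */
        percpu_down_read(&bdev->bd_block_size_semaphore);
        /* issue reads/writes against the current block size */
        percpu_up_read(&bdev->bd_block_size_semaphore);

        /* set_blocksize(): rare, expensive, excludes all readers */
        percpu_down_write(&bdev->bd_block_size_semaphore);
        /* change bd_block_size and i_blkbits, kill_bdev() */
        percpu_up_write(&bdev->bd_block_size_semaphore);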
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index 1585db1aa365..f936cb50dc0d 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c | |||
| @@ -814,8 +814,8 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) | |||
| 814 | struct bio *bio; | 814 | struct bio *bio; |
| 815 | 815 | ||
| 816 | if (per_dev != master_dev) { | 816 | if (per_dev != master_dev) { |
| 817 | bio = bio_kmalloc(GFP_KERNEL, | 817 | bio = bio_clone_kmalloc(master_dev->bio, |
| 818 | master_dev->bio->bi_max_vecs); | 818 | GFP_KERNEL); |
| 819 | if (unlikely(!bio)) { | 819 | if (unlikely(!bio)) { |
| 820 | ORE_DBGMSG( | 820 | ORE_DBGMSG( |
| 821 | "Failed to allocate BIO size=%u\n", | 821 | "Failed to allocate BIO size=%u\n", |
| @@ -824,7 +824,6 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp) | |||
| 824 | goto out; | 824 | goto out; |
| 825 | } | 825 | } |
| 826 | 826 | ||
| 827 | __bio_clone(bio, master_dev->bio); | ||
| 828 | bio->bi_bdev = NULL; | 827 | bio->bi_bdev = NULL; |
| 829 | bio->bi_next = NULL; | 828 | bio->bi_next = NULL; |
| 830 | per_dev->offset = master_dev->offset; | 829 | per_dev->offset = master_dev->offset; |
diff --git a/include/linux/bio.h b/include/linux/bio.h index 26435890dc87..820e7aaad4fd 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h | |||
| @@ -212,20 +212,41 @@ extern void bio_pair_release(struct bio_pair *dbio); | |||
| 212 | extern struct bio_set *bioset_create(unsigned int, unsigned int); | 212 | extern struct bio_set *bioset_create(unsigned int, unsigned int); |
| 213 | extern void bioset_free(struct bio_set *); | 213 | extern void bioset_free(struct bio_set *); |
| 214 | 214 | ||
| 215 | extern struct bio *bio_alloc(gfp_t, unsigned int); | ||
| 216 | extern struct bio *bio_kmalloc(gfp_t, unsigned int); | ||
| 217 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); | 215 | extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); |
| 218 | extern void bio_put(struct bio *); | 216 | extern void bio_put(struct bio *); |
| 219 | extern void bio_free(struct bio *, struct bio_set *); | 217 | |
| 218 | extern void __bio_clone(struct bio *, struct bio *); | ||
| 219 | extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); | ||
| 220 | |||
| 221 | extern struct bio_set *fs_bio_set; | ||
| 222 | |||
| 223 | static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
| 224 | { | ||
| 225 | return bio_alloc_bioset(gfp_mask, nr_iovecs, fs_bio_set); | ||
| 226 | } | ||
| 227 | |||
| 228 | static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) | ||
| 229 | { | ||
| 230 | return bio_clone_bioset(bio, gfp_mask, fs_bio_set); | ||
| 231 | } | ||
| 232 | |||
| 233 | static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) | ||
| 234 | { | ||
| 235 | return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); | ||
| 236 | } | ||
| 237 | |||
| 238 | static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) | ||
| 239 | { | ||
| 240 | return bio_clone_bioset(bio, gfp_mask, NULL); | ||
| 241 | |||
| 242 | } | ||
| 220 | 243 | ||
| 221 | extern void bio_endio(struct bio *, int); | 244 | extern void bio_endio(struct bio *, int); |
| 222 | struct request_queue; | 245 | struct request_queue; |
| 223 | extern int bio_phys_segments(struct request_queue *, struct bio *); | 246 | extern int bio_phys_segments(struct request_queue *, struct bio *); |
| 224 | 247 | ||
| 225 | extern void __bio_clone(struct bio *, struct bio *); | ||
| 226 | extern struct bio *bio_clone(struct bio *, gfp_t); | ||
| 227 | |||
| 228 | extern void bio_init(struct bio *); | 248 | extern void bio_init(struct bio *); |
| 249 | extern void bio_reset(struct bio *); | ||
| 229 | 250 | ||
| 230 | extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); | 251 | extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); |
| 231 | extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, | 252 | extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, |
| @@ -304,8 +325,6 @@ struct biovec_slab { | |||
| 304 | struct kmem_cache *slab; | 325 | struct kmem_cache *slab; |
| 305 | }; | 326 | }; |
| 306 | 327 | ||
| 307 | extern struct bio_set *fs_bio_set; | ||
| 308 | |||
| 309 | /* | 328 | /* |
| 310 | * a small number of entries is fine, not going to be performance critical. | 329 | * a small number of entries is fine, not going to be performance critical. |
| 311 | * basically we just need to survive | 330 | * basically we just need to survive |
| @@ -367,9 +386,31 @@ static inline char *__bio_kmap_irq(struct bio *bio, unsigned short idx, | |||
| 367 | /* | 386 | /* |
| 368 | * Check whether this bio carries any data or not. A NULL bio is allowed. | 387 | * Check whether this bio carries any data or not. A NULL bio is allowed. |
| 369 | */ | 388 | */ |
| 370 | static inline int bio_has_data(struct bio *bio) | 389 | static inline bool bio_has_data(struct bio *bio) |
| 371 | { | 390 | { |
| 372 | return bio && bio->bi_io_vec != NULL; | 391 | if (bio && bio->bi_vcnt) |
| 392 | return true; | ||
| 393 | |||
| 394 | return false; | ||
| 395 | } | ||
| 396 | |||
| 397 | static inline bool bio_is_rw(struct bio *bio) | ||
| 398 | { | ||
| 399 | if (!bio_has_data(bio)) | ||
| 400 | return false; | ||
| 401 | |||
| 402 | if (bio->bi_rw & REQ_WRITE_SAME) | ||
| 403 | return false; | ||
| 404 | |||
| 405 | return true; | ||
| 406 | } | ||
| 407 | |||
| 408 | static inline bool bio_mergeable(struct bio *bio) | ||
| 409 | { | ||
| 410 | if (bio->bi_rw & REQ_NOMERGE_FLAGS) | ||
| 411 | return false; | ||
| 412 | |||
| 413 | return true; | ||
| 373 | } | 414 | } |
| 374 | 415 | ||
| 375 | /* | 416 | /* |
| @@ -505,9 +546,8 @@ static inline struct bio *bio_list_get(struct bio_list *bl) | |||
| 505 | 546 | ||
| 506 | #define bio_integrity(bio) (bio->bi_integrity != NULL) | 547 | #define bio_integrity(bio) (bio->bi_integrity != NULL) |
| 507 | 548 | ||
| 508 | extern struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *, gfp_t, unsigned int, struct bio_set *); | ||
| 509 | extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); | 549 | extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); |
| 510 | extern void bio_integrity_free(struct bio *, struct bio_set *); | 550 | extern void bio_integrity_free(struct bio *); |
| 511 | extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); | 551 | extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); |
| 512 | extern int bio_integrity_enabled(struct bio *bio); | 552 | extern int bio_integrity_enabled(struct bio *bio); |
| 513 | extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); | 553 | extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); |
| @@ -517,7 +557,7 @@ extern void bio_integrity_endio(struct bio *, int); | |||
| 517 | extern void bio_integrity_advance(struct bio *, unsigned int); | 557 | extern void bio_integrity_advance(struct bio *, unsigned int); |
| 518 | extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); | 558 | extern void bio_integrity_trim(struct bio *, unsigned int, unsigned int); |
| 519 | extern void bio_integrity_split(struct bio *, struct bio_pair *, int); | 559 | extern void bio_integrity_split(struct bio *, struct bio_pair *, int); |
| 520 | extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t, struct bio_set *); | 560 | extern int bio_integrity_clone(struct bio *, struct bio *, gfp_t); |
| 521 | extern int bioset_integrity_create(struct bio_set *, int); | 561 | extern int bioset_integrity_create(struct bio_set *, int); |
| 522 | extern void bioset_integrity_free(struct bio_set *); | 562 | extern void bioset_integrity_free(struct bio_set *); |
| 523 | extern void bio_integrity_init(void); | 563 | extern void bio_integrity_init(void); |
| @@ -549,13 +589,13 @@ static inline int bio_integrity_prep(struct bio *bio) | |||
| 549 | return 0; | 589 | return 0; |
| 550 | } | 590 | } |
| 551 | 591 | ||
| 552 | static inline void bio_integrity_free(struct bio *bio, struct bio_set *bs) | 592 | static inline void bio_integrity_free(struct bio *bio) |
| 553 | { | 593 | { |
| 554 | return; | 594 | return; |
| 555 | } | 595 | } |
| 556 | 596 | ||
| 557 | static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, | 597 | static inline int bio_integrity_clone(struct bio *bio, struct bio *bio_src, |
| 558 | gfp_t gfp_mask, struct bio_set *bs) | 598 | gfp_t gfp_mask) |
| 559 | { | 599 | { |
| 560 | return 0; | 600 | return 0; |
| 561 | } | 601 | } |
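The removed exported allocators do not disappear for callers: they become trivial static inline wrappers over the two remaining entry points, so existing call sites keep compiling while every bio carries its owning pool in bi_pool. Spelled out (orig stands for some existing bio):

        struct bio *b;

        b = bio_alloc(GFP_NOIO, 4);             /* == bio_alloc_bioset(GFP_NOIO, 4, fs_bio_set) */
        b = bio_kmalloc(GFP_KERNEL, 4);         /* == bio_alloc_bioset(GFP_KERNEL, 4, NULL) */
        b = bio_clone(orig, GFP_NOIO);          /* == bio_clone_bioset(orig, GFP_NOIO, fs_bio_set) */
        b = bio_clone_kmalloc(orig, GFP_NOIO);  /* == bio_clone_bioset(orig, GFP_NOIO, NULL) */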
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 7b7ac9ccec7a..cdf11191e645 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h | |||
| @@ -59,12 +59,6 @@ struct bio { | |||
| 59 | unsigned int bi_seg_front_size; | 59 | unsigned int bi_seg_front_size; |
| 60 | unsigned int bi_seg_back_size; | 60 | unsigned int bi_seg_back_size; |
| 61 | 61 | ||
| 62 | unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ | ||
| 63 | |||
| 64 | atomic_t bi_cnt; /* pin count */ | ||
| 65 | |||
| 66 | struct bio_vec *bi_io_vec; /* the actual vec list */ | ||
| 67 | |||
| 68 | bio_end_io_t *bi_end_io; | 62 | bio_end_io_t *bi_end_io; |
| 69 | 63 | ||
| 70 | void *bi_private; | 64 | void *bi_private; |
| @@ -80,7 +74,17 @@ struct bio { | |||
| 80 | struct bio_integrity_payload *bi_integrity; /* data integrity */ | 74 | struct bio_integrity_payload *bi_integrity; /* data integrity */ |
| 81 | #endif | 75 | #endif |
| 82 | 76 | ||
| 83 | bio_destructor_t *bi_destructor; /* destructor */ | 77 | /* |
| 78 | * Everything starting with bi_max_vecs will be preserved by bio_reset() | ||
| 79 | */ | ||
| 80 | |||
| 81 | unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ | ||
| 82 | |||
| 83 | atomic_t bi_cnt; /* pin count */ | ||
| 84 | |||
| 85 | struct bio_vec *bi_io_vec; /* the actual vec list */ | ||
| 86 | |||
| 87 | struct bio_set *bi_pool; | ||
| 84 | 88 | ||
| 85 | /* | 89 | /* |
| 86 | * We can inline a number of vecs at the end of the bio, to avoid | 90 | * We can inline a number of vecs at the end of the bio, to avoid |
| @@ -90,6 +94,8 @@ struct bio { | |||
| 90 | struct bio_vec bi_inline_vecs[0]; | 94 | struct bio_vec bi_inline_vecs[0]; |
| 91 | }; | 95 | }; |
| 92 | 96 | ||
| 97 | #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) | ||
| 98 | |||
| 93 | /* | 99 | /* |
| 94 | * bio flags | 100 | * bio flags |
| 95 | */ | 101 | */ |
| @@ -105,6 +111,13 @@ struct bio { | |||
| 105 | #define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ | 111 | #define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ |
| 106 | #define BIO_QUIET 10 /* Make BIO Quiet */ | 112 | #define BIO_QUIET 10 /* Make BIO Quiet */ |
| 107 | #define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ | 113 | #define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ |
| 114 | |||
| 115 | /* | ||
| 116 | * Flags starting here get preserved by bio_reset() - this includes | ||
| 117 | * BIO_POOL_IDX() | ||
| 118 | */ | ||
| 119 | #define BIO_RESET_BITS 12 | ||
| 120 | |||
| 108 | #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) | 121 | #define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) |
| 109 | 122 | ||
| 110 | /* | 123 | /* |
| @@ -134,6 +147,7 @@ enum rq_flag_bits { | |||
| 134 | __REQ_PRIO, /* boost priority in cfq */ | 147 | __REQ_PRIO, /* boost priority in cfq */ |
| 135 | __REQ_DISCARD, /* request to discard sectors */ | 148 | __REQ_DISCARD, /* request to discard sectors */ |
| 136 | __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ | 149 | __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ |
| 150 | __REQ_WRITE_SAME, /* write same block many times */ | ||
| 137 | 151 | ||
| 138 | __REQ_NOIDLE, /* don't anticipate more IO after this one */ | 152 | __REQ_NOIDLE, /* don't anticipate more IO after this one */ |
| 139 | __REQ_FUA, /* forced unit access */ | 153 | __REQ_FUA, /* forced unit access */ |
| @@ -172,15 +186,21 @@ enum rq_flag_bits { | |||
| 172 | #define REQ_META (1 << __REQ_META) | 186 | #define REQ_META (1 << __REQ_META) |
| 173 | #define REQ_PRIO (1 << __REQ_PRIO) | 187 | #define REQ_PRIO (1 << __REQ_PRIO) |
| 174 | #define REQ_DISCARD (1 << __REQ_DISCARD) | 188 | #define REQ_DISCARD (1 << __REQ_DISCARD) |
| 189 | #define REQ_WRITE_SAME (1 << __REQ_WRITE_SAME) | ||
| 175 | #define REQ_NOIDLE (1 << __REQ_NOIDLE) | 190 | #define REQ_NOIDLE (1 << __REQ_NOIDLE) |
| 176 | 191 | ||
| 177 | #define REQ_FAILFAST_MASK \ | 192 | #define REQ_FAILFAST_MASK \ |
| 178 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) | 193 | (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) |
| 179 | #define REQ_COMMON_MASK \ | 194 | #define REQ_COMMON_MASK \ |
| 180 | (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ | 195 | (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ |
| 181 | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) | 196 | REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ |
| 197 | REQ_SECURE) | ||
| 182 | #define REQ_CLONE_MASK REQ_COMMON_MASK | 198 | #define REQ_CLONE_MASK REQ_COMMON_MASK |
| 183 | 199 | ||
| 200 | /* This mask is used for both bio and request merge checking */ | ||
| 201 | #define REQ_NOMERGE_FLAGS \ | ||
| 202 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) | ||
| 203 | |||
| 184 | #define REQ_RAHEAD (1 << __REQ_RAHEAD) | 204 | #define REQ_RAHEAD (1 << __REQ_RAHEAD) |
| 185 | #define REQ_THROTTLED (1 << __REQ_THROTTLED) | 205 | #define REQ_THROTTLED (1 << __REQ_THROTTLED) |
| 186 | 206 | ||
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4a2ab7c85393..1756001210d2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h | |||
| @@ -270,6 +270,7 @@ struct queue_limits { | |||
| 270 | unsigned int io_min; | 270 | unsigned int io_min; |
| 271 | unsigned int io_opt; | 271 | unsigned int io_opt; |
| 272 | unsigned int max_discard_sectors; | 272 | unsigned int max_discard_sectors; |
| 273 | unsigned int max_write_same_sectors; | ||
| 273 | unsigned int discard_granularity; | 274 | unsigned int discard_granularity; |
| 274 | unsigned int discard_alignment; | 275 | unsigned int discard_alignment; |
| 275 | 276 | ||
| @@ -540,8 +541,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) | |||
| 540 | 541 | ||
| 541 | #define blk_account_rq(rq) \ | 542 | #define blk_account_rq(rq) \ |
| 542 | (((rq)->cmd_flags & REQ_STARTED) && \ | 543 | (((rq)->cmd_flags & REQ_STARTED) && \ |
| 543 | ((rq)->cmd_type == REQ_TYPE_FS || \ | 544 | ((rq)->cmd_type == REQ_TYPE_FS)) |
| 544 | ((rq)->cmd_flags & REQ_DISCARD))) | ||
| 545 | 545 | ||
| 546 | #define blk_pm_request(rq) \ | 546 | #define blk_pm_request(rq) \ |
| 547 | ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ | 547 | ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ |
| @@ -595,17 +595,39 @@ static inline void blk_clear_rl_full(struct request_list *rl, bool sync) | |||
| 595 | rl->flags &= ~flag; | 595 | rl->flags &= ~flag; |
| 596 | } | 596 | } |
| 597 | 597 | ||
| 598 | static inline bool rq_mergeable(struct request *rq) | ||
| 599 | { | ||
| 600 | if (rq->cmd_type != REQ_TYPE_FS) | ||
| 601 | return false; | ||
| 598 | 602 | ||
| 599 | /* | 603 | if (rq->cmd_flags & REQ_NOMERGE_FLAGS) |
| 600 | * mergeable request must not have _NOMERGE or _BARRIER bit set, nor may | 604 | return false; |
| 601 | * it already be started by driver. | 605 | |
| 602 | */ | 606 | return true; |
| 603 | #define RQ_NOMERGE_FLAGS \ | 607 | } |
| 604 | (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_DISCARD) | 608 | |
| 605 | #define rq_mergeable(rq) \ | 609 | static inline bool blk_check_merge_flags(unsigned int flags1, |
| 606 | (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ | 610 | unsigned int flags2) |
| 607 | (((rq)->cmd_flags & REQ_DISCARD) || \ | 611 | { |
| 608 | (rq)->cmd_type == REQ_TYPE_FS)) | 612 | if ((flags1 & REQ_DISCARD) != (flags2 & REQ_DISCARD)) |
| 613 | return false; | ||
| 614 | |||
| 615 | if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE)) | ||
| 616 | return false; | ||
| 617 | |||
| 618 | if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME)) | ||
| 619 | return false; | ||
| 620 | |||
| 621 | return true; | ||
| 622 | } | ||
| 623 | |||
| 624 | static inline bool blk_write_same_mergeable(struct bio *a, struct bio *b) | ||
| 625 | { | ||
| 626 | if (bio_data(a) == bio_data(b)) | ||
| 627 | return true; | ||
| 628 | |||
| 629 | return false; | ||
| 630 | } | ||
| 609 | 631 | ||
| 610 | /* | 632 | /* |
| 611 | * q->prep_rq_fn return values | 633 | * q->prep_rq_fn return values |
| @@ -802,6 +824,28 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) | |||
| 802 | return blk_rq_cur_bytes(rq) >> 9; | 824 | return blk_rq_cur_bytes(rq) >> 9; |
| 803 | } | 825 | } |
| 804 | 826 | ||
| 827 | static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, | ||
| 828 | unsigned int cmd_flags) | ||
| 829 | { | ||
| 830 | if (unlikely(cmd_flags & REQ_DISCARD)) | ||
| 831 | return q->limits.max_discard_sectors; | ||
| 832 | |||
| 833 | if (unlikely(cmd_flags & REQ_WRITE_SAME)) | ||
| 834 | return q->limits.max_write_same_sectors; | ||
| 835 | |||
| 836 | return q->limits.max_sectors; | ||
| 837 | } | ||
| 838 | |||
| 839 | static inline unsigned int blk_rq_get_max_sectors(struct request *rq) | ||
| 840 | { | ||
| 841 | struct request_queue *q = rq->q; | ||
| 842 | |||
| 843 | if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC)) | ||
| 844 | return q->limits.max_hw_sectors; | ||
| 845 | |||
| 846 | return blk_queue_get_max_sectors(q, rq->cmd_flags); | ||
| 847 | } | ||
| 848 | |||
| 805 | /* | 849 | /* |
| 806 | * Request issue related functions. | 850 | * Request issue related functions. |
| 807 | */ | 851 | */ |
| @@ -857,6 +901,8 @@ extern void blk_queue_max_segments(struct request_queue *, unsigned short); | |||
| 857 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); | 901 | extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); |
| 858 | extern void blk_queue_max_discard_sectors(struct request_queue *q, | 902 | extern void blk_queue_max_discard_sectors(struct request_queue *q, |
| 859 | unsigned int max_discard_sectors); | 903 | unsigned int max_discard_sectors); |
| 904 | extern void blk_queue_max_write_same_sectors(struct request_queue *q, | ||
| 905 | unsigned int max_write_same_sectors); | ||
| 860 | extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); | 906 | extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); |
| 861 | extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); | 907 | extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); |
| 862 | extern void blk_queue_alignment_offset(struct request_queue *q, | 908 | extern void blk_queue_alignment_offset(struct request_queue *q, |
| @@ -987,6 +1033,8 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, | |||
| 987 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); | 1033 | extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); |
| 988 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | 1034 | extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, |
| 989 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); | 1035 | sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); |
| 1036 | extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, | ||
| 1037 | sector_t nr_sects, gfp_t gfp_mask, struct page *page); | ||
| 990 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, | 1038 | extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, |
| 991 | sector_t nr_sects, gfp_t gfp_mask); | 1039 | sector_t nr_sects, gfp_t gfp_mask); |
| 992 | static inline int sb_issue_discard(struct super_block *sb, sector_t block, | 1040 | static inline int sb_issue_discard(struct super_block *sb, sector_t block, |
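blkdev_issue_write_same() is the submission helper: it replicates a single caller-supplied page across nr_sects sectors starting at sector. A hedged sketch of a caller; the name and the lack of error handling are illustrative only:

/* Sketch only: stamp one pattern page across a range of the device. */
static int example_write_pattern(struct block_device *bdev, struct page *page,
                                 sector_t sector, sector_t nr_sects)
{
        return blkdev_issue_write_same(bdev, sector, nr_sects, GFP_KERNEL,
                                       page);
}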
| @@ -1164,6 +1212,16 @@ static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) | |||
| 1164 | return queue_discard_zeroes_data(bdev_get_queue(bdev)); | 1212 | return queue_discard_zeroes_data(bdev_get_queue(bdev)); |
| 1165 | } | 1213 | } |
| 1166 | 1214 | ||
| 1215 | static inline unsigned int bdev_write_same(struct block_device *bdev) | ||
| 1216 | { | ||
| 1217 | struct request_queue *q = bdev_get_queue(bdev); | ||
| 1218 | |||
| 1219 | if (q) | ||
| 1220 | return q->limits.max_write_same_sectors; | ||
| 1221 | |||
| 1222 | return 0; | ||
| 1223 | } | ||
| 1224 | |||
| 1167 | static inline int queue_dma_alignment(struct request_queue *q) | 1225 | static inline int queue_dma_alignment(struct request_queue *q) |
| 1168 | { | 1226 | { |
| 1169 | return q ? q->dma_alignment : 511; | 1227 | return q ? q->dma_alignment : 511; |
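bdev_write_same() exposes the per-queue limit so callers can test for support before issuing the command. A sketch of how zeroing a range could prefer WRITE SAME of the shared zero page and fall back to blkdev_issue_zeroout(); this follows the intent described in the sysfs documentation above but is not the patch's exact code:

#include <linux/blkdev.h>
#include <linux/mm.h>

/* Sketch only: zero a range, using WRITE SAME when the device supports it. */
static int example_zero_range(struct block_device *bdev, sector_t sector,
                              sector_t nr_sects)
{
        if (bdev_write_same(bdev) &&
            blkdev_issue_write_same(bdev, sector, nr_sects, GFP_KERNEL,
                                    ZERO_PAGE(0)) == 0)
                return 0;

        return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL);
}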
diff --git a/include/linux/fs.h b/include/linux/fs.h index c617ed024df8..39f3e12ca752 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -335,6 +335,7 @@ struct inodes_stat_t { | |||
| 335 | #define BLKDISCARDZEROES _IO(0x12,124) | 335 | #define BLKDISCARDZEROES _IO(0x12,124) |
| 336 | #define BLKSECDISCARD _IO(0x12,125) | 336 | #define BLKSECDISCARD _IO(0x12,125) |
| 337 | #define BLKROTATIONAL _IO(0x12,126) | 337 | #define BLKROTATIONAL _IO(0x12,126) |
| 338 | #define BLKZEROOUT _IO(0x12,127) | ||
| 338 | 339 | ||
| 339 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ | 340 | #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ |
| 340 | #define FIBMAP _IO(0x00,1) /* bmap access */ | 341 | #define FIBMAP _IO(0x00,1) /* bmap access */ |
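The new BLKZEROOUT ioctl gives user space a way to zero a byte range of a block device. A hedged userspace sketch, assuming the ioctl takes a {start, length} pair of 64-bit byte offsets that must be 512-byte aligned:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

/* Sketch only: zero len bytes starting at byte offset start of fd. */
static int example_blkzeroout(int fd, uint64_t start, uint64_t len)
{
        uint64_t range[2] = { start, len };

        return ioctl(fd, BLKZEROOUT, range);
}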
| @@ -415,6 +416,7 @@ struct inodes_stat_t { | |||
| 415 | #include <linux/migrate_mode.h> | 416 | #include <linux/migrate_mode.h> |
| 416 | #include <linux/uidgid.h> | 417 | #include <linux/uidgid.h> |
| 417 | #include <linux/lockdep.h> | 418 | #include <linux/lockdep.h> |
| 419 | #include <linux/percpu-rwsem.h> | ||
| 418 | 420 | ||
| 419 | #include <asm/byteorder.h> | 421 | #include <asm/byteorder.h> |
| 420 | 422 | ||
| @@ -724,6 +726,8 @@ struct block_device { | |||
| 724 | int bd_fsfreeze_count; | 726 | int bd_fsfreeze_count; |
| 725 | /* Mutex for freeze */ | 727 | /* Mutex for freeze */ |
| 726 | struct mutex bd_fsfreeze_mutex; | 728 | struct mutex bd_fsfreeze_mutex; |
| 729 | /* A semaphore that prevents I/O while block size is being changed */ | ||
| 730 | struct percpu_rw_semaphore bd_block_size_semaphore; | ||
| 727 | }; | 731 | }; |
| 728 | 732 | ||
| 729 | /* | 733 | /* |
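The new field pairs every block-device I/O path (read side) with set_blocksize() (write side), so the block size can only change while no I/O is in flight. A rough sketch of the intended pattern, with hypothetical helper names that only approximate what fs/block_dev.c would do:

/* Sketch only: I/O paths hold the read side of bd_block_size_semaphore. */
static void example_blkdev_io(struct block_device *bdev)
{
        percpu_down_read(&bdev->bd_block_size_semaphore);
        /* submit I/O that relies on a stable block size here */
        percpu_up_read(&bdev->bd_block_size_semaphore);
}

/* Sketch only: changing the block size excludes all in-flight I/O. */
static void example_set_blocksize(struct block_device *bdev, int size)
{
        percpu_down_write(&bdev->bd_block_size_semaphore);
        bdev->bd_block_size = size;
        bdev->bd_inode->i_blkbits = blksize_bits(size);
        percpu_up_write(&bdev->bd_block_size_semaphore);
}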
| @@ -2570,6 +2574,8 @@ extern int generic_segment_checks(const struct iovec *iov, | |||
| 2570 | unsigned long *nr_segs, size_t *count, int access_flags); | 2574 | unsigned long *nr_segs, size_t *count, int access_flags); |
| 2571 | 2575 | ||
| 2572 | /* fs/block_dev.c */ | 2576 | /* fs/block_dev.c */ |
| 2577 | extern ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, | ||
| 2578 | unsigned long nr_segs, loff_t pos); | ||
| 2573 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | 2579 | extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, |
| 2574 | unsigned long nr_segs, loff_t pos); | 2580 | unsigned long nr_segs, loff_t pos); |
| 2575 | extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, | 2581 | extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, |
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h new file mode 100644 index 000000000000..cf80f7e5277f --- /dev/null +++ b/include/linux/percpu-rwsem.h | |||
| @@ -0,0 +1,89 @@ | |||
| 1 | #ifndef _LINUX_PERCPU_RWSEM_H | ||
| 2 | #define _LINUX_PERCPU_RWSEM_H | ||
| 3 | |||
| 4 | #include <linux/mutex.h> | ||
| 5 | #include <linux/percpu.h> | ||
| 6 | #include <linux/rcupdate.h> | ||
| 7 | #include <linux/delay.h> | ||
| 8 | |||
| 9 | struct percpu_rw_semaphore { | ||
| 10 | unsigned __percpu *counters; | ||
| 11 | bool locked; | ||
| 12 | struct mutex mtx; | ||
| 13 | }; | ||
| 14 | |||
| 15 | static inline void percpu_down_read(struct percpu_rw_semaphore *p) | ||
| 16 | { | ||
| 17 | rcu_read_lock(); | ||
| 18 | if (unlikely(p->locked)) { | ||
| 19 | rcu_read_unlock(); | ||
| 20 | mutex_lock(&p->mtx); | ||
| 21 | this_cpu_inc(*p->counters); | ||
| 22 | mutex_unlock(&p->mtx); | ||
| 23 | return; | ||
| 24 | } | ||
| 25 | this_cpu_inc(*p->counters); | ||
| 26 | rcu_read_unlock(); | ||
| 27 | } | ||
| 28 | |||
| 29 | static inline void percpu_up_read(struct percpu_rw_semaphore *p) | ||
| 30 | { | ||
| 31 | /* | ||
| 32 | * On X86, write operation in this_cpu_dec serves as a memory unlock | ||
| 33 | * barrier (i.e. memory accesses may be moved before the write, but | ||
| 34 | * no memory accesses are moved past the write). | ||
| 35 | * On other architectures this may not be the case, so we need smp_mb() | ||
| 36 | * there. | ||
| 37 | */ | ||
| 38 | #if defined(CONFIG_X86) && (!defined(CONFIG_X86_PPRO_FENCE) && !defined(CONFIG_X86_OOSTORE)) | ||
| 39 | barrier(); | ||
| 40 | #else | ||
| 41 | smp_mb(); | ||
| 42 | #endif | ||
| 43 | this_cpu_dec(*p->counters); | ||
| 44 | } | ||
| 45 | |||
| 46 | static inline unsigned __percpu_count(unsigned __percpu *counters) | ||
| 47 | { | ||
| 48 | unsigned total = 0; | ||
| 49 | int cpu; | ||
| 50 | |||
| 51 | for_each_possible_cpu(cpu) | ||
| 52 | total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu)); | ||
| 53 | |||
| 54 | return total; | ||
| 55 | } | ||
| 56 | |||
| 57 | static inline void percpu_down_write(struct percpu_rw_semaphore *p) | ||
| 58 | { | ||
| 59 | mutex_lock(&p->mtx); | ||
| 60 | p->locked = true; | ||
| 61 | synchronize_rcu(); | ||
| 62 | while (__percpu_count(p->counters)) | ||
| 63 | msleep(1); | ||
| 64 | smp_rmb(); /* paired with the barrier in percpu_up_read() */ | ||
| 65 | } | ||
| 66 | |||
| 67 | static inline void percpu_up_write(struct percpu_rw_semaphore *p) | ||
| 68 | { | ||
| 69 | p->locked = false; | ||
| 70 | mutex_unlock(&p->mtx); | ||
| 71 | } | ||
| 72 | |||
| 73 | static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p) | ||
| 74 | { | ||
| 75 | p->counters = alloc_percpu(unsigned); | ||
| 76 | if (unlikely(!p->counters)) | ||
| 77 | return -ENOMEM; | ||
| 78 | p->locked = false; | ||
| 79 | mutex_init(&p->mtx); | ||
| 80 | return 0; | ||
| 81 | } | ||
| 82 | |||
| 83 | static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p) | ||
| 84 | { | ||
| 85 | free_percpu(p->counters); | ||
| 86 | p->counters = NULL; /* catch use after free bugs */ | ||
| 87 | } | ||
| 88 | |||
| 89 | #endif | ||
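Taken together, the header implements the pattern the primitive is designed for: readers pay only a per-CPU increment on the fast path, while the rare writer sets the locked flag, waits for existing readers to drain, and then serializes through the mutex. A minimal, self-contained sketch of that lifecycle (all names hypothetical):

#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore example_sem;

static int example_init(void)
{
        return percpu_init_rwsem(&example_sem);   /* may fail with -ENOMEM */
}

static void example_reader(void)
{
        percpu_down_read(&example_sem);
        /* frequent, performance-critical work goes here */
        percpu_up_read(&example_sem);
}

static void example_writer(void)
{
        percpu_down_write(&example_sem);          /* waits out all readers */
        /* rare, possibly slow update goes here */
        percpu_up_write(&example_sem);
}

static void example_exit(void)
{
        percpu_free_rwsem(&example_sem);
}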
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 7b600da9a635..4bd6c06eb28e 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h | |||
| @@ -201,6 +201,7 @@ static inline void *sg_virt(struct scatterlist *sg) | |||
| 201 | return page_address(sg_page(sg)) + sg->offset; | 201 | return page_address(sg_page(sg)) + sg->offset; |
| 202 | } | 202 | } |
| 203 | 203 | ||
| 204 | int sg_nents(struct scatterlist *sg); | ||
| 204 | struct scatterlist *sg_next(struct scatterlist *); | 205 | struct scatterlist *sg_next(struct scatterlist *); |
| 205 | struct scatterlist *sg_last(struct scatterlist *s, unsigned int); | 206 | struct scatterlist *sg_last(struct scatterlist *s, unsigned int); |
| 206 | void sg_init_table(struct scatterlist *, unsigned int); | 207 | void sg_init_table(struct scatterlist *, unsigned int); |
diff --git a/lib/scatterlist.c b/lib/scatterlist.c index e76d85cf3175..3675452b23ca 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c | |||
| @@ -39,6 +39,25 @@ struct scatterlist *sg_next(struct scatterlist *sg) | |||
| 39 | EXPORT_SYMBOL(sg_next); | 39 | EXPORT_SYMBOL(sg_next); |
| 40 | 40 | ||
| 41 | /** | 41 | /** |
| 42 | * sg_nents - return total count of entries in scatterlist | ||
| 43 | * @sg: The scatterlist | ||
| 44 | * | ||
| 45 | * Description: | ||
| 46 | * Allows to know how many entries are in sg, taking into account | ||
| 47 | * chaining as well | ||
| 48 | * | ||
| 49 | **/ | ||
| 50 | int sg_nents(struct scatterlist *sg) | ||
| 51 | { | ||
| 52 | int nents; | ||
| 53 | for (nents = 0; sg; sg = sg_next(sg)) | ||
| 54 | nents++; | ||
| 55 | return nents; | ||
| 56 | } | ||
| 57 | EXPORT_SYMBOL(sg_nents); | ||
| 58 | |||
| 59 | |||
| 60 | /** | ||
| 42 | * sg_last - return the last scatterlist entry in a list | 61 | * sg_last - return the last scatterlist entry in a list |
| 43 | * @sgl: First entry in the scatterlist | 62 | * @sgl: First entry in the scatterlist |
| 44 | * @nents: Number of entries in the scatterlist | 63 | * @nents: Number of entries in the scatterlist |
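sg_nents() walks the list via sg_next(), so entries living in chained tables are counted as well; a typical use is sizing a per-entry descriptor array before mapping. A hedged sketch (struct example_desc and the helper name are hypothetical):

#include <linux/scatterlist.h>
#include <linux/slab.h>

struct example_desc {
        dma_addr_t addr;
        unsigned int len;
};

/* Sketch only: allocate one descriptor per scatterlist entry. */
static struct example_desc *example_alloc_descs(struct scatterlist *sgl)
{
        int nents = sg_nents(sgl);   /* counts entries across chained tables */

        return kcalloc(nents, sizeof(struct example_desc), GFP_KERNEL);
}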
