diff options
author | Martin K. Petersen <martin.petersen@oracle.com> | 2012-09-18 12:19:27 -0400 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2012-09-20 08:31:45 -0400 |
commit | 4363ac7c13a9a4b763c6e8d9fdbfc2468f3b8ca4 (patch) | |
tree | 010b05699eb9544b9cdfe5e1b3affdaea80132e7 /block | |
parent | f31dc1cd490539e2b62a126bc4dc2495b165d772 (diff) |
block: Implement support for WRITE SAME
The WRITE SAME command supported on some SCSI devices allows the same
block to be efficiently replicated throughout a block range. Only a
single logical block is transferred from the host and the storage device
writes the same data to all blocks described by the I/O.
This patch implements support for WRITE SAME in the block layer. The
blkdev_issue_write_same() function can be used by filesystems and block
drivers to replicate a buffer across a block range. This can be used to
efficiently initialize software RAID devices, etc.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Acked-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-core.c | 14 | ||||
-rw-r--r-- | block/blk-lib.c | 74 | ||||
-rw-r--r-- | block/blk-merge.c | 9 | ||||
-rw-r--r-- | block/blk-settings.c | 16 | ||||
-rw-r--r-- | block/blk-sysfs.c | 13 |
5 files changed, 124 insertions, 2 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 33eded00c5b1..3b080541098e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -1704,6 +1704,11 @@ generic_make_request_checks(struct bio *bio) | |||
1704 | goto end_io; | 1704 | goto end_io; |
1705 | } | 1705 | } |
1706 | 1706 | ||
1707 | if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { | ||
1708 | err = -EOPNOTSUPP; | ||
1709 | goto end_io; | ||
1710 | } | ||
1711 | |||
1707 | /* | 1712 | /* |
1708 | * Various block parts want %current->io_context and lazy ioc | 1713 | * Various block parts want %current->io_context and lazy ioc |
1709 | * allocation ends up trading a lot of pain for a small amount of | 1714 | * allocation ends up trading a lot of pain for a small amount of |
@@ -1809,8 +1814,6 @@ EXPORT_SYMBOL(generic_make_request); | |||
1809 | */ | 1814 | */ |
1810 | void submit_bio(int rw, struct bio *bio) | 1815 | void submit_bio(int rw, struct bio *bio) |
1811 | { | 1816 | { |
1812 | int count = bio_sectors(bio); | ||
1813 | |||
1814 | bio->bi_rw |= rw; | 1817 | bio->bi_rw |= rw; |
1815 | 1818 | ||
1816 | /* | 1819 | /* |
@@ -1818,6 +1821,13 @@ void submit_bio(int rw, struct bio *bio) | |||
1818 | * go through the normal accounting stuff before submission. | 1821 | * go through the normal accounting stuff before submission. |
1819 | */ | 1822 | */ |
1820 | if (bio_has_data(bio)) { | 1823 | if (bio_has_data(bio)) { |
1824 | unsigned int count; | ||
1825 | |||
1826 | if (unlikely(rw & REQ_WRITE_SAME)) | ||
1827 | count = bdev_logical_block_size(bio->bi_bdev) >> 9; | ||
1828 | else | ||
1829 | count = bio_sectors(bio); | ||
1830 | |||
1821 | if (rw & WRITE) { | 1831 | if (rw & WRITE) { |
1822 | count_vm_events(PGPGOUT, count); | 1832 | count_vm_events(PGPGOUT, count); |
1823 | } else { | 1833 | } else { |
diff --git a/block/blk-lib.c b/block/blk-lib.c index 19cc761cacb2..a062543c58ac 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c | |||
@@ -130,6 +130,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, | |||
130 | EXPORT_SYMBOL(blkdev_issue_discard); | 130 | EXPORT_SYMBOL(blkdev_issue_discard); |
131 | 131 | ||
132 | /** | 132 | /** |
133 | * blkdev_issue_write_same - queue a write same operation | ||
134 | * @bdev: target blockdev | ||
135 | * @sector: start sector | ||
136 | * @nr_sects: number of sectors to write | ||
137 | * @gfp_mask: memory allocation flags (for bio_alloc) | ||
138 | * @page: page containing data to write | ||
139 | * | ||
140 | * Description: | ||
141 | * Issue a write same request for the sectors in question. | ||
142 | */ | ||
143 | int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, | ||
144 | sector_t nr_sects, gfp_t gfp_mask, | ||
145 | struct page *page) | ||
146 | { | ||
147 | DECLARE_COMPLETION_ONSTACK(wait); | ||
148 | struct request_queue *q = bdev_get_queue(bdev); | ||
149 | unsigned int max_write_same_sectors; | ||
150 | struct bio_batch bb; | ||
151 | struct bio *bio; | ||
152 | int ret = 0; | ||
153 | |||
154 | if (!q) | ||
155 | return -ENXIO; | ||
156 | |||
157 | max_write_same_sectors = q->limits.max_write_same_sectors; | ||
158 | |||
159 | if (max_write_same_sectors == 0) | ||
160 | return -EOPNOTSUPP; | ||
161 | |||
162 | atomic_set(&bb.done, 1); | ||
163 | bb.flags = 1 << BIO_UPTODATE; | ||
164 | bb.wait = &wait; | ||
165 | |||
166 | while (nr_sects) { | ||
167 | bio = bio_alloc(gfp_mask, 1); | ||
168 | if (!bio) { | ||
169 | ret = -ENOMEM; | ||
170 | break; | ||
171 | } | ||
172 | |||
173 | bio->bi_sector = sector; | ||
174 | bio->bi_end_io = bio_batch_end_io; | ||
175 | bio->bi_bdev = bdev; | ||
176 | bio->bi_private = &bb; | ||
177 | bio->bi_vcnt = 1; | ||
178 | bio->bi_io_vec->bv_page = page; | ||
179 | bio->bi_io_vec->bv_offset = 0; | ||
180 | bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev); | ||
181 | |||
182 | if (nr_sects > max_write_same_sectors) { | ||
183 | bio->bi_size = max_write_same_sectors << 9; | ||
184 | nr_sects -= max_write_same_sectors; | ||
185 | sector += max_write_same_sectors; | ||
186 | } else { | ||
187 | bio->bi_size = nr_sects << 9; | ||
188 | nr_sects = 0; | ||
189 | } | ||
190 | |||
191 | atomic_inc(&bb.done); | ||
192 | submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio); | ||
193 | } | ||
194 | |||
195 | /* Wait for bios in-flight */ | ||
196 | if (!atomic_dec_and_test(&bb.done)) | ||
197 | wait_for_completion(&wait); | ||
198 | |||
199 | if (!test_bit(BIO_UPTODATE, &bb.flags)) | ||
200 | ret = -ENOTSUPP; | ||
201 | |||
202 | return ret; | ||
203 | } | ||
204 | EXPORT_SYMBOL(blkdev_issue_write_same); | ||
205 | |||
206 | /** | ||
133 | * blkdev_issue_zeroout - generate number of zero-filled write bios | 207 | * blkdev_issue_zeroout - generate number of zero-filled write bios |
134 | * @bdev: blockdev to issue | 208 | * @bdev: blockdev to issue |
135 | * @sector: start sector | 209 | * @sector: start sector |
diff --git a/block/blk-merge.c b/block/blk-merge.c index 642b862608a1..936a110de0b9 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
@@ -419,6 +419,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, | |||
419 | || next->special) | 419 | || next->special) |
420 | return 0; | 420 | return 0; |
421 | 421 | ||
422 | if (req->cmd_flags & REQ_WRITE_SAME && | ||
423 | !blk_write_same_mergeable(req->bio, next->bio)) | ||
424 | return 0; | ||
425 | |||
422 | /* | 426 | /* |
423 | * If we are allowed to merge, then append bio list | 427 | * If we are allowed to merge, then append bio list |
424 | * from next to rq and release next. merge_requests_fn | 428 | * from next to rq and release next. merge_requests_fn |
@@ -518,6 +522,11 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio) | |||
518 | if (bio_integrity(bio) != blk_integrity_rq(rq)) | 522 | if (bio_integrity(bio) != blk_integrity_rq(rq)) |
519 | return false; | 523 | return false; |
520 | 524 | ||
525 | /* must be using the same buffer */ | ||
526 | if (rq->cmd_flags & REQ_WRITE_SAME && | ||
527 | !blk_write_same_mergeable(rq->bio, bio)) | ||
528 | return false; | ||
529 | |||
521 | return true; | 530 | return true; |
522 | } | 531 | } |
523 | 532 | ||
diff --git a/block/blk-settings.c b/block/blk-settings.c index 565a6786032f..779bb7646bcd 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c | |||
@@ -113,6 +113,7 @@ void blk_set_default_limits(struct queue_limits *lim) | |||
113 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; | 113 | lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; |
114 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; | 114 | lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; |
115 | lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; | 115 | lim->max_sectors = lim->max_hw_sectors = BLK_SAFE_MAX_SECTORS; |
116 | lim->max_write_same_sectors = 0; | ||
116 | lim->max_discard_sectors = 0; | 117 | lim->max_discard_sectors = 0; |
117 | lim->discard_granularity = 0; | 118 | lim->discard_granularity = 0; |
118 | lim->discard_alignment = 0; | 119 | lim->discard_alignment = 0; |
@@ -144,6 +145,7 @@ void blk_set_stacking_limits(struct queue_limits *lim) | |||
144 | lim->max_segments = USHRT_MAX; | 145 | lim->max_segments = USHRT_MAX; |
145 | lim->max_hw_sectors = UINT_MAX; | 146 | lim->max_hw_sectors = UINT_MAX; |
146 | lim->max_sectors = UINT_MAX; | 147 | lim->max_sectors = UINT_MAX; |
148 | lim->max_write_same_sectors = UINT_MAX; | ||
147 | } | 149 | } |
148 | EXPORT_SYMBOL(blk_set_stacking_limits); | 150 | EXPORT_SYMBOL(blk_set_stacking_limits); |
149 | 151 | ||
@@ -286,6 +288,18 @@ void blk_queue_max_discard_sectors(struct request_queue *q, | |||
286 | EXPORT_SYMBOL(blk_queue_max_discard_sectors); | 288 | EXPORT_SYMBOL(blk_queue_max_discard_sectors); |
287 | 289 | ||
288 | /** | 290 | /** |
291 | * blk_queue_max_write_same_sectors - set max sectors for a single write same | ||
292 | * @q: the request queue for the device | ||
293 | * @max_write_same_sectors: maximum number of sectors to write per command | ||
294 | **/ | ||
295 | void blk_queue_max_write_same_sectors(struct request_queue *q, | ||
296 | unsigned int max_write_same_sectors) | ||
297 | { | ||
298 | q->limits.max_write_same_sectors = max_write_same_sectors; | ||
299 | } | ||
300 | EXPORT_SYMBOL(blk_queue_max_write_same_sectors); | ||
301 | |||
302 | /** | ||
289 | * blk_queue_max_segments - set max hw segments for a request for this queue | 303 | * blk_queue_max_segments - set max hw segments for a request for this queue |
290 | * @q: the request queue for the device | 304 | * @q: the request queue for the device |
291 | * @max_segments: max number of segments | 305 | * @max_segments: max number of segments |
@@ -510,6 +524,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, | |||
510 | 524 | ||
511 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); | 525 | t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); |
512 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); | 526 | t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); |
527 | t->max_write_same_sectors = min(t->max_write_same_sectors, | ||
528 | b->max_write_same_sectors); | ||
513 | t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); | 529 | t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn); |
514 | 530 | ||
515 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, | 531 | t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index ea51d827a0bb..247dbfd42621 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c | |||
@@ -180,6 +180,13 @@ static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *pag | |||
180 | return queue_var_show(queue_discard_zeroes_data(q), page); | 180 | return queue_var_show(queue_discard_zeroes_data(q), page); |
181 | } | 181 | } |
182 | 182 | ||
183 | static ssize_t queue_write_same_max_show(struct request_queue *q, char *page) | ||
184 | { | ||
185 | return sprintf(page, "%llu\n", | ||
186 | (unsigned long long)q->limits.max_write_same_sectors << 9); | ||
187 | } | ||
188 | |||
189 | |||
183 | static ssize_t | 190 | static ssize_t |
184 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) | 191 | queue_max_sectors_store(struct request_queue *q, const char *page, size_t count) |
185 | { | 192 | { |
@@ -385,6 +392,11 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { | |||
385 | .show = queue_discard_zeroes_data_show, | 392 | .show = queue_discard_zeroes_data_show, |
386 | }; | 393 | }; |
387 | 394 | ||
395 | static struct queue_sysfs_entry queue_write_same_max_entry = { | ||
396 | .attr = {.name = "write_same_max_bytes", .mode = S_IRUGO }, | ||
397 | .show = queue_write_same_max_show, | ||
398 | }; | ||
399 | |||
388 | static struct queue_sysfs_entry queue_nonrot_entry = { | 400 | static struct queue_sysfs_entry queue_nonrot_entry = { |
389 | .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, | 401 | .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, |
390 | .show = queue_show_nonrot, | 402 | .show = queue_show_nonrot, |
@@ -432,6 +444,7 @@ static struct attribute *default_attrs[] = { | |||
432 | &queue_discard_granularity_entry.attr, | 444 | &queue_discard_granularity_entry.attr, |
433 | &queue_discard_max_entry.attr, | 445 | &queue_discard_max_entry.attr, |
434 | &queue_discard_zeroes_data_entry.attr, | 446 | &queue_discard_zeroes_data_entry.attr, |
447 | &queue_write_same_max_entry.attr, | ||
435 | &queue_nonrot_entry.attr, | 448 | &queue_nonrot_entry.attr, |
436 | &queue_nomerges_entry.attr, | 449 | &queue_nomerges_entry.attr, |
437 | &queue_rq_affinity_entry.attr, | 450 | &queue_rq_affinity_entry.attr, |