diff options
author | Martin K. Petersen <martin.petersen@oracle.com> | 2008-06-30 14:04:41 -0400 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2008-07-03 07:21:13 -0400 |
commit | 7ba1ba12eeef0aa7113beb16410ef8b7c748e18b (patch) | |
tree | 4629aabe88bf095d58eabd2f451207695bb35b08 /block | |
parent | 51d654e1d885607a6edd02b337105fa5c28b6d33 (diff) |
block: Block layer data integrity support
Some block devices support verifying the integrity of requests by way
of checksums or other protection information that is submitted along
with the I/O.
This patch implements support for generating and verifying integrity
metadata, as well as correctly merging, splitting and cloning bios and
requests that have this extra information attached.
See Documentation/block/data-integrity.txt for more information.
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 12 | ||||
-rw-r--r-- | block/Makefile | 1 | ||||
-rw-r--r-- | block/blk-core.c | 7 | ||||
-rw-r--r-- | block/blk-integrity.c | 382 | ||||
-rw-r--r-- | block/blk-merge.c | 3 | ||||
-rw-r--r-- | block/blk.h | 8 | ||||
-rw-r--r-- | block/elevator.c | 6 |
7 files changed, 419 insertions, 0 deletions
diff --git a/block/Kconfig b/block/Kconfig index 3e97f2bc446..1ab7c15c8d7 100644 --- a/block/Kconfig +++ b/block/Kconfig | |||
@@ -81,6 +81,18 @@ config BLK_DEV_BSG | |||
81 | 81 | ||
82 | If unsure, say N. | 82 | If unsure, say N. |
83 | 83 | ||
84 | config BLK_DEV_INTEGRITY | ||
85 | bool "Block layer data integrity support" | ||
86 | ---help--- | ||
87 | Some storage devices allow extra information to be | ||
88 | stored/retrieved to help protect the data. The block layer | ||
89 | data integrity option provides hooks which can be used by | ||
90 | filesystems to ensure better data integrity. | ||
91 | |||
92 | Say yes here if you have a storage device that provides the | ||
93 | T10/SCSI Data Integrity Field or the T13/ATA External Path | ||
94 | Protection. If in doubt, say N. | ||
95 | |||
84 | endif # BLOCK | 96 | endif # BLOCK |
85 | 97 | ||
86 | config BLOCK_COMPAT | 98 | config BLOCK_COMPAT |
diff --git a/block/Makefile b/block/Makefile index 5a43c7d7959..045f7b62e4b 100644 --- a/block/Makefile +++ b/block/Makefile | |||
@@ -14,3 +14,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o | |||
14 | 14 | ||
15 | obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o | 15 | obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o |
16 | obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o | 16 | obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o |
17 | obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o | ||
diff --git a/block/blk-core.c b/block/blk-core.c index 1905aaba49f..e0fb0bcc0c1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c | |||
@@ -143,6 +143,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio, | |||
143 | 143 | ||
144 | bio->bi_size -= nbytes; | 144 | bio->bi_size -= nbytes; |
145 | bio->bi_sector += (nbytes >> 9); | 145 | bio->bi_sector += (nbytes >> 9); |
146 | |||
147 | if (bio_integrity(bio)) | ||
148 | bio_integrity_advance(bio, nbytes); | ||
149 | |||
146 | if (bio->bi_size == 0) | 150 | if (bio->bi_size == 0) |
147 | bio_endio(bio, error); | 151 | bio_endio(bio, error); |
148 | } else { | 152 | } else { |
@@ -1381,6 +1385,9 @@ end_io: | |||
1381 | */ | 1385 | */ |
1382 | blk_partition_remap(bio); | 1386 | blk_partition_remap(bio); |
1383 | 1387 | ||
1388 | if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) | ||
1389 | goto end_io; | ||
1390 | |||
1384 | if (old_sector != -1) | 1391 | if (old_sector != -1) |
1385 | blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, | 1392 | blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, |
1386 | old_sector); | 1393 | old_sector); |
diff --git a/block/blk-integrity.c b/block/blk-integrity.c new file mode 100644 index 00000000000..65f23ef38bb --- /dev/null +++ b/block/blk-integrity.c | |||
@@ -0,0 +1,382 @@ | |||
1 | /* | ||
2 | * blk-integrity.c - Block layer data integrity extensions | ||
3 | * | ||
4 | * Copyright (C) 2007, 2008 Oracle Corporation | ||
5 | * Written by: Martin K. Petersen <martin.petersen@oracle.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License version | ||
9 | * 2 as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
14 | * General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; see the file COPYING. If not, write to | ||
18 | * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, | ||
19 | * USA. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/blkdev.h> | ||
24 | #include <linux/mempool.h> | ||
25 | #include <linux/bio.h> | ||
26 | #include <linux/scatterlist.h> | ||
27 | |||
28 | #include "blk.h" | ||
29 | |||
30 | static struct kmem_cache *integrity_cachep; | ||
31 | |||
32 | /** | ||
33 | * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements | ||
34 | * @rq: request with integrity metadata attached | ||
35 | * | ||
36 | * Description: Returns the number of elements required in a | ||
37 | * scatterlist corresponding to the integrity metadata in a request. | ||
38 | */ | ||
39 | int blk_rq_count_integrity_sg(struct request *rq) | ||
40 | { | ||
41 | struct bio_vec *iv, *ivprv; | ||
42 | struct req_iterator iter; | ||
43 | unsigned int segments; | ||
44 | |||
45 | ivprv = NULL; | ||
46 | segments = 0; | ||
47 | |||
48 | rq_for_each_integrity_segment(iv, rq, iter) { | ||
49 | |||
50 | if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv)) | ||
51 | segments++; | ||
52 | |||
53 | ivprv = iv; | ||
54 | } | ||
55 | |||
56 | return segments; | ||
57 | } | ||
58 | EXPORT_SYMBOL(blk_rq_count_integrity_sg); | ||
59 | |||
60 | /** | ||
61 | * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist | ||
62 | * @rq: request with integrity metadata attached | ||
63 | * @sglist: target scatterlist | ||
64 | * | ||
65 | * Description: Map the integrity vectors in request into a | ||
66 | * scatterlist. The scatterlist must be big enough to hold all | ||
67 | * elements. I.e. sized using blk_rq_count_integrity_sg(). | ||
68 | */ | ||
69 | int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist) | ||
70 | { | ||
71 | struct bio_vec *iv, *ivprv; | ||
72 | struct req_iterator iter; | ||
73 | struct scatterlist *sg; | ||
74 | unsigned int segments; | ||
75 | |||
76 | ivprv = NULL; | ||
77 | sg = NULL; | ||
78 | segments = 0; | ||
79 | |||
80 | rq_for_each_integrity_segment(iv, rq, iter) { | ||
81 | |||
82 | if (ivprv) { | ||
83 | if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv)) | ||
84 | goto new_segment; | ||
85 | |||
86 | sg->length += iv->bv_len; | ||
87 | } else { | ||
88 | new_segment: | ||
89 | if (!sg) | ||
90 | sg = sglist; | ||
91 | else { | ||
92 | sg->page_link &= ~0x02; | ||
93 | sg = sg_next(sg); | ||
94 | } | ||
95 | |||
96 | sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset); | ||
97 | segments++; | ||
98 | } | ||
99 | |||
100 | ivprv = iv; | ||
101 | } | ||
102 | |||
103 | if (sg) | ||
104 | sg_mark_end(sg); | ||
105 | |||
106 | return segments; | ||
107 | } | ||
108 | EXPORT_SYMBOL(blk_rq_map_integrity_sg); | ||
109 | |||
110 | /** | ||
111 | * blk_integrity_compare - Compare integrity profile of two block devices | ||
112 | * @b1: Device to compare | ||
113 | * @b2: Device to compare | ||
114 | * | ||
115 | * Description: Meta-devices like DM and MD need to verify that all | ||
116 | * sub-devices use the same integrity format before advertising to | ||
117 | * upper layers that they can send/receive integrity metadata. This | ||
118 | * function can be used to check whether two block devices have | ||
119 | * compatible integrity formats. | ||
120 | */ | ||
121 | int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2) | ||
122 | { | ||
123 | struct blk_integrity *b1 = bd1->bd_disk->integrity; | ||
124 | struct blk_integrity *b2 = bd2->bd_disk->integrity; | ||
125 | |||
126 | BUG_ON(bd1->bd_disk == NULL); | ||
127 | BUG_ON(bd2->bd_disk == NULL); | ||
128 | |||
129 | if (!b1 || !b2) | ||
130 | return 0; | ||
131 | |||
132 | if (b1->sector_size != b2->sector_size) { | ||
133 | printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__, | ||
134 | bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, | ||
135 | b1->sector_size, b2->sector_size); | ||
136 | return -1; | ||
137 | } | ||
138 | |||
139 | if (b1->tuple_size != b2->tuple_size) { | ||
140 | printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__, | ||
141 | bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, | ||
142 | b1->tuple_size, b2->tuple_size); | ||
143 | return -1; | ||
144 | } | ||
145 | |||
146 | if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) { | ||
147 | printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__, | ||
148 | bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, | ||
149 | b1->tag_size, b2->tag_size); | ||
150 | return -1; | ||
151 | } | ||
152 | |||
153 | if (strcmp(b1->name, b2->name)) { | ||
154 | printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__, | ||
155 | bd1->bd_disk->disk_name, bd2->bd_disk->disk_name, | ||
156 | b1->name, b2->name); | ||
157 | return -1; | ||
158 | } | ||
159 | |||
160 | return 0; | ||
161 | } | ||
162 | EXPORT_SYMBOL(blk_integrity_compare); | ||
163 | |||
164 | struct integrity_sysfs_entry { | ||
165 | struct attribute attr; | ||
166 | ssize_t (*show)(struct blk_integrity *, char *); | ||
167 | ssize_t (*store)(struct blk_integrity *, const char *, size_t); | ||
168 | }; | ||
169 | |||
170 | static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr, | ||
171 | char *page) | ||
172 | { | ||
173 | struct blk_integrity *bi = | ||
174 | container_of(kobj, struct blk_integrity, kobj); | ||
175 | struct integrity_sysfs_entry *entry = | ||
176 | container_of(attr, struct integrity_sysfs_entry, attr); | ||
177 | |||
178 | return entry->show(bi, page); | ||
179 | } | ||
180 | |||
181 | static ssize_t integrity_attr_store(struct kobject *kobj, struct attribute *attr, | ||
182 | const char *page, size_t count) | ||
183 | { | ||
184 | struct blk_integrity *bi = | ||
185 | container_of(kobj, struct blk_integrity, kobj); | ||
186 | struct integrity_sysfs_entry *entry = | ||
187 | container_of(attr, struct integrity_sysfs_entry, attr); | ||
188 | ssize_t ret = 0; | ||
189 | |||
190 | if (entry->store) | ||
191 | ret = entry->store(bi, page, count); | ||
192 | |||
193 | return ret; | ||
194 | } | ||
195 | |||
196 | static ssize_t integrity_format_show(struct blk_integrity *bi, char *page) | ||
197 | { | ||
198 | if (bi != NULL && bi->name != NULL) | ||
199 | return sprintf(page, "%s\n", bi->name); | ||
200 | else | ||
201 | return sprintf(page, "none\n"); | ||
202 | } | ||
203 | |||
204 | static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page) | ||
205 | { | ||
206 | if (bi != NULL) | ||
207 | return sprintf(page, "%u\n", bi->tag_size); | ||
208 | else | ||
209 | return sprintf(page, "0\n"); | ||
210 | } | ||
211 | |||
212 | static ssize_t integrity_read_store(struct blk_integrity *bi, | ||
213 | const char *page, size_t count) | ||
214 | { | ||
215 | char *p = (char *) page; | ||
216 | unsigned long val = simple_strtoul(p, &p, 10); | ||
217 | |||
218 | if (val) | ||
219 | set_bit(INTEGRITY_FLAG_READ, &bi->flags); | ||
220 | else | ||
221 | clear_bit(INTEGRITY_FLAG_READ, &bi->flags); | ||
222 | |||
223 | return count; | ||
224 | } | ||
225 | |||
226 | static ssize_t integrity_read_show(struct blk_integrity *bi, char *page) | ||
227 | { | ||
228 | return sprintf(page, "%d\n", | ||
229 | test_bit(INTEGRITY_FLAG_READ, &bi->flags) ? 1 : 0); | ||
230 | } | ||
231 | |||
232 | static ssize_t integrity_write_store(struct blk_integrity *bi, | ||
233 | const char *page, size_t count) | ||
234 | { | ||
235 | char *p = (char *) page; | ||
236 | unsigned long val = simple_strtoul(p, &p, 10); | ||
237 | |||
238 | if (val) | ||
239 | set_bit(INTEGRITY_FLAG_WRITE, &bi->flags); | ||
240 | else | ||
241 | clear_bit(INTEGRITY_FLAG_WRITE, &bi->flags); | ||
242 | |||
243 | return count; | ||
244 | } | ||
245 | |||
246 | static ssize_t integrity_write_show(struct blk_integrity *bi, char *page) | ||
247 | { | ||
248 | return sprintf(page, "%d\n", | ||
249 | test_bit(INTEGRITY_FLAG_WRITE, &bi->flags) ? 1 : 0); | ||
250 | } | ||
251 | |||
252 | static struct integrity_sysfs_entry integrity_format_entry = { | ||
253 | .attr = { .name = "format", .mode = S_IRUGO }, | ||
254 | .show = integrity_format_show, | ||
255 | }; | ||
256 | |||
257 | static struct integrity_sysfs_entry integrity_tag_size_entry = { | ||
258 | .attr = { .name = "tag_size", .mode = S_IRUGO }, | ||
259 | .show = integrity_tag_size_show, | ||
260 | }; | ||
261 | |||
262 | static struct integrity_sysfs_entry integrity_read_entry = { | ||
263 | .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR }, | ||
264 | .show = integrity_read_show, | ||
265 | .store = integrity_read_store, | ||
266 | }; | ||
267 | |||
268 | static struct integrity_sysfs_entry integrity_write_entry = { | ||
269 | .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR }, | ||
270 | .show = integrity_write_show, | ||
271 | .store = integrity_write_store, | ||
272 | }; | ||
273 | |||
274 | static struct attribute *integrity_attrs[] = { | ||
275 | &integrity_format_entry.attr, | ||
276 | &integrity_tag_size_entry.attr, | ||
277 | &integrity_read_entry.attr, | ||
278 | &integrity_write_entry.attr, | ||
279 | NULL, | ||
280 | }; | ||
281 | |||
282 | static struct sysfs_ops integrity_ops = { | ||
283 | .show = &integrity_attr_show, | ||
284 | .store = &integrity_attr_store, | ||
285 | }; | ||
286 | |||
287 | static int __init blk_dev_integrity_init(void) | ||
288 | { | ||
289 | integrity_cachep = kmem_cache_create("blkdev_integrity", | ||
290 | sizeof(struct blk_integrity), | ||
291 | 0, SLAB_PANIC, NULL); | ||
292 | return 0; | ||
293 | } | ||
294 | subsys_initcall(blk_dev_integrity_init); | ||
295 | |||
296 | static void blk_integrity_release(struct kobject *kobj) | ||
297 | { | ||
298 | struct blk_integrity *bi = | ||
299 | container_of(kobj, struct blk_integrity, kobj); | ||
300 | |||
301 | kmem_cache_free(integrity_cachep, bi); | ||
302 | } | ||
303 | |||
304 | static struct kobj_type integrity_ktype = { | ||
305 | .default_attrs = integrity_attrs, | ||
306 | .sysfs_ops = &integrity_ops, | ||
307 | .release = blk_integrity_release, | ||
308 | }; | ||
309 | |||
310 | /** | ||
311 | * blk_integrity_register - Register a gendisk as being integrity-capable | ||
312 | * @disk: struct gendisk pointer to make integrity-aware | ||
313 | * @template: integrity profile | ||
314 | * | ||
315 | * Description: When a device needs to advertise itself as being able | ||
316 | * to send/receive integrity metadata it must use this function to | ||
317 | * register the capability with the block layer. The template is a | ||
318 | * blk_integrity struct with values appropriate for the underlying | ||
319 | * hardware. See Documentation/block/data-integrity.txt. | ||
320 | */ | ||
321 | int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template) | ||
322 | { | ||
323 | struct blk_integrity *bi; | ||
324 | |||
325 | BUG_ON(disk == NULL); | ||
326 | BUG_ON(template == NULL); | ||
327 | |||
328 | if (disk->integrity == NULL) { | ||
329 | bi = kmem_cache_alloc(integrity_cachep, GFP_KERNEL | __GFP_ZERO); | ||
330 | if (!bi) | ||
331 | return -1; | ||
332 | |||
333 | if (kobject_init_and_add(&bi->kobj, &integrity_ktype, | ||
334 | &disk->dev.kobj, "%s", "integrity")) { | ||
335 | kmem_cache_free(integrity_cachep, bi); | ||
336 | return -1; | ||
337 | } | ||
338 | |||
339 | kobject_uevent(&bi->kobj, KOBJ_ADD); | ||
340 | |||
341 | set_bit(INTEGRITY_FLAG_READ, &bi->flags); | ||
342 | set_bit(INTEGRITY_FLAG_WRITE, &bi->flags); | ||
343 | bi->sector_size = disk->queue->hardsect_size; | ||
344 | disk->integrity = bi; | ||
345 | } else | ||
346 | bi = disk->integrity; | ||
347 | |||
348 | /* Use the provided profile as template */ | ||
349 | bi->name = template->name; | ||
350 | bi->generate_fn = template->generate_fn; | ||
351 | bi->verify_fn = template->verify_fn; | ||
352 | bi->tuple_size = template->tuple_size; | ||
353 | bi->set_tag_fn = template->set_tag_fn; | ||
354 | bi->get_tag_fn = template->get_tag_fn; | ||
355 | bi->tag_size = template->tag_size; | ||
356 | |||
357 | return 0; | ||
358 | } | ||
359 | EXPORT_SYMBOL(blk_integrity_register); | ||
360 | |||
361 | /** | ||
362 | * blk_integrity_unregister - Remove block integrity profile | ||
363 | * @disk: disk whose integrity profile to deallocate | ||
364 | * | ||
365 | * Description: This function frees all memory used by the block | ||
366 | * integrity profile. To be called at device teardown. | ||
367 | */ | ||
368 | void blk_integrity_unregister(struct gendisk *disk) | ||
369 | { | ||
370 | struct blk_integrity *bi; | ||
371 | |||
372 | if (!disk || !disk->integrity) | ||
373 | return; | ||
374 | |||
375 | bi = disk->integrity; | ||
376 | |||
377 | kobject_uevent(&bi->kobj, KOBJ_REMOVE); | ||
378 | kobject_del(&bi->kobj); | ||
379 | kobject_put(&disk->dev.kobj); | ||
380 | kmem_cache_free(integrity_cachep, bi); | ||
381 | } | ||
382 | EXPORT_SYMBOL(blk_integrity_unregister); | ||
diff --git a/block/blk-merge.c b/block/blk-merge.c index 651136aae76..5efc9e7a68b 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c | |||
@@ -441,6 +441,9 @@ static int attempt_merge(struct request_queue *q, struct request *req, | |||
441 | || next->special) | 441 | || next->special) |
442 | return 0; | 442 | return 0; |
443 | 443 | ||
444 | if (blk_integrity_rq(req) != blk_integrity_rq(next)) | ||
445 | return 0; | ||
446 | |||
444 | /* | 447 | /* |
445 | * If we are allowed to merge, then append bio list | 448 | * If we are allowed to merge, then append bio list |
446 | * from next to rq and release next. merge_requests_fn | 449 | * from next to rq and release next. merge_requests_fn |
diff --git a/block/blk.h b/block/blk.h index 59776ab4742..c79f30e1df5 100644 --- a/block/blk.h +++ b/block/blk.h | |||
@@ -51,4 +51,12 @@ static inline int queue_congestion_off_threshold(struct request_queue *q) | |||
51 | return q->nr_congestion_off; | 51 | return q->nr_congestion_off; |
52 | } | 52 | } |
53 | 53 | ||
54 | #if defined(CONFIG_BLK_DEV_INTEGRITY) | ||
55 | |||
/*
 * Visit every integrity vector attached to a request by composing
 * __rq_for_each_bio() over the request's bios (tracked in _iter.bio)
 * with bip_for_each_vec() over each bio's bi_integrity (indexed by
 * _iter.i).  @bvl receives the current vector.
 */
#define rq_for_each_integrity_segment(bvl, _rq, _iter)			\
	__rq_for_each_bio((_iter).bio, (_rq))				\
		bip_for_each_vec((bvl), (_iter).bio->bi_integrity, (_iter).i)
59 | |||
60 | #endif /* BLK_DEV_INTEGRITY */ | ||
61 | |||
54 | #endif | 62 | #endif |
diff --git a/block/elevator.c b/block/elevator.c index 902dd1344d5..1f5bfe69602 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -86,6 +86,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) | |||
86 | if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) | 86 | if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) |
87 | return 0; | 87 | return 0; |
88 | 88 | ||
89 | /* | ||
90 | * only merge integrity protected bio into ditto rq | ||
91 | */ | ||
92 | if (bio_integrity(bio) != blk_integrity_rq(rq)) | ||
93 | return 0; | ||
94 | |||
89 | if (!elv_iosched_allow_merge(rq, bio)) | 95 | if (!elv_iosched_allow_merge(rq, bio)) |
90 | return 0; | 96 | return 0; |
91 | 97 | ||