aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
authorMartin K. Petersen <martin.petersen@oracle.com>2008-06-30 14:04:41 -0400
committerJens Axboe <jens.axboe@oracle.com>2008-07-03 07:21:13 -0400
commit7ba1ba12eeef0aa7113beb16410ef8b7c748e18b (patch)
tree4629aabe88bf095d58eabd2f451207695bb35b08 /block
parent51d654e1d885607a6edd02b337105fa5c28b6d33 (diff)
block: Block layer data integrity support
Some block devices support verifying the integrity of requests by way of checksums or other protection information that is submitted along with the I/O. This patch implements support for generating and verifying integrity metadata, as well as correctly merging, splitting and cloning bios and requests that have this extra information attached. See Documentation/block/data-integrity.txt for more information. Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig12
-rw-r--r--block/Makefile1
-rw-r--r--block/blk-core.c7
-rw-r--r--block/blk-integrity.c382
-rw-r--r--block/blk-merge.c3
-rw-r--r--block/blk.h8
-rw-r--r--block/elevator.c6
7 files changed, 419 insertions, 0 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 3e97f2bc446f..1ab7c15c8d7a 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -81,6 +81,18 @@ config BLK_DEV_BSG
81 81
82 If unsure, say N. 82 If unsure, say N.
83 83
84config BLK_DEV_INTEGRITY
85 bool "Block layer data integrity support"
86 ---help---
87 Some storage devices allow extra information to be
88 stored/retrieved to help protect the data. The block layer
89 data integrity option provides hooks which can be used by
90 filesystems to ensure better data integrity.
91
92 Say yes here if you have a storage device that provides the
93 T10/SCSI Data Integrity Field or the T13/ATA External Path
94 Protection. If in doubt, say N.
95
84endif # BLOCK 96endif # BLOCK
85 97
86config BLOCK_COMPAT 98config BLOCK_COMPAT
diff --git a/block/Makefile b/block/Makefile
index 5a43c7d79594..045f7b62e4bb 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -14,3 +14,4 @@ obj-$(CONFIG_IOSCHED_CFQ) += cfq-iosched.o
14 14
15obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o 15obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
16obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o 16obj-$(CONFIG_BLOCK_COMPAT) += compat_ioctl.o
17obj-$(CONFIG_BLK_DEV_INTEGRITY) += blk-integrity.o
diff --git a/block/blk-core.c b/block/blk-core.c
index 1905aaba49fb..e0fb0bcc0c17 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -143,6 +143,10 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
143 143
144 bio->bi_size -= nbytes; 144 bio->bi_size -= nbytes;
145 bio->bi_sector += (nbytes >> 9); 145 bio->bi_sector += (nbytes >> 9);
146
147 if (bio_integrity(bio))
148 bio_integrity_advance(bio, nbytes);
149
146 if (bio->bi_size == 0) 150 if (bio->bi_size == 0)
147 bio_endio(bio, error); 151 bio_endio(bio, error);
148 } else { 152 } else {
@@ -1381,6 +1385,9 @@ end_io:
1381 */ 1385 */
1382 blk_partition_remap(bio); 1386 blk_partition_remap(bio);
1383 1387
1388 if (bio_integrity_enabled(bio) && bio_integrity_prep(bio))
1389 goto end_io;
1390
1384 if (old_sector != -1) 1391 if (old_sector != -1)
1385 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, 1392 blk_add_trace_remap(q, bio, old_dev, bio->bi_sector,
1386 old_sector); 1393 old_sector);
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
new file mode 100644
index 000000000000..65f23ef38bbe
--- /dev/null
+++ b/block/blk-integrity.c
@@ -0,0 +1,382 @@
1/*
2 * blk-integrity.c - Block layer data integrity extensions
3 *
4 * Copyright (C) 2007, 2008 Oracle Corporation
5 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
19 * USA.
20 *
21 */
22
23#include <linux/blkdev.h>
24#include <linux/mempool.h>
25#include <linux/bio.h>
26#include <linux/scatterlist.h>
27
28#include "blk.h"
29
30static struct kmem_cache *integrity_cachep;
31
32/**
33 * blk_rq_count_integrity_sg - Count number of integrity scatterlist elements
34 * @rq: request with integrity metadata attached
35 *
36 * Description: Returns the number of elements required in a
37 * scatterlist corresponding to the integrity metadata in a request.
38 */
39int blk_rq_count_integrity_sg(struct request *rq)
40{
41 struct bio_vec *iv, *ivprv;
42 struct req_iterator iter;
43 unsigned int segments;
44
45 ivprv = NULL;
46 segments = 0;
47
48 rq_for_each_integrity_segment(iv, rq, iter) {
49
50 if (!ivprv || !BIOVEC_PHYS_MERGEABLE(ivprv, iv))
51 segments++;
52
53 ivprv = iv;
54 }
55
56 return segments;
57}
58EXPORT_SYMBOL(blk_rq_count_integrity_sg);
59
60/**
61 * blk_rq_map_integrity_sg - Map integrity metadata into a scatterlist
62 * @rq: request with integrity metadata attached
63 * @sglist: target scatterlist
64 *
65 * Description: Map the integrity vectors in request into a
66 * scatterlist. The scatterlist must be big enough to hold all
67 * elements. I.e. sized using blk_rq_count_integrity_sg().
68 */
69int blk_rq_map_integrity_sg(struct request *rq, struct scatterlist *sglist)
70{
71 struct bio_vec *iv, *ivprv;
72 struct req_iterator iter;
73 struct scatterlist *sg;
74 unsigned int segments;
75
76 ivprv = NULL;
77 sg = NULL;
78 segments = 0;
79
80 rq_for_each_integrity_segment(iv, rq, iter) {
81
82 if (ivprv) {
83 if (!BIOVEC_PHYS_MERGEABLE(ivprv, iv))
84 goto new_segment;
85
86 sg->length += iv->bv_len;
87 } else {
88new_segment:
89 if (!sg)
90 sg = sglist;
91 else {
92 sg->page_link &= ~0x02;
93 sg = sg_next(sg);
94 }
95
96 sg_set_page(sg, iv->bv_page, iv->bv_len, iv->bv_offset);
97 segments++;
98 }
99
100 ivprv = iv;
101 }
102
103 if (sg)
104 sg_mark_end(sg);
105
106 return segments;
107}
108EXPORT_SYMBOL(blk_rq_map_integrity_sg);
109
110/**
111 * blk_integrity_compare - Compare integrity profile of two block devices
112 * @b1: Device to compare
113 * @b2: Device to compare
114 *
115 * Description: Meta-devices like DM and MD need to verify that all
116 * sub-devices use the same integrity format before advertising to
117 * upper layers that they can send/receive integrity metadata. This
118 * function can be used to check whether two block devices have
119 * compatible integrity formats.
120 */
121int blk_integrity_compare(struct block_device *bd1, struct block_device *bd2)
122{
123 struct blk_integrity *b1 = bd1->bd_disk->integrity;
124 struct blk_integrity *b2 = bd2->bd_disk->integrity;
125
126 BUG_ON(bd1->bd_disk == NULL);
127 BUG_ON(bd2->bd_disk == NULL);
128
129 if (!b1 || !b2)
130 return 0;
131
132 if (b1->sector_size != b2->sector_size) {
133 printk(KERN_ERR "%s: %s/%s sector sz %u != %u\n", __func__,
134 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
135 b1->sector_size, b2->sector_size);
136 return -1;
137 }
138
139 if (b1->tuple_size != b2->tuple_size) {
140 printk(KERN_ERR "%s: %s/%s tuple sz %u != %u\n", __func__,
141 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
142 b1->tuple_size, b2->tuple_size);
143 return -1;
144 }
145
146 if (b1->tag_size && b2->tag_size && (b1->tag_size != b2->tag_size)) {
147 printk(KERN_ERR "%s: %s/%s tag sz %u != %u\n", __func__,
148 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
149 b1->tag_size, b2->tag_size);
150 return -1;
151 }
152
153 if (strcmp(b1->name, b2->name)) {
154 printk(KERN_ERR "%s: %s/%s type %s != %s\n", __func__,
155 bd1->bd_disk->disk_name, bd2->bd_disk->disk_name,
156 b1->name, b2->name);
157 return -1;
158 }
159
160 return 0;
161}
162EXPORT_SYMBOL(blk_integrity_compare);
163
164struct integrity_sysfs_entry {
165 struct attribute attr;
166 ssize_t (*show)(struct blk_integrity *, char *);
167 ssize_t (*store)(struct blk_integrity *, const char *, size_t);
168};
169
170static ssize_t integrity_attr_show(struct kobject *kobj, struct attribute *attr,
171 char *page)
172{
173 struct blk_integrity *bi =
174 container_of(kobj, struct blk_integrity, kobj);
175 struct integrity_sysfs_entry *entry =
176 container_of(attr, struct integrity_sysfs_entry, attr);
177
178 return entry->show(bi, page);
179}
180
181static ssize_t integrity_attr_store(struct kobject *kobj, struct attribute *attr,
182 const char *page, size_t count)
183{
184 struct blk_integrity *bi =
185 container_of(kobj, struct blk_integrity, kobj);
186 struct integrity_sysfs_entry *entry =
187 container_of(attr, struct integrity_sysfs_entry, attr);
188 ssize_t ret = 0;
189
190 if (entry->store)
191 ret = entry->store(bi, page, count);
192
193 return ret;
194}
195
196static ssize_t integrity_format_show(struct blk_integrity *bi, char *page)
197{
198 if (bi != NULL && bi->name != NULL)
199 return sprintf(page, "%s\n", bi->name);
200 else
201 return sprintf(page, "none\n");
202}
203
204static ssize_t integrity_tag_size_show(struct blk_integrity *bi, char *page)
205{
206 if (bi != NULL)
207 return sprintf(page, "%u\n", bi->tag_size);
208 else
209 return sprintf(page, "0\n");
210}
211
212static ssize_t integrity_read_store(struct blk_integrity *bi,
213 const char *page, size_t count)
214{
215 char *p = (char *) page;
216 unsigned long val = simple_strtoul(p, &p, 10);
217
218 if (val)
219 set_bit(INTEGRITY_FLAG_READ, &bi->flags);
220 else
221 clear_bit(INTEGRITY_FLAG_READ, &bi->flags);
222
223 return count;
224}
225
226static ssize_t integrity_read_show(struct blk_integrity *bi, char *page)
227{
228 return sprintf(page, "%d\n",
229 test_bit(INTEGRITY_FLAG_READ, &bi->flags) ? 1 : 0);
230}
231
232static ssize_t integrity_write_store(struct blk_integrity *bi,
233 const char *page, size_t count)
234{
235 char *p = (char *) page;
236 unsigned long val = simple_strtoul(p, &p, 10);
237
238 if (val)
239 set_bit(INTEGRITY_FLAG_WRITE, &bi->flags);
240 else
241 clear_bit(INTEGRITY_FLAG_WRITE, &bi->flags);
242
243 return count;
244}
245
246static ssize_t integrity_write_show(struct blk_integrity *bi, char *page)
247{
248 return sprintf(page, "%d\n",
249 test_bit(INTEGRITY_FLAG_WRITE, &bi->flags) ? 1 : 0);
250}
251
252static struct integrity_sysfs_entry integrity_format_entry = {
253 .attr = { .name = "format", .mode = S_IRUGO },
254 .show = integrity_format_show,
255};
256
257static struct integrity_sysfs_entry integrity_tag_size_entry = {
258 .attr = { .name = "tag_size", .mode = S_IRUGO },
259 .show = integrity_tag_size_show,
260};
261
262static struct integrity_sysfs_entry integrity_read_entry = {
263 .attr = { .name = "read_verify", .mode = S_IRUGO | S_IWUSR },
264 .show = integrity_read_show,
265 .store = integrity_read_store,
266};
267
268static struct integrity_sysfs_entry integrity_write_entry = {
269 .attr = { .name = "write_generate", .mode = S_IRUGO | S_IWUSR },
270 .show = integrity_write_show,
271 .store = integrity_write_store,
272};
273
274static struct attribute *integrity_attrs[] = {
275 &integrity_format_entry.attr,
276 &integrity_tag_size_entry.attr,
277 &integrity_read_entry.attr,
278 &integrity_write_entry.attr,
279 NULL,
280};
281
282static struct sysfs_ops integrity_ops = {
283 .show = &integrity_attr_show,
284 .store = &integrity_attr_store,
285};
286
287static int __init blk_dev_integrity_init(void)
288{
289 integrity_cachep = kmem_cache_create("blkdev_integrity",
290 sizeof(struct blk_integrity),
291 0, SLAB_PANIC, NULL);
292 return 0;
293}
294subsys_initcall(blk_dev_integrity_init);
295
296static void blk_integrity_release(struct kobject *kobj)
297{
298 struct blk_integrity *bi =
299 container_of(kobj, struct blk_integrity, kobj);
300
301 kmem_cache_free(integrity_cachep, bi);
302}
303
304static struct kobj_type integrity_ktype = {
305 .default_attrs = integrity_attrs,
306 .sysfs_ops = &integrity_ops,
307 .release = blk_integrity_release,
308};
309
310/**
311 * blk_integrity_register - Register a gendisk as being integrity-capable
312 * @disk: struct gendisk pointer to make integrity-aware
313 * @template: integrity profile
314 *
315 * Description: When a device needs to advertise itself as being able
316 * to send/receive integrity metadata it must use this function to
317 * register the capability with the block layer. The template is a
318 * blk_integrity struct with values appropriate for the underlying
319 * hardware. See Documentation/block/data-integrity.txt.
320 */
321int blk_integrity_register(struct gendisk *disk, struct blk_integrity *template)
322{
323 struct blk_integrity *bi;
324
325 BUG_ON(disk == NULL);
326 BUG_ON(template == NULL);
327
328 if (disk->integrity == NULL) {
329 bi = kmem_cache_alloc(integrity_cachep, GFP_KERNEL | __GFP_ZERO);
330 if (!bi)
331 return -1;
332
333 if (kobject_init_and_add(&bi->kobj, &integrity_ktype,
334 &disk->dev.kobj, "%s", "integrity")) {
335 kmem_cache_free(integrity_cachep, bi);
336 return -1;
337 }
338
339 kobject_uevent(&bi->kobj, KOBJ_ADD);
340
341 set_bit(INTEGRITY_FLAG_READ, &bi->flags);
342 set_bit(INTEGRITY_FLAG_WRITE, &bi->flags);
343 bi->sector_size = disk->queue->hardsect_size;
344 disk->integrity = bi;
345 } else
346 bi = disk->integrity;
347
348 /* Use the provided profile as template */
349 bi->name = template->name;
350 bi->generate_fn = template->generate_fn;
351 bi->verify_fn = template->verify_fn;
352 bi->tuple_size = template->tuple_size;
353 bi->set_tag_fn = template->set_tag_fn;
354 bi->get_tag_fn = template->get_tag_fn;
355 bi->tag_size = template->tag_size;
356
357 return 0;
358}
359EXPORT_SYMBOL(blk_integrity_register);
360
361/**
362 * blk_integrity_unregister - Remove block integrity profile
363 * @disk: disk whose integrity profile to deallocate
364 *
365 * Description: This function frees all memory used by the block
366 * integrity profile. To be called at device teardown.
367 */
368void blk_integrity_unregister(struct gendisk *disk)
369{
370 struct blk_integrity *bi;
371
372 if (!disk || !disk->integrity)
373 return;
374
375 bi = disk->integrity;
376
377 kobject_uevent(&bi->kobj, KOBJ_REMOVE);
378 kobject_del(&bi->kobj);
379 kobject_put(&disk->dev.kobj);
380 kmem_cache_free(integrity_cachep, bi);
381}
382EXPORT_SYMBOL(blk_integrity_unregister);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 651136aae76e..5efc9e7a68b7 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -441,6 +441,9 @@ static int attempt_merge(struct request_queue *q, struct request *req,
441 || next->special) 441 || next->special)
442 return 0; 442 return 0;
443 443
444 if (blk_integrity_rq(req) != blk_integrity_rq(next))
445 return 0;
446
444 /* 447 /*
445 * If we are allowed to merge, then append bio list 448 * If we are allowed to merge, then append bio list
446 * from next to rq and release next. merge_requests_fn 449 * from next to rq and release next. merge_requests_fn
diff --git a/block/blk.h b/block/blk.h
index 59776ab4742a..c79f30e1df52 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -51,4 +51,12 @@ static inline int queue_congestion_off_threshold(struct request_queue *q)
51 return q->nr_congestion_off; 51 return q->nr_congestion_off;
52} 52}
53 53
54#if defined(CONFIG_BLK_DEV_INTEGRITY)
55
/*
 * Iterate over every integrity vector of every bio in a request.
 * @bvl is the bio_vec cursor, @_rq the request and @_iter a
 * struct req_iterator whose .bio and .i members track the position.
 * Arguments are parenthesized so that non-trivial expressions expand
 * safely.
 */
#define rq_for_each_integrity_segment(bvl, _rq, _iter)			\
	__rq_for_each_bio((_iter).bio, (_rq))				\
		bip_for_each_vec(bvl, (_iter).bio->bi_integrity, (_iter).i)
59
60#endif /* BLK_DEV_INTEGRITY */
61
54#endif 62#endif
diff --git a/block/elevator.c b/block/elevator.c
index 902dd1344d56..1f5bfe696026 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -86,6 +86,12 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio)
86 if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special) 86 if (rq->rq_disk != bio->bi_bdev->bd_disk || rq->special)
87 return 0; 87 return 0;
88 88
89 /*
90 * only merge integrity protected bio into ditto rq
91 */
92 if (bio_integrity(bio) != blk_integrity_rq(rq))
93 return 0;
94
89 if (!elv_iosched_allow_merge(rq, bio)) 95 if (!elv_iosched_allow_merge(rq, bio))
90 return 0; 96 return 0;
91 97