aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-07-14 16:15:14 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-07-14 16:15:14 -0400
commitdddec01eb8e2b56267b37a6f9f0997a64b4e0b2a (patch)
treeb6d8bfbce9abd105384b9d116499afbe306b9c22 /fs
parent7daf705f362e349983e92037a198b8821db198af (diff)
parent32502b8413a77b54b9e19809404109590c32dfb7 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-2.6-block
* 'for-linus' of git://git.kernel.dk/linux-2.6-block: (37 commits) splice: fix generic_file_splice_read() race with page invalidation ramfs: enable splice write drivers/block/pktcdvd.c: avoid useless memset cdrom: revert commit 22a9189 (cdrom: use kmalloced buffers instead of buffers on stack) scsi: sr avoids useless buffer allocation block: blk_rq_map_kern uses the bounce buffers for stack buffers block: add blk_queue_update_dma_pad DAC960: push down BKL pktcdvd: push BKL down into driver paride: push ioctl down into driver block: use get_unaligned_* helpers block: extend queue_flag bitops block: request_module(): use format string Add bvec_merge_data to handle stacked devices and ->merge_bvec() block: integrity flags can't use bit ops on unsigned short cmdfilter: extend default read filter sg: fix odd style (extra parenthesis) introduced by cmd filter patch block: add bounce support to blk_rq_map_user_iov cfq-iosched: get rid of enable_idle being unused warning allow userspace to modify scsi command filter on per device basis ...
Diffstat (limited to 'fs')
-rw-r--r--fs/Makefile1
-rw-r--r--fs/bio-integrity.c719
-rw-r--r--fs/bio.c88
-rw-r--r--fs/ramfs/file-mmu.c1
-rw-r--r--fs/ramfs/file-nommu.c1
-rw-r--r--fs/splice.c17
6 files changed, 787 insertions, 40 deletions
diff --git a/fs/Makefile b/fs/Makefile
index 1e7a11bd4da1..277b079dec9e 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -19,6 +19,7 @@ else
19obj-y += no-block.o 19obj-y += no-block.o
20endif 20endif
21 21
22obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o
22obj-$(CONFIG_INOTIFY) += inotify.o 23obj-$(CONFIG_INOTIFY) += inotify.o
23obj-$(CONFIG_INOTIFY_USER) += inotify_user.o 24obj-$(CONFIG_INOTIFY_USER) += inotify_user.o
24obj-$(CONFIG_EPOLL) += eventpoll.o 25obj-$(CONFIG_EPOLL) += eventpoll.o
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
new file mode 100644
index 000000000000..63e2ee63058d
--- /dev/null
+++ b/fs/bio-integrity.c
@@ -0,0 +1,719 @@
1/*
2 * bio-integrity.c - bio data integrity extensions
3 *
4 * Copyright (C) 2007, 2008 Oracle Corporation
5 * Written by: Martin K. Petersen <martin.petersen@oracle.com>
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License version
9 * 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; see the file COPYING. If not, write to
18 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
19 * USA.
20 *
21 */
22
23#include <linux/blkdev.h>
24#include <linux/mempool.h>
25#include <linux/bio.h>
26#include <linux/workqueue.h>
27
28static struct kmem_cache *bio_integrity_slab __read_mostly;
29static struct workqueue_struct *kintegrityd_wq;
30
31/**
32 * bio_integrity_alloc_bioset - Allocate integrity payload and attach it to bio
33 * @bio: bio to attach integrity metadata to
34 * @gfp_mask: Memory allocation mask
35 * @nr_vecs: Number of integrity metadata scatter-gather elements
36 * @bs: bio_set to allocate from
37 *
38 * Description: This function prepares a bio for attaching integrity
39 * metadata. nr_vecs specifies the maximum number of pages containing
40 * integrity metadata that can be attached.
41 */
42struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
43 gfp_t gfp_mask,
44 unsigned int nr_vecs,
45 struct bio_set *bs)
46{
47 struct bio_integrity_payload *bip;
48 struct bio_vec *iv;
49 unsigned long idx;
50
51 BUG_ON(bio == NULL);
52
53 bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
54 if (unlikely(bip == NULL)) {
55 printk(KERN_ERR "%s: could not alloc bip\n", __func__);
56 return NULL;
57 }
58
59 memset(bip, 0, sizeof(*bip));
60
61 iv = bvec_alloc_bs(gfp_mask, nr_vecs, &idx, bs);
62 if (unlikely(iv == NULL)) {
63 printk(KERN_ERR "%s: could not alloc bip_vec\n", __func__);
64 mempool_free(bip, bs->bio_integrity_pool);
65 return NULL;
66 }
67
68 bip->bip_pool = idx;
69 bip->bip_vec = iv;
70 bip->bip_bio = bio;
71 bio->bi_integrity = bip;
72
73 return bip;
74}
75EXPORT_SYMBOL(bio_integrity_alloc_bioset);
76
77/**
78 * bio_integrity_alloc - Allocate integrity payload and attach it to bio
79 * @bio: bio to attach integrity metadata to
80 * @gfp_mask: Memory allocation mask
81 * @nr_vecs: Number of integrity metadata scatter-gather elements
82 *
83 * Description: This function prepares a bio for attaching integrity
84 * metadata. nr_vecs specifies the maximum number of pages containing
85 * integrity metadata that can be attached.
86 */
87struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
88 gfp_t gfp_mask,
89 unsigned int nr_vecs)
90{
91 return bio_integrity_alloc_bioset(bio, gfp_mask, nr_vecs, fs_bio_set);
92}
93EXPORT_SYMBOL(bio_integrity_alloc);
94
95/**
96 * bio_integrity_free - Free bio integrity payload
97 * @bio: bio containing bip to be freed
98 * @bs: bio_set this bio was allocated from
99 *
100 * Description: Used to free the integrity portion of a bio. Usually
101 * called from bio_free().
102 */
103void bio_integrity_free(struct bio *bio, struct bio_set *bs)
104{
105 struct bio_integrity_payload *bip = bio->bi_integrity;
106
107 BUG_ON(bip == NULL);
108
109 /* A cloned bio doesn't own the integrity metadata */
110 if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL)
111 kfree(bip->bip_buf);
112
113 mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]);
114 mempool_free(bip, bs->bio_integrity_pool);
115
116 bio->bi_integrity = NULL;
117}
118EXPORT_SYMBOL(bio_integrity_free);
119
120/**
121 * bio_integrity_add_page - Attach integrity metadata
122 * @bio: bio to update
123 * @page: page containing integrity metadata
124 * @len: number of bytes of integrity metadata in page
125 * @offset: start offset within page
126 *
127 * Description: Attach a page containing integrity metadata to bio.
128 */
129int bio_integrity_add_page(struct bio *bio, struct page *page,
130 unsigned int len, unsigned int offset)
131{
132 struct bio_integrity_payload *bip = bio->bi_integrity;
133 struct bio_vec *iv;
134
135 if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_pool)) {
136 printk(KERN_ERR "%s: bip_vec full\n", __func__);
137 return 0;
138 }
139
140 iv = bip_vec_idx(bip, bip->bip_vcnt);
141 BUG_ON(iv == NULL);
142 BUG_ON(iv->bv_page != NULL);
143
144 iv->bv_page = page;
145 iv->bv_len = len;
146 iv->bv_offset = offset;
147 bip->bip_vcnt++;
148
149 return len;
150}
151EXPORT_SYMBOL(bio_integrity_add_page);
152
153/**
154 * bio_integrity_enabled - Check whether integrity can be passed
155 * @bio: bio to check
156 *
157 * Description: Determines whether bio_integrity_prep() can be called
158 * on this bio or not. bio data direction and target device must be
159 * set prior to calling. The functions honors the write_generate and
160 * read_verify flags in sysfs.
161 */
162int bio_integrity_enabled(struct bio *bio)
163{
164 /* Already protected? */
165 if (bio_integrity(bio))
166 return 0;
167
168 return bdev_integrity_enabled(bio->bi_bdev, bio_data_dir(bio));
169}
170EXPORT_SYMBOL(bio_integrity_enabled);
171
172/**
173 * bio_integrity_hw_sectors - Convert 512b sectors to hardware ditto
174 * @bi: blk_integrity profile for device
175 * @sectors: Number of 512 sectors to convert
176 *
177 * Description: The block layer calculates everything in 512 byte
178 * sectors but integrity metadata is done in terms of the hardware
179 * sector size of the storage device. Convert the block layer sectors
180 * to physical sectors.
181 */
182static inline unsigned int bio_integrity_hw_sectors(struct blk_integrity *bi,
183 unsigned int sectors)
184{
185 /* At this point there are only 512b or 4096b DIF/EPP devices */
186 if (bi->sector_size == 4096)
187 return sectors >>= 3;
188
189 return sectors;
190}
191
192/**
193 * bio_integrity_tag_size - Retrieve integrity tag space
194 * @bio: bio to inspect
195 *
196 * Description: Returns the maximum number of tag bytes that can be
197 * attached to this bio. Filesystems can use this to determine how
198 * much metadata to attach to an I/O.
199 */
200unsigned int bio_integrity_tag_size(struct bio *bio)
201{
202 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
203
204 BUG_ON(bio->bi_size == 0);
205
206 return bi->tag_size * (bio->bi_size / bi->sector_size);
207}
208EXPORT_SYMBOL(bio_integrity_tag_size);
209
210int bio_integrity_tag(struct bio *bio, void *tag_buf, unsigned int len, int set)
211{
212 struct bio_integrity_payload *bip = bio->bi_integrity;
213 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
214 unsigned int nr_sectors;
215
216 BUG_ON(bip->bip_buf == NULL);
217
218 if (bi->tag_size == 0)
219 return -1;
220
221 nr_sectors = bio_integrity_hw_sectors(bi,
222 DIV_ROUND_UP(len, bi->tag_size));
223
224 if (nr_sectors * bi->tuple_size > bip->bip_size) {
225 printk(KERN_ERR "%s: tag too big for bio: %u > %u\n",
226 __func__, nr_sectors * bi->tuple_size, bip->bip_size);
227 return -1;
228 }
229
230 if (set)
231 bi->set_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
232 else
233 bi->get_tag_fn(bip->bip_buf, tag_buf, nr_sectors);
234
235 return 0;
236}
237
238/**
239 * bio_integrity_set_tag - Attach a tag buffer to a bio
240 * @bio: bio to attach buffer to
241 * @tag_buf: Pointer to a buffer containing tag data
242 * @len: Length of the included buffer
243 *
244 * Description: Use this function to tag a bio by leveraging the extra
245 * space provided by devices formatted with integrity protection. The
246 * size of the integrity buffer must be <= to the size reported by
247 * bio_integrity_tag_size().
248 */
249int bio_integrity_set_tag(struct bio *bio, void *tag_buf, unsigned int len)
250{
251 BUG_ON(bio_data_dir(bio) != WRITE);
252
253 return bio_integrity_tag(bio, tag_buf, len, 1);
254}
255EXPORT_SYMBOL(bio_integrity_set_tag);
256
257/**
258 * bio_integrity_get_tag - Retrieve a tag buffer from a bio
259 * @bio: bio to retrieve buffer from
260 * @tag_buf: Pointer to a buffer for the tag data
261 * @len: Length of the target buffer
262 *
263 * Description: Use this function to retrieve the tag buffer from a
264 * completed I/O. The size of the integrity buffer must be <= to the
265 * size reported by bio_integrity_tag_size().
266 */
267int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len)
268{
269 BUG_ON(bio_data_dir(bio) != READ);
270
271 return bio_integrity_tag(bio, tag_buf, len, 0);
272}
273EXPORT_SYMBOL(bio_integrity_get_tag);
274
275/**
276 * bio_integrity_generate - Generate integrity metadata for a bio
277 * @bio: bio to generate integrity metadata for
278 *
279 * Description: Generates integrity metadata for a bio by calling the
280 * block device's generation callback function. The bio must have a
281 * bip attached with enough room to accommodate the generated
282 * integrity metadata.
283 */
284static void bio_integrity_generate(struct bio *bio)
285{
286 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
287 struct blk_integrity_exchg bix;
288 struct bio_vec *bv;
289 sector_t sector = bio->bi_sector;
290 unsigned int i, sectors, total;
291 void *prot_buf = bio->bi_integrity->bip_buf;
292
293 total = 0;
294 bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
295 bix.sector_size = bi->sector_size;
296
297 bio_for_each_segment(bv, bio, i) {
298 void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
299 bix.data_buf = kaddr + bv->bv_offset;
300 bix.data_size = bv->bv_len;
301 bix.prot_buf = prot_buf;
302 bix.sector = sector;
303
304 bi->generate_fn(&bix);
305
306 sectors = bv->bv_len / bi->sector_size;
307 sector += sectors;
308 prot_buf += sectors * bi->tuple_size;
309 total += sectors * bi->tuple_size;
310 BUG_ON(total > bio->bi_integrity->bip_size);
311
312 kunmap_atomic(kaddr, KM_USER0);
313 }
314}
315
316/**
317 * bio_integrity_prep - Prepare bio for integrity I/O
318 * @bio: bio to prepare
319 *
320 * Description: Allocates a buffer for integrity metadata, maps the
321 * pages and attaches them to a bio. The bio must have data
322 * direction, target device and start sector set priot to calling. In
323 * the WRITE case, integrity metadata will be generated using the
324 * block device's integrity function. In the READ case, the buffer
325 * will be prepared for DMA and a suitable end_io handler set up.
326 */
327int bio_integrity_prep(struct bio *bio)
328{
329 struct bio_integrity_payload *bip;
330 struct blk_integrity *bi;
331 struct request_queue *q;
332 void *buf;
333 unsigned long start, end;
334 unsigned int len, nr_pages;
335 unsigned int bytes, offset, i;
336 unsigned int sectors;
337
338 bi = bdev_get_integrity(bio->bi_bdev);
339 q = bdev_get_queue(bio->bi_bdev);
340 BUG_ON(bi == NULL);
341 BUG_ON(bio_integrity(bio));
342
343 sectors = bio_integrity_hw_sectors(bi, bio_sectors(bio));
344
345 /* Allocate kernel buffer for protection data */
346 len = sectors * blk_integrity_tuple_size(bi);
347 buf = kmalloc(len, GFP_NOIO | __GFP_NOFAIL | q->bounce_gfp);
348 if (unlikely(buf == NULL)) {
349 printk(KERN_ERR "could not allocate integrity buffer\n");
350 return -EIO;
351 }
352
353 end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
354 start = ((unsigned long) buf) >> PAGE_SHIFT;
355 nr_pages = end - start;
356
357 /* Allocate bio integrity payload and integrity vectors */
358 bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
359 if (unlikely(bip == NULL)) {
360 printk(KERN_ERR "could not allocate data integrity bioset\n");
361 kfree(buf);
362 return -EIO;
363 }
364
365 bip->bip_buf = buf;
366 bip->bip_size = len;
367 bip->bip_sector = bio->bi_sector;
368
369 /* Map it */
370 offset = offset_in_page(buf);
371 for (i = 0 ; i < nr_pages ; i++) {
372 int ret;
373 bytes = PAGE_SIZE - offset;
374
375 if (len <= 0)
376 break;
377
378 if (bytes > len)
379 bytes = len;
380
381 ret = bio_integrity_add_page(bio, virt_to_page(buf),
382 bytes, offset);
383
384 if (ret == 0)
385 return 0;
386
387 if (ret < bytes)
388 break;
389
390 buf += bytes;
391 len -= bytes;
392 offset = 0;
393 }
394
395 /* Install custom I/O completion handler if read verify is enabled */
396 if (bio_data_dir(bio) == READ) {
397 bip->bip_end_io = bio->bi_end_io;
398 bio->bi_end_io = bio_integrity_endio;
399 }
400
401 /* Auto-generate integrity metadata if this is a write */
402 if (bio_data_dir(bio) == WRITE)
403 bio_integrity_generate(bio);
404
405 return 0;
406}
407EXPORT_SYMBOL(bio_integrity_prep);
408
409/**
410 * bio_integrity_verify - Verify integrity metadata for a bio
411 * @bio: bio to verify
412 *
413 * Description: This function is called to verify the integrity of a
414 * bio. The data in the bio io_vec is compared to the integrity
415 * metadata returned by the HBA.
416 */
417static int bio_integrity_verify(struct bio *bio)
418{
419 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
420 struct blk_integrity_exchg bix;
421 struct bio_vec *bv;
422 sector_t sector = bio->bi_integrity->bip_sector;
423 unsigned int i, sectors, total, ret;
424 void *prot_buf = bio->bi_integrity->bip_buf;
425
426 ret = total = 0;
427 bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
428 bix.sector_size = bi->sector_size;
429
430 bio_for_each_segment(bv, bio, i) {
431 void *kaddr = kmap_atomic(bv->bv_page, KM_USER0);
432 bix.data_buf = kaddr + bv->bv_offset;
433 bix.data_size = bv->bv_len;
434 bix.prot_buf = prot_buf;
435 bix.sector = sector;
436
437 ret = bi->verify_fn(&bix);
438
439 if (ret) {
440 kunmap_atomic(kaddr, KM_USER0);
441 break;
442 }
443
444 sectors = bv->bv_len / bi->sector_size;
445 sector += sectors;
446 prot_buf += sectors * bi->tuple_size;
447 total += sectors * bi->tuple_size;
448 BUG_ON(total > bio->bi_integrity->bip_size);
449
450 kunmap_atomic(kaddr, KM_USER0);
451 }
452
453 return ret;
454}
455
456/**
457 * bio_integrity_verify_fn - Integrity I/O completion worker
458 * @work: Work struct stored in bio to be verified
459 *
460 * Description: This workqueue function is called to complete a READ
461 * request. The function verifies the transferred integrity metadata
462 * and then calls the original bio end_io function.
463 */
464static void bio_integrity_verify_fn(struct work_struct *work)
465{
466 struct bio_integrity_payload *bip =
467 container_of(work, struct bio_integrity_payload, bip_work);
468 struct bio *bio = bip->bip_bio;
469 int error = bip->bip_error;
470
471 if (bio_integrity_verify(bio)) {
472 clear_bit(BIO_UPTODATE, &bio->bi_flags);
473 error = -EIO;
474 }
475
476 /* Restore original bio completion handler */
477 bio->bi_end_io = bip->bip_end_io;
478
479 if (bio->bi_end_io)
480 bio->bi_end_io(bio, error);
481}
482
483/**
484 * bio_integrity_endio - Integrity I/O completion function
485 * @bio: Protected bio
486 * @error: Pointer to errno
487 *
488 * Description: Completion for integrity I/O
489 *
490 * Normally I/O completion is done in interrupt context. However,
491 * verifying I/O integrity is a time-consuming task which must be run
492 * in process context. This function postpones completion
493 * accordingly.
494 */
495void bio_integrity_endio(struct bio *bio, int error)
496{
497 struct bio_integrity_payload *bip = bio->bi_integrity;
498
499 BUG_ON(bip->bip_bio != bio);
500
501 bip->bip_error = error;
502 INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
503 queue_work(kintegrityd_wq, &bip->bip_work);
504}
505EXPORT_SYMBOL(bio_integrity_endio);
506
507/**
508 * bio_integrity_mark_head - Advance bip_vec skip bytes
509 * @bip: Integrity vector to advance
510 * @skip: Number of bytes to advance it
511 */
512void bio_integrity_mark_head(struct bio_integrity_payload *bip,
513 unsigned int skip)
514{
515 struct bio_vec *iv;
516 unsigned int i;
517
518 bip_for_each_vec(iv, bip, i) {
519 if (skip == 0) {
520 bip->bip_idx = i;
521 return;
522 } else if (skip >= iv->bv_len) {
523 skip -= iv->bv_len;
524 } else { /* skip < iv->bv_len) */
525 iv->bv_offset += skip;
526 iv->bv_len -= skip;
527 bip->bip_idx = i;
528 return;
529 }
530 }
531}
532
533/**
534 * bio_integrity_mark_tail - Truncate bip_vec to be len bytes long
535 * @bip: Integrity vector to truncate
536 * @len: New length of integrity vector
537 */
538void bio_integrity_mark_tail(struct bio_integrity_payload *bip,
539 unsigned int len)
540{
541 struct bio_vec *iv;
542 unsigned int i;
543
544 bip_for_each_vec(iv, bip, i) {
545 if (len == 0) {
546 bip->bip_vcnt = i;
547 return;
548 } else if (len >= iv->bv_len) {
549 len -= iv->bv_len;
550 } else { /* len < iv->bv_len) */
551 iv->bv_len = len;
552 len = 0;
553 }
554 }
555}
556
557/**
558 * bio_integrity_advance - Advance integrity vector
559 * @bio: bio whose integrity vector to update
560 * @bytes_done: number of data bytes that have been completed
561 *
562 * Description: This function calculates how many integrity bytes the
563 * number of completed data bytes correspond to and advances the
564 * integrity vector accordingly.
565 */
566void bio_integrity_advance(struct bio *bio, unsigned int bytes_done)
567{
568 struct bio_integrity_payload *bip = bio->bi_integrity;
569 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
570 unsigned int nr_sectors;
571
572 BUG_ON(bip == NULL);
573 BUG_ON(bi == NULL);
574
575 nr_sectors = bio_integrity_hw_sectors(bi, bytes_done >> 9);
576 bio_integrity_mark_head(bip, nr_sectors * bi->tuple_size);
577}
578EXPORT_SYMBOL(bio_integrity_advance);
579
580/**
581 * bio_integrity_trim - Trim integrity vector
582 * @bio: bio whose integrity vector to update
583 * @offset: offset to first data sector
584 * @sectors: number of data sectors
585 *
586 * Description: Used to trim the integrity vector in a cloned bio.
587 * The ivec will be advanced corresponding to 'offset' data sectors
588 * and the length will be truncated corresponding to 'len' data
589 * sectors.
590 */
591void bio_integrity_trim(struct bio *bio, unsigned int offset,
592 unsigned int sectors)
593{
594 struct bio_integrity_payload *bip = bio->bi_integrity;
595 struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
596 unsigned int nr_sectors;
597
598 BUG_ON(bip == NULL);
599 BUG_ON(bi == NULL);
600 BUG_ON(!bio_flagged(bio, BIO_CLONED));
601
602 nr_sectors = bio_integrity_hw_sectors(bi, sectors);
603 bip->bip_sector = bip->bip_sector + offset;
604 bio_integrity_mark_head(bip, offset * bi->tuple_size);
605 bio_integrity_mark_tail(bip, sectors * bi->tuple_size);
606}
607EXPORT_SYMBOL(bio_integrity_trim);
608
609/**
610 * bio_integrity_split - Split integrity metadata
611 * @bio: Protected bio
612 * @bp: Resulting bio_pair
613 * @sectors: Offset
614 *
615 * Description: Splits an integrity page into a bio_pair.
616 */
617void bio_integrity_split(struct bio *bio, struct bio_pair *bp, int sectors)
618{
619 struct blk_integrity *bi;
620 struct bio_integrity_payload *bip = bio->bi_integrity;
621 unsigned int nr_sectors;
622
623 if (bio_integrity(bio) == 0)
624 return;
625
626 bi = bdev_get_integrity(bio->bi_bdev);
627 BUG_ON(bi == NULL);
628 BUG_ON(bip->bip_vcnt != 1);
629
630 nr_sectors = bio_integrity_hw_sectors(bi, sectors);
631
632 bp->bio1.bi_integrity = &bp->bip1;
633 bp->bio2.bi_integrity = &bp->bip2;
634
635 bp->iv1 = bip->bip_vec[0];
636 bp->iv2 = bip->bip_vec[0];
637
638 bp->bip1.bip_vec = &bp->iv1;
639 bp->bip2.bip_vec = &bp->iv2;
640
641 bp->iv1.bv_len = sectors * bi->tuple_size;
642 bp->iv2.bv_offset += sectors * bi->tuple_size;
643 bp->iv2.bv_len -= sectors * bi->tuple_size;
644
645 bp->bip1.bip_sector = bio->bi_integrity->bip_sector;
646 bp->bip2.bip_sector = bio->bi_integrity->bip_sector + nr_sectors;
647
648 bp->bip1.bip_vcnt = bp->bip2.bip_vcnt = 1;
649 bp->bip1.bip_idx = bp->bip2.bip_idx = 0;
650}
651EXPORT_SYMBOL(bio_integrity_split);
652
653/**
654 * bio_integrity_clone - Callback for cloning bios with integrity metadata
655 * @bio: New bio
656 * @bio_src: Original bio
657 * @bs: bio_set to allocate bip from
658 *
659 * Description: Called to allocate a bip when cloning a bio
660 */
661int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
662 struct bio_set *bs)
663{
664 struct bio_integrity_payload *bip_src = bio_src->bi_integrity;
665 struct bio_integrity_payload *bip;
666
667 BUG_ON(bip_src == NULL);
668
669 bip = bio_integrity_alloc_bioset(bio, GFP_NOIO, bip_src->bip_vcnt, bs);
670
671 if (bip == NULL)
672 return -EIO;
673
674 memcpy(bip->bip_vec, bip_src->bip_vec,
675 bip_src->bip_vcnt * sizeof(struct bio_vec));
676
677 bip->bip_sector = bip_src->bip_sector;
678 bip->bip_vcnt = bip_src->bip_vcnt;
679 bip->bip_idx = bip_src->bip_idx;
680
681 return 0;
682}
683EXPORT_SYMBOL(bio_integrity_clone);
684
685int bioset_integrity_create(struct bio_set *bs, int pool_size)
686{
687 bs->bio_integrity_pool = mempool_create_slab_pool(pool_size,
688 bio_integrity_slab);
689 if (!bs->bio_integrity_pool)
690 return -1;
691
692 return 0;
693}
694EXPORT_SYMBOL(bioset_integrity_create);
695
696void bioset_integrity_free(struct bio_set *bs)
697{
698 if (bs->bio_integrity_pool)
699 mempool_destroy(bs->bio_integrity_pool);
700}
701EXPORT_SYMBOL(bioset_integrity_free);
702
703void __init bio_integrity_init_slab(void)
704{
705 bio_integrity_slab = KMEM_CACHE(bio_integrity_payload,
706 SLAB_HWCACHE_ALIGN|SLAB_PANIC);
707}
708EXPORT_SYMBOL(bio_integrity_init_slab);
709
710static int __init integrity_init(void)
711{
712 kintegrityd_wq = create_workqueue("kintegrityd");
713
714 if (!kintegrityd_wq)
715 panic("Failed to create kintegrityd\n");
716
717 return 0;
718}
719subsys_initcall(integrity_init);
diff --git a/fs/bio.c b/fs/bio.c
index 78562574cb52..88322b066acb 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -28,25 +28,10 @@
28#include <linux/blktrace_api.h> 28#include <linux/blktrace_api.h>
29#include <scsi/sg.h> /* for struct sg_iovec */ 29#include <scsi/sg.h> /* for struct sg_iovec */
30 30
31#define BIO_POOL_SIZE 2
32
33static struct kmem_cache *bio_slab __read_mostly; 31static struct kmem_cache *bio_slab __read_mostly;
34 32
35#define BIOVEC_NR_POOLS 6
36
37/*
38 * a small number of entries is fine, not going to be performance critical.
39 * basically we just need to survive
40 */
41#define BIO_SPLIT_ENTRIES 2
42mempool_t *bio_split_pool __read_mostly; 33mempool_t *bio_split_pool __read_mostly;
43 34
44struct biovec_slab {
45 int nr_vecs;
46 char *name;
47 struct kmem_cache *slab;
48};
49
50/* 35/*
51 * if you change this list, also change bvec_alloc or things will 36 * if you change this list, also change bvec_alloc or things will
52 * break badly! cannot be bigger than what you can fit into an 37 * break badly! cannot be bigger than what you can fit into an
@@ -60,23 +45,17 @@ static struct biovec_slab bvec_slabs[BIOVEC_NR_POOLS] __read_mostly = {
60#undef BV 45#undef BV
61 46
62/* 47/*
63 * bio_set is used to allow other portions of the IO system to
64 * allocate their own private memory pools for bio and iovec structures.
65 * These memory pools in turn all allocate from the bio_slab
66 * and the bvec_slabs[].
67 */
68struct bio_set {
69 mempool_t *bio_pool;
70 mempool_t *bvec_pools[BIOVEC_NR_POOLS];
71};
72
73/*
74 * fs_bio_set is the bio_set containing bio and iovec memory pools used by 48 * fs_bio_set is the bio_set containing bio and iovec memory pools used by
75 * IO code that does not need private memory pools. 49 * IO code that does not need private memory pools.
76 */ 50 */
77static struct bio_set *fs_bio_set; 51struct bio_set *fs_bio_set;
52
53unsigned int bvec_nr_vecs(unsigned short idx)
54{
55 return bvec_slabs[idx].nr_vecs;
56}
78 57
79static inline struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs) 58struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct bio_set *bs)
80{ 59{
81 struct bio_vec *bvl; 60 struct bio_vec *bvl;
82 61
@@ -117,6 +96,9 @@ void bio_free(struct bio *bio, struct bio_set *bio_set)
117 mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]); 96 mempool_free(bio->bi_io_vec, bio_set->bvec_pools[pool_idx]);
118 } 97 }
119 98
99 if (bio_integrity(bio))
100 bio_integrity_free(bio, bio_set);
101
120 mempool_free(bio, bio_set->bio_pool); 102 mempool_free(bio, bio_set->bio_pool);
121} 103}
122 104
@@ -275,9 +257,19 @@ struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask)
275{ 257{
276 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set); 258 struct bio *b = bio_alloc_bioset(gfp_mask, bio->bi_max_vecs, fs_bio_set);
277 259
278 if (b) { 260 if (!b)
279 b->bi_destructor = bio_fs_destructor; 261 return NULL;
280 __bio_clone(b, bio); 262
263 b->bi_destructor = bio_fs_destructor;
264 __bio_clone(b, bio);
265
266 if (bio_integrity(bio)) {
267 int ret;
268
269 ret = bio_integrity_clone(b, bio, fs_bio_set);
270
271 if (ret < 0)
272 return NULL;
281 } 273 }
282 274
283 return b; 275 return b;
@@ -333,10 +325,19 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
333 if (page == prev->bv_page && 325 if (page == prev->bv_page &&
334 offset == prev->bv_offset + prev->bv_len) { 326 offset == prev->bv_offset + prev->bv_len) {
335 prev->bv_len += len; 327 prev->bv_len += len;
336 if (q->merge_bvec_fn && 328
337 q->merge_bvec_fn(q, bio, prev) < len) { 329 if (q->merge_bvec_fn) {
338 prev->bv_len -= len; 330 struct bvec_merge_data bvm = {
339 return 0; 331 .bi_bdev = bio->bi_bdev,
332 .bi_sector = bio->bi_sector,
333 .bi_size = bio->bi_size,
334 .bi_rw = bio->bi_rw,
335 };
336
337 if (q->merge_bvec_fn(q, &bvm, prev) < len) {
338 prev->bv_len -= len;
339 return 0;
340 }
340 } 341 }
341 342
342 goto done; 343 goto done;
@@ -377,11 +378,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
377 * queue to get further control 378 * queue to get further control
378 */ 379 */
379 if (q->merge_bvec_fn) { 380 if (q->merge_bvec_fn) {
381 struct bvec_merge_data bvm = {
382 .bi_bdev = bio->bi_bdev,
383 .bi_sector = bio->bi_sector,
384 .bi_size = bio->bi_size,
385 .bi_rw = bio->bi_rw,
386 };
387
380 /* 388 /*
381 * merge_bvec_fn() returns number of bytes it can accept 389 * merge_bvec_fn() returns number of bytes it can accept
382 * at this offset 390 * at this offset
383 */ 391 */
384 if (q->merge_bvec_fn(q, bio, bvec) < len) { 392 if (q->merge_bvec_fn(q, &bvm, bvec) < len) {
385 bvec->bv_page = NULL; 393 bvec->bv_page = NULL;
386 bvec->bv_len = 0; 394 bvec->bv_len = 0;
387 bvec->bv_offset = 0; 395 bvec->bv_offset = 0;
@@ -1249,6 +1257,9 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors)
1249 bp->bio1.bi_private = bi; 1257 bp->bio1.bi_private = bi;
1250 bp->bio2.bi_private = pool; 1258 bp->bio2.bi_private = pool;
1251 1259
1260 if (bio_integrity(bi))
1261 bio_integrity_split(bi, bp, first_sectors);
1262
1252 return bp; 1263 return bp;
1253} 1264}
1254 1265
@@ -1290,6 +1301,7 @@ void bioset_free(struct bio_set *bs)
1290 if (bs->bio_pool) 1301 if (bs->bio_pool)
1291 mempool_destroy(bs->bio_pool); 1302 mempool_destroy(bs->bio_pool);
1292 1303
1304 bioset_integrity_free(bs);
1293 biovec_free_pools(bs); 1305 biovec_free_pools(bs);
1294 1306
1295 kfree(bs); 1307 kfree(bs);
@@ -1306,6 +1318,9 @@ struct bio_set *bioset_create(int bio_pool_size, int bvec_pool_size)
1306 if (!bs->bio_pool) 1318 if (!bs->bio_pool)
1307 goto bad; 1319 goto bad;
1308 1320
1321 if (bioset_integrity_create(bs, bio_pool_size))
1322 goto bad;
1323
1309 if (!biovec_create_pools(bs, bvec_pool_size)) 1324 if (!biovec_create_pools(bs, bvec_pool_size))
1310 return bs; 1325 return bs;
1311 1326
@@ -1332,6 +1347,7 @@ static int __init init_bio(void)
1332{ 1347{
1333 bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC); 1348 bio_slab = KMEM_CACHE(bio, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
1334 1349
1350 bio_integrity_init_slab();
1335 biovec_init_slabs(); 1351 biovec_init_slabs();
1336 1352
1337 fs_bio_set = bioset_create(BIO_POOL_SIZE, 2); 1353 fs_bio_set = bioset_create(BIO_POOL_SIZE, 2);
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
index 9590b9024300..78f613cb9c76 100644
--- a/fs/ramfs/file-mmu.c
+++ b/fs/ramfs/file-mmu.c
@@ -45,6 +45,7 @@ const struct file_operations ramfs_file_operations = {
45 .mmap = generic_file_mmap, 45 .mmap = generic_file_mmap,
46 .fsync = simple_sync_file, 46 .fsync = simple_sync_file,
47 .splice_read = generic_file_splice_read, 47 .splice_read = generic_file_splice_read,
48 .splice_write = generic_file_splice_write,
48 .llseek = generic_file_llseek, 49 .llseek = generic_file_llseek,
49}; 50};
50 51
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 0989bc2c2f69..52312ec93ff4 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -43,6 +43,7 @@ const struct file_operations ramfs_file_operations = {
43 .aio_write = generic_file_aio_write, 43 .aio_write = generic_file_aio_write,
44 .fsync = simple_sync_file, 44 .fsync = simple_sync_file,
45 .splice_read = generic_file_splice_read, 45 .splice_read = generic_file_splice_read,
46 .splice_write = generic_file_splice_write,
46 .llseek = generic_file_llseek, 47 .llseek = generic_file_llseek,
47}; 48};
48 49
diff --git a/fs/splice.c b/fs/splice.c
index aa5f6f60b305..399442179d89 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -379,13 +379,22 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
379 lock_page(page); 379 lock_page(page);
380 380
381 /* 381 /*
382 * page was truncated, stop here. if this isn't the 382 * Page was truncated, or invalidated by the
383 * first page, we'll just complete what we already 383 * filesystem. Redo the find/create, but this time the
384 * added 384 * page is kept locked, so there's no chance of another
385 * race with truncate/invalidate.
385 */ 386 */
386 if (!page->mapping) { 387 if (!page->mapping) {
387 unlock_page(page); 388 unlock_page(page);
388 break; 389 page = find_or_create_page(mapping, index,
390 mapping_gfp_mask(mapping));
391
392 if (!page) {
393 error = -ENOMEM;
394 break;
395 }
396 page_cache_release(pages[page_nr]);
397 pages[page_nr] = page;
389 } 398 }
390 /* 399 /*
391 * page was already under io and is now done, great 400 * page was already under io and is now done, great