aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChris Mason <chris.mason@oracle.com>2008-12-08 16:58:54 -0500
committerChris Mason <chris.mason@oracle.com>2008-12-08 16:58:54 -0500
commitd20f7043fa65659136c1a7c3c456eeeb5c6f431f (patch)
tree05d1031cadec6d440a97221e3a32adb504a51699
parentc99e905c945c462085c6d64646dc5af0c0a16815 (diff)
Btrfs: move data checksumming into a dedicated tree
Btrfs stores checksums for each data block. Until now, they have been stored in the subvolume trees, indexed by the inode that is referencing the data block. This means that when we read the inode, we've probably read in at least some checksums as well. But, this has a few problems: * The checksums are indexed by logical offset in the file. When compression is on, this means we have to do the expensive checksumming on the uncompressed data. It would be faster if we could checksum the compressed data instead. * If we implement encryption, we'll be checksumming the plain text and storing that on disk. This is significantly less secure. * For either compression or encryption, we have to get the plain text back before we can verify the checksum as correct. This makes the raid layer balancing and extent moving much more expensive. * It makes the front end caching code more complex, as we have to touch the subvolume and inodes as we cache extents. * There is potentially one copy of the checksum in each subvolume referencing an extent. The solution used here is to store the extent checksums in a dedicated tree. This allows us to index the checksums by physical extent start and length. It means: * The checksum is against the data stored on disk, after any compression or encryption is done. * The checksum is stored in a central location, and can be verified without following back references, or reading inodes. This makes compression significantly faster by reducing the amount of data that needs to be checksummed. It will also allow much faster raid management code in general. The checksums are indexed by a key with a fixed objectid (a magic value in ctree.h) and offset set to the starting byte of the extent. This allows us to copy the checksum items into the fsync log tree directly (or any other tree), without having to invent a second format for them. Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/compression.c124
-rw-r--r--fs/btrfs/ctree.h30
-rw-r--r--fs/btrfs/disk-io.c45
-rw-r--r--fs/btrfs/extent_io.c5
-rw-r--r--fs/btrfs/file-item.c185
-rw-r--r--fs/btrfs/inode.c45
-rw-r--r--fs/btrfs/ioctl.c55
-rw-r--r--fs/btrfs/ordered-data.c7
-rw-r--r--fs/btrfs/ordered-data.h10
-rw-r--r--fs/btrfs/tree-log.c121
-rw-r--r--fs/btrfs/volumes.c1
11 files changed, 387 insertions, 241 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 4febe2eb0b83..ad7274137309 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -69,11 +69,27 @@ struct compressed_bio {
69 69
70 /* IO errors */ 70 /* IO errors */
71 int errors; 71 int errors;
72 int mirror_num;
72 73
73 /* for reads, this is the bio we are copying the data into */ 74 /* for reads, this is the bio we are copying the data into */
74 struct bio *orig_bio; 75 struct bio *orig_bio;
76
77 /*
78 * the start of a variable length array of checksums only
79 * used by reads
80 */
81 u32 sums;
75}; 82};
76 83
84static inline int compressed_bio_size(struct btrfs_root *root,
85 unsigned long disk_size)
86{
87 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
88 return sizeof(struct compressed_bio) +
89 ((disk_size + root->sectorsize - 1) / root->sectorsize) *
90 csum_size;
91}
92
77static struct bio *compressed_bio_alloc(struct block_device *bdev, 93static struct bio *compressed_bio_alloc(struct block_device *bdev,
78 u64 first_byte, gfp_t gfp_flags) 94 u64 first_byte, gfp_t gfp_flags)
79{ 95{
@@ -96,6 +112,47 @@ static struct bio *compressed_bio_alloc(struct block_device *bdev,
96 return bio; 112 return bio;
97} 113}
98 114
115static int check_compressed_csum(struct inode *inode,
116 struct compressed_bio *cb,
117 u64 disk_start)
118{
119 int ret;
120 struct btrfs_root *root = BTRFS_I(inode)->root;
121 struct page *page;
122 unsigned long i;
123 char *kaddr;
124 u32 csum;
125 u32 *cb_sum = &cb->sums;
126
127 if (btrfs_test_opt(root, NODATASUM) ||
128 btrfs_test_flag(inode, NODATASUM))
129 return 0;
130
131 for (i = 0; i < cb->nr_pages; i++) {
132 page = cb->compressed_pages[i];
133 csum = ~(u32)0;
134
135 kaddr = kmap_atomic(page, KM_USER0);
136 csum = btrfs_csum_data(root, kaddr, csum, PAGE_CACHE_SIZE);
137 btrfs_csum_final(csum, (char *)&csum);
138 kunmap_atomic(kaddr, KM_USER0);
139
140 if (csum != *cb_sum) {
141 printk("btrfs csum failed ino %lu extent %llu csum %u "
142 "wanted %u mirror %d\n", inode->i_ino,
143 (unsigned long long)disk_start,
144 csum, *cb_sum, cb->mirror_num);
145 ret = -EIO;
146 goto fail;
147 }
148 cb_sum++;
149
150 }
151 ret = 0;
152fail:
153 return ret;
154}
155
99/* when we finish reading compressed pages from the disk, we 156/* when we finish reading compressed pages from the disk, we
100 * decompress them and then run the bio end_io routines on the 157 * decompress them and then run the bio end_io routines on the
101 * decompressed pages (in the inode address space). 158 * decompressed pages (in the inode address space).
@@ -124,16 +181,21 @@ static void end_compressed_bio_read(struct bio *bio, int err)
124 if (!atomic_dec_and_test(&cb->pending_bios)) 181 if (!atomic_dec_and_test(&cb->pending_bios))
125 goto out; 182 goto out;
126 183
184 inode = cb->inode;
185 ret = check_compressed_csum(inode, cb, (u64)bio->bi_sector << 9);
186 if (ret)
187 goto csum_failed;
188
127 /* ok, we're the last bio for this extent, lets start 189 /* ok, we're the last bio for this extent, lets start
128 * the decompression. 190 * the decompression.
129 */ 191 */
130 inode = cb->inode;
131 tree = &BTRFS_I(inode)->io_tree; 192 tree = &BTRFS_I(inode)->io_tree;
132 ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, 193 ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
133 cb->start, 194 cb->start,
134 cb->orig_bio->bi_io_vec, 195 cb->orig_bio->bi_io_vec,
135 cb->orig_bio->bi_vcnt, 196 cb->orig_bio->bi_vcnt,
136 cb->compressed_len); 197 cb->compressed_len);
198csum_failed:
137 if (ret) 199 if (ret)
138 cb->errors = 1; 200 cb->errors = 1;
139 201
@@ -148,8 +210,21 @@ static void end_compressed_bio_read(struct bio *bio, int err)
148 /* do io completion on the original bio */ 210 /* do io completion on the original bio */
149 if (cb->errors) { 211 if (cb->errors) {
150 bio_io_error(cb->orig_bio); 212 bio_io_error(cb->orig_bio);
151 } else 213 } else {
214 int bio_index = 0;
215 struct bio_vec *bvec = cb->orig_bio->bi_io_vec;
216
217 /*
218 * we have verified the checksum already, set page
219 * checked so the end_io handlers know about it
220 */
221 while(bio_index < cb->orig_bio->bi_vcnt) {
222 SetPageChecked(bvec->bv_page);
223 bvec++;
224 bio_index++;
225 }
152 bio_endio(cb->orig_bio, 0); 226 bio_endio(cb->orig_bio, 0);
227 }
153 228
154 /* finally free the cb struct */ 229 /* finally free the cb struct */
155 kfree(cb->compressed_pages); 230 kfree(cb->compressed_pages);
@@ -277,12 +352,13 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
277 int ret; 352 int ret;
278 353
279 WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1)); 354 WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
280 cb = kmalloc(sizeof(*cb), GFP_NOFS); 355 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
281 atomic_set(&cb->pending_bios, 0); 356 atomic_set(&cb->pending_bios, 0);
282 cb->errors = 0; 357 cb->errors = 0;
283 cb->inode = inode; 358 cb->inode = inode;
284 cb->start = start; 359 cb->start = start;
285 cb->len = len; 360 cb->len = len;
361 cb->mirror_num = 0;
286 cb->compressed_pages = compressed_pages; 362 cb->compressed_pages = compressed_pages;
287 cb->compressed_len = compressed_len; 363 cb->compressed_len = compressed_len;
288 cb->orig_bio = NULL; 364 cb->orig_bio = NULL;
@@ -290,9 +366,6 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
290 366
291 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev; 367 bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
292 368
293 ret = btrfs_csum_file_bytes(root, inode, start, len);
294 BUG_ON(ret);
295
296 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); 369 bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS);
297 bio->bi_private = cb; 370 bio->bi_private = cb;
298 bio->bi_end_io = end_compressed_bio_write; 371 bio->bi_end_io = end_compressed_bio_write;
@@ -325,6 +398,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
325 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 398 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
326 BUG_ON(ret); 399 BUG_ON(ret);
327 400
401 ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
402 BUG_ON(ret);
403
328 ret = btrfs_map_bio(root, WRITE, bio, 0, 1); 404 ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
329 BUG_ON(ret); 405 BUG_ON(ret);
330 406
@@ -348,6 +424,9 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
348 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 424 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
349 BUG_ON(ret); 425 BUG_ON(ret);
350 426
427 ret = btrfs_csum_one_bio(root, inode, bio, start, 1);
428 BUG_ON(ret);
429
351 ret = btrfs_map_bio(root, WRITE, bio, 0, 1); 430 ret = btrfs_map_bio(root, WRITE, bio, 0, 1);
352 BUG_ON(ret); 431 BUG_ON(ret);
353 432
@@ -510,6 +589,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
510 u64 em_start; 589 u64 em_start;
511 struct extent_map *em; 590 struct extent_map *em;
512 int ret; 591 int ret;
592 u32 *sums;
513 593
514 tree = &BTRFS_I(inode)->io_tree; 594 tree = &BTRFS_I(inode)->io_tree;
515 em_tree = &BTRFS_I(inode)->extent_tree; 595 em_tree = &BTRFS_I(inode)->extent_tree;
@@ -521,15 +601,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
521 PAGE_CACHE_SIZE); 601 PAGE_CACHE_SIZE);
522 spin_unlock(&em_tree->lock); 602 spin_unlock(&em_tree->lock);
523 603
524 cb = kmalloc(sizeof(*cb), GFP_NOFS); 604 compressed_len = em->block_len;
605 cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
525 atomic_set(&cb->pending_bios, 0); 606 atomic_set(&cb->pending_bios, 0);
526 cb->errors = 0; 607 cb->errors = 0;
527 cb->inode = inode; 608 cb->inode = inode;
609 cb->mirror_num = mirror_num;
610 sums = &cb->sums;
528 611
529 cb->start = em->orig_start; 612 cb->start = em->orig_start;
530 compressed_len = em->block_len;
531 em_len = em->len; 613 em_len = em->len;
532 em_start = em->start; 614 em_start = em->start;
615
533 free_extent_map(em); 616 free_extent_map(em);
534 em = NULL; 617 em = NULL;
535 618
@@ -551,11 +634,6 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
551 634
552 add_ra_bio_pages(inode, em_start + em_len, cb); 635 add_ra_bio_pages(inode, em_start + em_len, cb);
553 636
554 if (!btrfs_test_opt(root, NODATASUM) &&
555 !btrfs_test_flag(inode, NODATASUM)) {
556 btrfs_lookup_bio_sums(root, inode, cb->orig_bio);
557 }
558
559 /* include any pages we added in add_ra-bio_pages */ 637 /* include any pages we added in add_ra-bio_pages */
560 uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE; 638 uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
561 cb->len = uncompressed_len; 639 cb->len = uncompressed_len;
@@ -568,6 +646,8 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
568 for (page_index = 0; page_index < nr_pages; page_index++) { 646 for (page_index = 0; page_index < nr_pages; page_index++) {
569 page = cb->compressed_pages[page_index]; 647 page = cb->compressed_pages[page_index];
570 page->mapping = inode->i_mapping; 648 page->mapping = inode->i_mapping;
649 page->index = em_start >> PAGE_CACHE_SHIFT;
650
571 if (comp_bio->bi_size) 651 if (comp_bio->bi_size)
572 ret = tree->ops->merge_bio_hook(page, 0, 652 ret = tree->ops->merge_bio_hook(page, 0,
573 PAGE_CACHE_SIZE, 653 PAGE_CACHE_SIZE,
@@ -591,7 +671,16 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
591 */ 671 */
592 atomic_inc(&cb->pending_bios); 672 atomic_inc(&cb->pending_bios);
593 673
594 ret = btrfs_map_bio(root, READ, comp_bio, 0, 0); 674 if (!btrfs_test_opt(root, NODATASUM) &&
675 !btrfs_test_flag(inode, NODATASUM)) {
676 btrfs_lookup_bio_sums(root, inode, comp_bio,
677 sums);
678 }
679 sums += (comp_bio->bi_size + root->sectorsize - 1) /
680 root->sectorsize;
681
682 ret = btrfs_map_bio(root, READ, comp_bio,
683 mirror_num, 0);
595 BUG_ON(ret); 684 BUG_ON(ret);
596 685
597 bio_put(comp_bio); 686 bio_put(comp_bio);
@@ -610,7 +699,12 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
610 ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); 699 ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0);
611 BUG_ON(ret); 700 BUG_ON(ret);
612 701
613 ret = btrfs_map_bio(root, READ, comp_bio, 0, 0); 702 if (!btrfs_test_opt(root, NODATASUM) &&
703 !btrfs_test_flag(inode, NODATASUM)) {
704 btrfs_lookup_bio_sums(root, inode, comp_bio, sums);
705 }
706
707 ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
614 BUG_ON(ret); 708 BUG_ON(ret);
615 709
616 bio_put(comp_bio); 710 bio_put(comp_bio);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 96f2ec7ad5bd..242b961ae6de 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -73,6 +73,9 @@ struct btrfs_ordered_sum;
73/* directory objectid inside the root tree */ 73/* directory objectid inside the root tree */
74#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL 74#define BTRFS_ROOT_TREE_DIR_OBJECTID 6ULL
75 75
76/* holds checksums of all the data extents */
77#define BTRFS_CSUM_TREE_OBJECTID 7ULL
78
76/* orhpan objectid for tracking unlinked/truncated files */ 79/* orhpan objectid for tracking unlinked/truncated files */
77#define BTRFS_ORPHAN_OBJECTID -5ULL 80#define BTRFS_ORPHAN_OBJECTID -5ULL
78 81
@@ -84,6 +87,13 @@ struct btrfs_ordered_sum;
84#define BTRFS_TREE_RELOC_OBJECTID -8ULL 87#define BTRFS_TREE_RELOC_OBJECTID -8ULL
85#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL 88#define BTRFS_DATA_RELOC_TREE_OBJECTID -9ULL
86 89
90/*
91 * extent checksums all have this objectid
92 * this allows them to share the logging tree
93 * for fsyncs
94 */
95#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
96
87/* dummy objectid represents multiple objectids */ 97/* dummy objectid represents multiple objectids */
88#define BTRFS_MULTIPLE_OBJECTIDS -255ULL 98#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
89 99
@@ -634,6 +644,7 @@ struct btrfs_fs_info {
634 struct btrfs_root *chunk_root; 644 struct btrfs_root *chunk_root;
635 struct btrfs_root *dev_root; 645 struct btrfs_root *dev_root;
636 struct btrfs_root *fs_root; 646 struct btrfs_root *fs_root;
647 struct btrfs_root *csum_root;
637 648
638 /* the log root tree is a directory of all the other log roots */ 649 /* the log root tree is a directory of all the other log roots */
639 struct btrfs_root *log_root_tree; 650 struct btrfs_root *log_root_tree;
@@ -716,6 +727,7 @@ struct btrfs_fs_info {
716 struct btrfs_workers workers; 727 struct btrfs_workers workers;
717 struct btrfs_workers delalloc_workers; 728 struct btrfs_workers delalloc_workers;
718 struct btrfs_workers endio_workers; 729 struct btrfs_workers endio_workers;
730 struct btrfs_workers endio_meta_workers;
719 struct btrfs_workers endio_write_workers; 731 struct btrfs_workers endio_write_workers;
720 struct btrfs_workers submit_workers; 732 struct btrfs_workers submit_workers;
721 /* 733 /*
@@ -858,13 +870,12 @@ struct btrfs_root {
858 * extent data is for file data 870 * extent data is for file data
859 */ 871 */
860#define BTRFS_EXTENT_DATA_KEY 108 872#define BTRFS_EXTENT_DATA_KEY 108
873
861/* 874/*
862 * csum items have the checksums for data in the extents 875 * extent csums are stored in a separate tree and hold csums for
876 * an entire extent on disk.
863 */ 877 */
864#define BTRFS_CSUM_ITEM_KEY 120 878#define BTRFS_EXTENT_CSUM_KEY 128
865
866
867/* reserve 21-31 for other file/dir stuff */
868 879
869/* 880/*
870 * root items point to tree roots. There are typically in the root 881 * root items point to tree roots. There are typically in the root
@@ -1917,7 +1928,7 @@ int btrfs_lookup_inode(struct btrfs_trans_handle *trans, struct btrfs_root
1917 1928
1918/* file-item.c */ 1929/* file-item.c */
1919int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, 1930int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
1920 struct bio *bio); 1931 struct bio *bio, u32 *dst);
1921int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, 1932int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
1922 struct btrfs_root *root, 1933 struct btrfs_root *root,
1923 u64 objectid, u64 pos, 1934 u64 objectid, u64 pos,
@@ -1929,17 +1940,16 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
1929 struct btrfs_path *path, u64 objectid, 1940 struct btrfs_path *path, u64 objectid,
1930 u64 bytenr, int mod); 1941 u64 bytenr, int mod);
1931int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 1942int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
1932 struct btrfs_root *root, struct inode *inode, 1943 struct btrfs_root *root,
1933 struct btrfs_ordered_sum *sums); 1944 struct btrfs_ordered_sum *sums);
1934int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, 1945int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
1935 struct bio *bio); 1946 struct bio *bio, u64 file_start, int contig);
1936int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode, 1947int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
1937 u64 start, unsigned long len); 1948 u64 start, unsigned long len);
1938struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, 1949struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
1939 struct btrfs_root *root, 1950 struct btrfs_root *root,
1940 struct btrfs_path *path, 1951 struct btrfs_path *path,
1941 u64 objectid, u64 offset, 1952 u64 bytenr, int cow);
1942 int cow);
1943int btrfs_csum_truncate(struct btrfs_trans_handle *trans, 1953int btrfs_csum_truncate(struct btrfs_trans_handle *trans,
1944 struct btrfs_root *root, struct btrfs_path *path, 1954 struct btrfs_root *root, struct btrfs_path *path,
1945 u64 isize); 1955 u64 isize);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3eb7c2576fe5..61dc3b2c834b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -445,11 +445,18 @@ static void end_workqueue_bio(struct bio *bio, int err)
445 end_io_wq->error = err; 445 end_io_wq->error = err;
446 end_io_wq->work.func = end_workqueue_fn; 446 end_io_wq->work.func = end_workqueue_fn;
447 end_io_wq->work.flags = 0; 447 end_io_wq->work.flags = 0;
448 if (bio->bi_rw & (1 << BIO_RW)) 448
449 if (bio->bi_rw & (1 << BIO_RW)) {
449 btrfs_queue_worker(&fs_info->endio_write_workers, 450 btrfs_queue_worker(&fs_info->endio_write_workers,
450 &end_io_wq->work); 451 &end_io_wq->work);
451 else 452 } else {
452 btrfs_queue_worker(&fs_info->endio_workers, &end_io_wq->work); 453 if (end_io_wq->metadata)
454 btrfs_queue_worker(&fs_info->endio_meta_workers,
455 &end_io_wq->work);
456 else
457 btrfs_queue_worker(&fs_info->endio_workers,
458 &end_io_wq->work);
459 }
453} 460}
454 461
455int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 462int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
@@ -1208,6 +1215,9 @@ static void __unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
1208 info = (struct btrfs_fs_info *)bdi->unplug_io_data; 1215 info = (struct btrfs_fs_info *)bdi->unplug_io_data;
1209 list_for_each(cur, &info->fs_devices->devices) { 1216 list_for_each(cur, &info->fs_devices->devices) {
1210 device = list_entry(cur, struct btrfs_device, dev_list); 1217 device = list_entry(cur, struct btrfs_device, dev_list);
1218 if (!device->bdev)
1219 continue;
1220
1211 bdi = blk_get_backing_dev_info(device->bdev); 1221 bdi = blk_get_backing_dev_info(device->bdev);
1212 if (bdi->unplug_io_fn) { 1222 if (bdi->unplug_io_fn) {
1213 bdi->unplug_io_fn(bdi, page); 1223 bdi->unplug_io_fn(bdi, page);
@@ -1344,7 +1354,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
1344 * blocksize <= pagesize, it is basically a noop 1354 * blocksize <= pagesize, it is basically a noop
1345 */ 1355 */
1346 if (end_io_wq->metadata && !bio_ready_for_csum(bio)) { 1356 if (end_io_wq->metadata && !bio_ready_for_csum(bio)) {
1347 btrfs_queue_worker(&fs_info->endio_workers, 1357 btrfs_queue_worker(&fs_info->endio_meta_workers,
1348 &end_io_wq->work); 1358 &end_io_wq->work);
1349 return; 1359 return;
1350 } 1360 }
@@ -1454,6 +1464,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1454 struct buffer_head *bh; 1464 struct buffer_head *bh;
1455 struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root), 1465 struct btrfs_root *extent_root = kzalloc(sizeof(struct btrfs_root),
1456 GFP_NOFS); 1466 GFP_NOFS);
1467 struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
1468 GFP_NOFS);
1457 struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root), 1469 struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
1458 GFP_NOFS); 1470 GFP_NOFS);
1459 struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info), 1471 struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
@@ -1470,7 +1482,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1470 struct btrfs_super_block *disk_super; 1482 struct btrfs_super_block *disk_super;
1471 1483
1472 if (!extent_root || !tree_root || !fs_info || 1484 if (!extent_root || !tree_root || !fs_info ||
1473 !chunk_root || !dev_root) { 1485 !chunk_root || !dev_root || !csum_root) {
1474 err = -ENOMEM; 1486 err = -ENOMEM;
1475 goto fail; 1487 goto fail;
1476 } 1488 }
@@ -1487,6 +1499,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1487 init_completion(&fs_info->kobj_unregister); 1499 init_completion(&fs_info->kobj_unregister);
1488 fs_info->tree_root = tree_root; 1500 fs_info->tree_root = tree_root;
1489 fs_info->extent_root = extent_root; 1501 fs_info->extent_root = extent_root;
1502 fs_info->csum_root = csum_root;
1490 fs_info->chunk_root = chunk_root; 1503 fs_info->chunk_root = chunk_root;
1491 fs_info->dev_root = dev_root; 1504 fs_info->dev_root = dev_root;
1492 fs_info->fs_devices = fs_devices; 1505 fs_info->fs_devices = fs_devices;
@@ -1652,6 +1665,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1652 btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1); 1665 btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1);
1653 btrfs_init_workers(&fs_info->endio_workers, "endio", 1666 btrfs_init_workers(&fs_info->endio_workers, "endio",
1654 fs_info->thread_pool_size); 1667 fs_info->thread_pool_size);
1668 btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
1669 fs_info->thread_pool_size);
1655 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 1670 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
1656 fs_info->thread_pool_size); 1671 fs_info->thread_pool_size);
1657 1672
@@ -1667,6 +1682,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1667 btrfs_start_workers(&fs_info->delalloc_workers, 1); 1682 btrfs_start_workers(&fs_info->delalloc_workers, 1);
1668 btrfs_start_workers(&fs_info->fixup_workers, 1); 1683 btrfs_start_workers(&fs_info->fixup_workers, 1);
1669 btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size); 1684 btrfs_start_workers(&fs_info->endio_workers, fs_info->thread_pool_size);
1685 btrfs_start_workers(&fs_info->endio_meta_workers,
1686 fs_info->thread_pool_size);
1670 btrfs_start_workers(&fs_info->endio_write_workers, 1687 btrfs_start_workers(&fs_info->endio_write_workers,
1671 fs_info->thread_pool_size); 1688 fs_info->thread_pool_size);
1672 1689
@@ -1751,6 +1768,13 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1751 if (ret) 1768 if (ret)
1752 goto fail_extent_root; 1769 goto fail_extent_root;
1753 1770
1771 ret = find_and_setup_root(tree_root, fs_info,
1772 BTRFS_CSUM_TREE_OBJECTID, csum_root);
1773 if (ret)
1774 goto fail_extent_root;
1775
1776 csum_root->track_dirty = 1;
1777
1754 btrfs_read_block_groups(extent_root); 1778 btrfs_read_block_groups(extent_root);
1755 1779
1756 fs_info->generation = generation + 1; 1780 fs_info->generation = generation + 1;
@@ -1761,7 +1785,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1761 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 1785 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
1762 "btrfs-cleaner"); 1786 "btrfs-cleaner");
1763 if (!fs_info->cleaner_kthread) 1787 if (!fs_info->cleaner_kthread)
1764 goto fail_extent_root; 1788 goto fail_csum_root;
1765 1789
1766 fs_info->transaction_kthread = kthread_run(transaction_kthread, 1790 fs_info->transaction_kthread = kthread_run(transaction_kthread,
1767 tree_root, 1791 tree_root,
@@ -1825,6 +1849,8 @@ fail_cleaner:
1825 filemap_write_and_wait(fs_info->btree_inode->i_mapping); 1849 filemap_write_and_wait(fs_info->btree_inode->i_mapping);
1826 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 1850 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
1827 1851
1852fail_csum_root:
1853 free_extent_buffer(csum_root->node);
1828fail_extent_root: 1854fail_extent_root:
1829 free_extent_buffer(extent_root->node); 1855 free_extent_buffer(extent_root->node);
1830fail_tree_root: 1856fail_tree_root:
@@ -1838,6 +1864,7 @@ fail_sb_buffer:
1838 btrfs_stop_workers(&fs_info->delalloc_workers); 1864 btrfs_stop_workers(&fs_info->delalloc_workers);
1839 btrfs_stop_workers(&fs_info->workers); 1865 btrfs_stop_workers(&fs_info->workers);
1840 btrfs_stop_workers(&fs_info->endio_workers); 1866 btrfs_stop_workers(&fs_info->endio_workers);
1867 btrfs_stop_workers(&fs_info->endio_meta_workers);
1841 btrfs_stop_workers(&fs_info->endio_write_workers); 1868 btrfs_stop_workers(&fs_info->endio_write_workers);
1842 btrfs_stop_workers(&fs_info->submit_workers); 1869 btrfs_stop_workers(&fs_info->submit_workers);
1843fail_iput: 1870fail_iput:
@@ -1853,6 +1880,7 @@ fail:
1853 kfree(fs_info); 1880 kfree(fs_info);
1854 kfree(chunk_root); 1881 kfree(chunk_root);
1855 kfree(dev_root); 1882 kfree(dev_root);
1883 kfree(csum_root);
1856 return ERR_PTR(err); 1884 return ERR_PTR(err);
1857} 1885}
1858 1886
@@ -2131,6 +2159,9 @@ int close_ctree(struct btrfs_root *root)
2131 if (root->fs_info->dev_root->node); 2159 if (root->fs_info->dev_root->node);
2132 free_extent_buffer(root->fs_info->dev_root->node); 2160 free_extent_buffer(root->fs_info->dev_root->node);
2133 2161
2162 if (root->fs_info->csum_root->node);
2163 free_extent_buffer(root->fs_info->csum_root->node);
2164
2134 btrfs_free_block_groups(root->fs_info); 2165 btrfs_free_block_groups(root->fs_info);
2135 2166
2136 del_fs_roots(fs_info); 2167 del_fs_roots(fs_info);
@@ -2141,6 +2172,7 @@ int close_ctree(struct btrfs_root *root)
2141 btrfs_stop_workers(&fs_info->delalloc_workers); 2172 btrfs_stop_workers(&fs_info->delalloc_workers);
2142 btrfs_stop_workers(&fs_info->workers); 2173 btrfs_stop_workers(&fs_info->workers);
2143 btrfs_stop_workers(&fs_info->endio_workers); 2174 btrfs_stop_workers(&fs_info->endio_workers);
2175 btrfs_stop_workers(&fs_info->endio_meta_workers);
2144 btrfs_stop_workers(&fs_info->endio_write_workers); 2176 btrfs_stop_workers(&fs_info->endio_write_workers);
2145 btrfs_stop_workers(&fs_info->submit_workers); 2177 btrfs_stop_workers(&fs_info->submit_workers);
2146 2178
@@ -2163,6 +2195,7 @@ int close_ctree(struct btrfs_root *root)
2163 kfree(fs_info->tree_root); 2195 kfree(fs_info->tree_root);
2164 kfree(fs_info->chunk_root); 2196 kfree(fs_info->chunk_root);
2165 kfree(fs_info->dev_root); 2197 kfree(fs_info->dev_root);
2198 kfree(fs_info->csum_root);
2166 return 0; 2199 return 0;
2167} 2200}
2168 2201
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c3dfe2a0ec85..7449ecf32c50 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1732,6 +1732,9 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1732 int whole_page; 1732 int whole_page;
1733 int ret; 1733 int ret;
1734 1734
1735 if (err)
1736 uptodate = 0;
1737
1735 do { 1738 do {
1736 struct page *page = bvec->bv_page; 1739 struct page *page = bvec->bv_page;
1737 tree = &BTRFS_I(page->mapping->host)->io_tree; 1740 tree = &BTRFS_I(page->mapping->host)->io_tree;
@@ -1761,6 +1764,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1761 if (ret == 0) { 1764 if (ret == 0) {
1762 uptodate = 1765 uptodate =
1763 test_bit(BIO_UPTODATE, &bio->bi_flags); 1766 test_bit(BIO_UPTODATE, &bio->bi_flags);
1767 if (err)
1768 uptodate = 0;
1764 continue; 1769 continue;
1765 } 1770 }
1766 } 1771 }
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 234ed441736c..a3ad2ce00116 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -74,8 +74,7 @@ out:
74struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans, 74struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
75 struct btrfs_root *root, 75 struct btrfs_root *root,
76 struct btrfs_path *path, 76 struct btrfs_path *path,
77 u64 objectid, u64 offset, 77 u64 bytenr, int cow)
78 int cow)
79{ 78{
80 int ret; 79 int ret;
81 struct btrfs_key file_key; 80 struct btrfs_key file_key;
@@ -87,9 +86,9 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
87 btrfs_super_csum_size(&root->fs_info->super_copy); 86 btrfs_super_csum_size(&root->fs_info->super_copy);
88 int csums_in_item; 87 int csums_in_item;
89 88
90 file_key.objectid = objectid; 89 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
91 file_key.offset = offset; 90 file_key.offset = bytenr;
92 btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); 91 btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
93 ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow); 92 ret = btrfs_search_slot(trans, root, &file_key, path, 0, cow);
94 if (ret < 0) 93 if (ret < 0)
95 goto fail; 94 goto fail;
@@ -100,11 +99,10 @@ struct btrfs_csum_item *btrfs_lookup_csum(struct btrfs_trans_handle *trans,
100 goto fail; 99 goto fail;
101 path->slots[0]--; 100 path->slots[0]--;
102 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 101 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
103 if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || 102 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY)
104 found_key.objectid != objectid) {
105 goto fail; 103 goto fail;
106 } 104
107 csum_offset = (offset - found_key.offset) >> 105 csum_offset = (bytenr - found_key.offset) >>
108 root->fs_info->sb->s_blocksize_bits; 106 root->fs_info->sb->s_blocksize_bits;
109 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]); 107 csums_in_item = btrfs_item_size_nr(leaf, path->slots[0]);
110 csums_in_item /= csum_size; 108 csums_in_item /= csum_size;
@@ -143,7 +141,7 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
143} 141}
144 142
145int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, 143int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
146 struct bio *bio) 144 struct bio *bio, u32 *dst)
147{ 145{
148 u32 sum; 146 u32 sum;
149 struct bio_vec *bvec = bio->bi_io_vec; 147 struct bio_vec *bvec = bio->bi_io_vec;
@@ -151,6 +149,7 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
151 u64 offset; 149 u64 offset;
152 u64 item_start_offset = 0; 150 u64 item_start_offset = 0;
153 u64 item_last_offset = 0; 151 u64 item_last_offset = 0;
152 u64 disk_bytenr;
154 u32 diff; 153 u32 diff;
155 u16 csum_size = 154 u16 csum_size =
156 btrfs_super_csum_size(&root->fs_info->super_copy); 155 btrfs_super_csum_size(&root->fs_info->super_copy);
@@ -165,21 +164,22 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
165 164
166 WARN_ON(bio->bi_vcnt <= 0); 165 WARN_ON(bio->bi_vcnt <= 0);
167 166
167 disk_bytenr = (u64)bio->bi_sector << 9;
168 while(bio_index < bio->bi_vcnt) { 168 while(bio_index < bio->bi_vcnt) {
169 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 169 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
170 ret = btrfs_find_ordered_sum(inode, offset, &sum); 170 ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum);
171 if (ret == 0) 171 if (ret == 0)
172 goto found; 172 goto found;
173 173
174 if (!item || offset < item_start_offset || 174 if (!item || disk_bytenr < item_start_offset ||
175 offset >= item_last_offset) { 175 disk_bytenr >= item_last_offset) {
176 struct btrfs_key found_key; 176 struct btrfs_key found_key;
177 u32 item_size; 177 u32 item_size;
178 178
179 if (item) 179 if (item)
180 btrfs_release_path(root, path); 180 btrfs_release_path(root, path);
181 item = btrfs_lookup_csum(NULL, root, path, 181 item = btrfs_lookup_csum(NULL, root->fs_info->csum_root,
182 inode->i_ino, offset, 0); 182 path, disk_bytenr, 0);
183 if (IS_ERR(item)) { 183 if (IS_ERR(item)) {
184 ret = PTR_ERR(item); 184 ret = PTR_ERR(item);
185 if (ret == -ENOENT || ret == -EFBIG) 185 if (ret == -ENOENT || ret == -EFBIG)
@@ -208,7 +208,7 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
208 * this byte range must be able to fit inside 208 * this byte range must be able to fit inside
209 * a single leaf so it will also fit inside a u32 209 * a single leaf so it will also fit inside a u32
210 */ 210 */
211 diff = offset - item_start_offset; 211 diff = disk_bytenr - item_start_offset;
212 diff = diff / root->sectorsize; 212 diff = diff / root->sectorsize;
213 diff = diff * csum_size; 213 diff = diff * csum_size;
214 214
@@ -216,7 +216,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
216 ((unsigned long)item) + diff, 216 ((unsigned long)item) + diff,
217 csum_size); 217 csum_size);
218found: 218found:
219 set_state_private(io_tree, offset, sum); 219 if (dst)
220 *dst++ = sum;
221 else
222 set_state_private(io_tree, offset, sum);
223 disk_bytenr += bvec->bv_len;
220 bio_index++; 224 bio_index++;
221 bvec++; 225 bvec++;
222 } 226 }
@@ -224,75 +228,8 @@ found:
224 return 0; 228 return 0;
225} 229}
226 230
227int btrfs_csum_file_bytes(struct btrfs_root *root, struct inode *inode,
228 u64 start, unsigned long len)
229{
230 struct btrfs_ordered_sum *sums;
231 struct btrfs_sector_sum *sector_sum;
232 struct btrfs_ordered_extent *ordered;
233 char *data;
234 struct page *page;
235 unsigned long total_bytes = 0;
236 unsigned long this_sum_bytes = 0;
237
238 sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
239 if (!sums)
240 return -ENOMEM;
241
242 sector_sum = sums->sums;
243 sums->file_offset = start;
244 sums->len = len;
245 INIT_LIST_HEAD(&sums->list);
246 ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset);
247 BUG_ON(!ordered);
248
249 while(len > 0) {
250 if (start >= ordered->file_offset + ordered->len ||
251 start < ordered->file_offset) {
252 sums->len = this_sum_bytes;
253 this_sum_bytes = 0;
254 btrfs_add_ordered_sum(inode, ordered, sums);
255 btrfs_put_ordered_extent(ordered);
256
257 sums = kzalloc(btrfs_ordered_sum_size(root, len),
258 GFP_NOFS);
259 BUG_ON(!sums);
260 sector_sum = sums->sums;
261 sums->len = len;
262 sums->file_offset = start;
263 ordered = btrfs_lookup_ordered_extent(inode,
264 sums->file_offset);
265 BUG_ON(!ordered);
266 }
267
268 page = find_get_page(inode->i_mapping,
269 start >> PAGE_CACHE_SHIFT);
270
271 data = kmap_atomic(page, KM_USER0);
272 sector_sum->sum = ~(u32)0;
273 sector_sum->sum = btrfs_csum_data(root, data, sector_sum->sum,
274 PAGE_CACHE_SIZE);
275 kunmap_atomic(data, KM_USER0);
276 btrfs_csum_final(sector_sum->sum,
277 (char *)&sector_sum->sum);
278 sector_sum->offset = page_offset(page);
279 page_cache_release(page);
280
281 sector_sum++;
282 total_bytes += PAGE_CACHE_SIZE;
283 this_sum_bytes += PAGE_CACHE_SIZE;
284 start += PAGE_CACHE_SIZE;
285
286 WARN_ON(len < PAGE_CACHE_SIZE);
287 len -= PAGE_CACHE_SIZE;
288 }
289 btrfs_add_ordered_sum(inode, ordered, sums);
290 btrfs_put_ordered_extent(ordered);
291 return 0;
292}
293
294int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode, 231int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
295 struct bio *bio) 232 struct bio *bio, u64 file_start, int contig)
296{ 233{
297 struct btrfs_ordered_sum *sums; 234 struct btrfs_ordered_sum *sums;
298 struct btrfs_sector_sum *sector_sum; 235 struct btrfs_sector_sum *sector_sum;
@@ -303,6 +240,7 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
303 unsigned long total_bytes = 0; 240 unsigned long total_bytes = 0;
304 unsigned long this_sum_bytes = 0; 241 unsigned long this_sum_bytes = 0;
305 u64 offset; 242 u64 offset;
243 u64 disk_bytenr;
306 244
307 WARN_ON(bio->bi_vcnt <= 0); 245 WARN_ON(bio->bi_vcnt <= 0);
308 sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS); 246 sums = kzalloc(btrfs_ordered_sum_size(root, bio->bi_size), GFP_NOFS);
@@ -310,16 +248,25 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
310 return -ENOMEM; 248 return -ENOMEM;
311 249
312 sector_sum = sums->sums; 250 sector_sum = sums->sums;
313 sums->file_offset = page_offset(bvec->bv_page) + bvec->bv_offset; 251 disk_bytenr = (u64)bio->bi_sector << 9;
314 sums->len = bio->bi_size; 252 sums->len = bio->bi_size;
315 INIT_LIST_HEAD(&sums->list); 253 INIT_LIST_HEAD(&sums->list);
316 ordered = btrfs_lookup_ordered_extent(inode, sums->file_offset); 254
255 if (contig)
256 offset = file_start;
257 else
258 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
259
260 ordered = btrfs_lookup_ordered_extent(inode, offset);
317 BUG_ON(!ordered); 261 BUG_ON(!ordered);
262 sums->bytenr = ordered->start;
318 263
319 while(bio_index < bio->bi_vcnt) { 264 while(bio_index < bio->bi_vcnt) {
320 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 265 if (!contig)
321 if (offset >= ordered->file_offset + ordered->len || 266 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
322 offset < ordered->file_offset) { 267
268 if (!contig && (offset >= ordered->file_offset + ordered->len ||
269 offset < ordered->file_offset)) {
323 unsigned long bytes_left; 270 unsigned long bytes_left;
324 sums->len = this_sum_bytes; 271 sums->len = this_sum_bytes;
325 this_sum_bytes = 0; 272 this_sum_bytes = 0;
@@ -333,10 +280,9 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
333 BUG_ON(!sums); 280 BUG_ON(!sums);
334 sector_sum = sums->sums; 281 sector_sum = sums->sums;
335 sums->len = bytes_left; 282 sums->len = bytes_left;
336 sums->file_offset = offset; 283 ordered = btrfs_lookup_ordered_extent(inode, offset);
337 ordered = btrfs_lookup_ordered_extent(inode,
338 sums->file_offset);
339 BUG_ON(!ordered); 284 BUG_ON(!ordered);
285 sums->bytenr = ordered->start;
340 } 286 }
341 287
342 data = kmap_atomic(bvec->bv_page, KM_USER0); 288 data = kmap_atomic(bvec->bv_page, KM_USER0);
@@ -348,13 +294,14 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
348 kunmap_atomic(data, KM_USER0); 294 kunmap_atomic(data, KM_USER0);
349 btrfs_csum_final(sector_sum->sum, 295 btrfs_csum_final(sector_sum->sum,
350 (char *)&sector_sum->sum); 296 (char *)&sector_sum->sum);
351 sector_sum->offset = page_offset(bvec->bv_page) + 297 sector_sum->bytenr = disk_bytenr;
352 bvec->bv_offset;
353 298
354 sector_sum++; 299 sector_sum++;
355 bio_index++; 300 bio_index++;
356 total_bytes += bvec->bv_len; 301 total_bytes += bvec->bv_len;
357 this_sum_bytes += bvec->bv_len; 302 this_sum_bytes += bvec->bv_len;
303 disk_bytenr += bvec->bv_len;
304 offset += bvec->bv_len;
358 bvec++; 305 bvec++;
359 } 306 }
360 this_sum_bytes = 0; 307 this_sum_bytes = 0;
@@ -364,11 +311,10 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
364} 311}
365 312
366int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 313int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
367 struct btrfs_root *root, struct inode *inode, 314 struct btrfs_root *root,
368 struct btrfs_ordered_sum *sums) 315 struct btrfs_ordered_sum *sums)
369{ 316{
370 u64 objectid = inode->i_ino; 317 u64 bytenr;
371 u64 offset;
372 int ret; 318 int ret;
373 struct btrfs_key file_key; 319 struct btrfs_key file_key;
374 struct btrfs_key found_key; 320 struct btrfs_key found_key;
@@ -396,13 +342,12 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
396again: 342again:
397 next_offset = (u64)-1; 343 next_offset = (u64)-1;
398 found_next = 0; 344 found_next = 0;
399 offset = sector_sum->offset; 345 file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
400 file_key.objectid = objectid; 346 file_key.offset = sector_sum->bytenr;
401 file_key.offset = offset; 347 bytenr = sector_sum->bytenr;
402 btrfs_set_key_type(&file_key, BTRFS_CSUM_ITEM_KEY); 348 btrfs_set_key_type(&file_key, BTRFS_EXTENT_CSUM_KEY);
403 349
404 mutex_lock(&BTRFS_I(inode)->csum_mutex); 350 item = btrfs_lookup_csum(trans, root, path, sector_sum->bytenr, 1);
405 item = btrfs_lookup_csum(trans, root, path, objectid, offset, 1);
406 if (!IS_ERR(item)) { 351 if (!IS_ERR(item)) {
407 leaf = path->nodes[0]; 352 leaf = path->nodes[0];
408 ret = 0; 353 ret = 0;
@@ -432,8 +377,8 @@ again:
432 slot = 0; 377 slot = 0;
433 } 378 }
434 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot); 379 btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
435 if (found_key.objectid != objectid || 380 if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
436 found_key.type != BTRFS_CSUM_ITEM_KEY) { 381 found_key.type != BTRFS_EXTENT_CSUM_KEY) {
437 found_next = 1; 382 found_next = 1;
438 goto insert; 383 goto insert;
439 } 384 }
@@ -460,10 +405,10 @@ again:
460 path->slots[0]--; 405 path->slots[0]--;
461 leaf = path->nodes[0]; 406 leaf = path->nodes[0];
462 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 407 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
463 csum_offset = (offset - found_key.offset) >> 408 csum_offset = (bytenr - found_key.offset) >>
464 root->fs_info->sb->s_blocksize_bits; 409 root->fs_info->sb->s_blocksize_bits;
465 if (btrfs_key_type(&found_key) != BTRFS_CSUM_ITEM_KEY || 410 if (btrfs_key_type(&found_key) != BTRFS_EXTENT_CSUM_KEY ||
466 found_key.objectid != objectid || 411 found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
467 csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) { 412 csum_offset >= MAX_CSUM_ITEMS(root, csum_size)) {
468 goto insert; 413 goto insert;
469 } 414 }
@@ -482,8 +427,18 @@ insert:
482 btrfs_release_path(root, path); 427 btrfs_release_path(root, path);
483 csum_offset = 0; 428 csum_offset = 0;
484 if (found_next) { 429 if (found_next) {
485 u64 tmp = min((u64)i_size_read(inode), next_offset); 430 u64 tmp = total_bytes + root->sectorsize;
486 tmp -= offset & ~((u64)root->sectorsize -1); 431 u64 next_sector = sector_sum->bytenr;
432 struct btrfs_sector_sum *next = sector_sum + 1;
433
434 while(tmp < sums->len) {
435 if (next_sector + root->sectorsize != next->bytenr)
436 break;
437 tmp += root->sectorsize;
438 next_sector = next->bytenr;
439 next++;
440 }
441 tmp = min(tmp, next_offset - file_key.offset);
487 tmp >>= root->fs_info->sb->s_blocksize_bits; 442 tmp >>= root->fs_info->sb->s_blocksize_bits;
488 tmp = max((u64)1, tmp); 443 tmp = max((u64)1, tmp);
489 tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size)); 444 tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
@@ -510,7 +465,6 @@ found:
510 item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + 465 item_end = (struct btrfs_csum_item *)((unsigned char *)item_end +
511 btrfs_item_size_nr(leaf, path->slots[0])); 466 btrfs_item_size_nr(leaf, path->slots[0]));
512 eb_token = NULL; 467 eb_token = NULL;
513 mutex_unlock(&BTRFS_I(inode)->csum_mutex);
514 cond_resched(); 468 cond_resched();
515next_sector: 469next_sector:
516 470
@@ -541,9 +495,9 @@ next_sector:
541 if (total_bytes < sums->len) { 495 if (total_bytes < sums->len) {
542 item = (struct btrfs_csum_item *)((char *)item + 496 item = (struct btrfs_csum_item *)((char *)item +
543 csum_size); 497 csum_size);
544 if (item < item_end && offset + PAGE_CACHE_SIZE == 498 if (item < item_end && bytenr + PAGE_CACHE_SIZE ==
545 sector_sum->offset) { 499 sector_sum->bytenr) {
546 offset = sector_sum->offset; 500 bytenr = sector_sum->bytenr;
547 goto next_sector; 501 goto next_sector;
548 } 502 }
549 } 503 }
@@ -562,7 +516,6 @@ out:
562 return ret; 516 return ret;
563 517
564fail_unlock: 518fail_unlock:
565 mutex_unlock(&BTRFS_I(inode)->csum_mutex);
566 goto out; 519 goto out;
567} 520}
568 521
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 09efc9473a3d..c03d847b8c4e 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1221,7 +1221,7 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, struct bio *bio
1221 struct btrfs_root *root = BTRFS_I(inode)->root; 1221 struct btrfs_root *root = BTRFS_I(inode)->root;
1222 int ret = 0; 1222 int ret = 0;
1223 1223
1224 ret = btrfs_csum_one_bio(root, inode, bio); 1224 ret = btrfs_csum_one_bio(root, inode, bio, 0, 0);
1225 BUG_ON(ret); 1225 BUG_ON(ret);
1226 return 0; 1226 return 0;
1227} 1227}
@@ -1259,12 +1259,11 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1259 btrfs_test_flag(inode, NODATASUM); 1259 btrfs_test_flag(inode, NODATASUM);
1260 1260
1261 if (!(rw & (1 << BIO_RW))) { 1261 if (!(rw & (1 << BIO_RW))) {
1262 1262 if (bio_flags & EXTENT_BIO_COMPRESSED) {
1263 if (bio_flags & EXTENT_BIO_COMPRESSED)
1264 return btrfs_submit_compressed_read(inode, bio, 1263 return btrfs_submit_compressed_read(inode, bio,
1265 mirror_num, bio_flags); 1264 mirror_num, bio_flags);
1266 else if (!skip_sum) 1265 } else if (!skip_sum)
1267 btrfs_lookup_bio_sums(root, inode, bio); 1266 btrfs_lookup_bio_sums(root, inode, bio, NULL);
1268 goto mapit; 1267 goto mapit;
1269 } else if (!skip_sum) { 1268 } else if (!skip_sum) {
1270 /* we're doing a write, do the async checksumming */ 1269 /* we're doing a write, do the async checksumming */
@@ -1292,8 +1291,8 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1292 btrfs_set_trans_block_group(trans, inode); 1291 btrfs_set_trans_block_group(trans, inode);
1293 list_for_each(cur, list) { 1292 list_for_each(cur, list) {
1294 sum = list_entry(cur, struct btrfs_ordered_sum, list); 1293 sum = list_entry(cur, struct btrfs_ordered_sum, list);
1295 btrfs_csum_file_blocks(trans, BTRFS_I(inode)->root, 1294 btrfs_csum_file_blocks(trans,
1296 inode, sum); 1295 BTRFS_I(inode)->root->fs_info->csum_root, sum);
1297 } 1296 }
1298 return 0; 1297 return 0;
1299} 1298}
@@ -1545,6 +1544,7 @@ struct io_failure_record {
1545 u64 start; 1544 u64 start;
1546 u64 len; 1545 u64 len;
1547 u64 logical; 1546 u64 logical;
1547 unsigned long bio_flags;
1548 int last_mirror; 1548 int last_mirror;
1549}; 1549};
1550 1550
@@ -1563,7 +1563,6 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1563 int ret; 1563 int ret;
1564 int rw; 1564 int rw;
1565 u64 logical; 1565 u64 logical;
1566 unsigned long bio_flags = 0;
1567 1566
1568 ret = get_state_private(failure_tree, start, &private); 1567 ret = get_state_private(failure_tree, start, &private);
1569 if (ret) { 1568 if (ret) {
@@ -1573,6 +1572,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1573 failrec->start = start; 1572 failrec->start = start;
1574 failrec->len = end - start + 1; 1573 failrec->len = end - start + 1;
1575 failrec->last_mirror = 0; 1574 failrec->last_mirror = 0;
1575 failrec->bio_flags = 0;
1576 1576
1577 spin_lock(&em_tree->lock); 1577 spin_lock(&em_tree->lock);
1578 em = lookup_extent_mapping(em_tree, start, failrec->len); 1578 em = lookup_extent_mapping(em_tree, start, failrec->len);
@@ -1588,8 +1588,10 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1588 } 1588 }
1589 logical = start - em->start; 1589 logical = start - em->start;
1590 logical = em->block_start + logical; 1590 logical = em->block_start + logical;
1591 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) 1591 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
1592 bio_flags = EXTENT_BIO_COMPRESSED; 1592 logical = em->block_start;
1593 failrec->bio_flags = EXTENT_BIO_COMPRESSED;
1594 }
1593 failrec->logical = logical; 1595 failrec->logical = logical;
1594 free_extent_map(em); 1596 free_extent_map(em);
1595 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED | 1597 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
@@ -1626,6 +1628,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1626 bio->bi_sector = failrec->logical >> 9; 1628 bio->bi_sector = failrec->logical >> 9;
1627 bio->bi_bdev = failed_bio->bi_bdev; 1629 bio->bi_bdev = failed_bio->bi_bdev;
1628 bio->bi_size = 0; 1630 bio->bi_size = 0;
1631
1629 bio_add_page(bio, page, failrec->len, start - page_offset(page)); 1632 bio_add_page(bio, page, failrec->len, start - page_offset(page));
1630 if (failed_bio->bi_rw & (1 << BIO_RW)) 1633 if (failed_bio->bi_rw & (1 << BIO_RW))
1631 rw = WRITE; 1634 rw = WRITE;
@@ -1634,7 +1637,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,
1634 1637
1635 BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, 1638 BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio,
1636 failrec->last_mirror, 1639 failrec->last_mirror,
1637 bio_flags); 1640 failrec->bio_flags);
1638 return 0; 1641 return 0;
1639} 1642}
1640 1643
@@ -1688,9 +1691,14 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1688 u32 csum = ~(u32)0; 1691 u32 csum = ~(u32)0;
1689 unsigned long flags; 1692 unsigned long flags;
1690 1693
1694 if (PageChecked(page)) {
1695 ClearPageChecked(page);
1696 goto good;
1697 }
1691 if (btrfs_test_opt(root, NODATASUM) || 1698 if (btrfs_test_opt(root, NODATASUM) ||
1692 btrfs_test_flag(inode, NODATASUM)) 1699 btrfs_test_flag(inode, NODATASUM))
1693 return 0; 1700 return 0;
1701
1694 if (state && state->start == start) { 1702 if (state && state->start == start) {
1695 private = state->private; 1703 private = state->private;
1696 ret = 0; 1704 ret = 0;
@@ -1709,7 +1717,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
1709 } 1717 }
1710 kunmap_atomic(kaddr, KM_IRQ0); 1718 kunmap_atomic(kaddr, KM_IRQ0);
1711 local_irq_restore(flags); 1719 local_irq_restore(flags);
1712 1720good:
1713 /* if the io failure tree for this inode is non-empty, 1721 /* if the io failure tree for this inode is non-empty,
1714 * check to see if we've recovered from a failed IO 1722 * check to see if we've recovered from a failed IO
1715 */ 1723 */
@@ -2243,6 +2251,7 @@ fail:
2243 return err; 2251 return err;
2244} 2252}
2245 2253
2254#if 0
2246/* 2255/*
2247 * when truncating bytes in a file, it is possible to avoid reading 2256 * when truncating bytes in a file, it is possible to avoid reading
2248 * the leaves that contain only checksum items. This can be the 2257 * the leaves that contain only checksum items. This can be the
@@ -2410,6 +2419,8 @@ out:
2410 return ret; 2419 return ret;
2411} 2420}
2412 2421
2422#endif
2423
2413/* 2424/*
2414 * this can truncate away extent items, csum items and directory items. 2425 * this can truncate away extent items, csum items and directory items.
2415 * It starts at a high offset and removes keys until it can't find 2426 * It starts at a high offset and removes keys until it can't find
@@ -2459,9 +2470,6 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2459 2470
2460 btrfs_init_path(path); 2471 btrfs_init_path(path);
2461 2472
2462 ret = drop_csum_leaves(trans, root, path, inode, new_size);
2463 BUG_ON(ret);
2464
2465search_again: 2473search_again:
2466 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 2474 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
2467 if (ret < 0) { 2475 if (ret < 0) {
@@ -2509,16 +2517,11 @@ search_again:
2509 } 2517 }
2510 item_end--; 2518 item_end--;
2511 } 2519 }
2512 if (found_type == BTRFS_CSUM_ITEM_KEY) {
2513 ret = btrfs_csum_truncate(trans, root, path,
2514 new_size);
2515 BUG_ON(ret);
2516 }
2517 if (item_end < new_size) { 2520 if (item_end < new_size) {
2518 if (found_type == BTRFS_DIR_ITEM_KEY) { 2521 if (found_type == BTRFS_DIR_ITEM_KEY) {
2519 found_type = BTRFS_INODE_ITEM_KEY; 2522 found_type = BTRFS_INODE_ITEM_KEY;
2520 } else if (found_type == BTRFS_EXTENT_ITEM_KEY) { 2523 } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
2521 found_type = BTRFS_CSUM_ITEM_KEY; 2524 found_type = BTRFS_EXTENT_DATA_KEY;
2522 } else if (found_type == BTRFS_EXTENT_DATA_KEY) { 2525 } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
2523 found_type = BTRFS_XATTR_ITEM_KEY; 2526 found_type = BTRFS_XATTR_ITEM_KEY;
2524 } else if (found_type == BTRFS_XATTR_ITEM_KEY) { 2527 } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index b4da53d55c82..6228b69c2b93 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -714,8 +714,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
714 u64 len = olen; 714 u64 len = olen;
715 u64 bs = root->fs_info->sb->s_blocksize; 715 u64 bs = root->fs_info->sb->s_blocksize;
716 u64 hint_byte; 716 u64 hint_byte;
717 u16 csum_size = 717
718 btrfs_super_csum_size(&root->fs_info->super_copy);
719 /* 718 /*
720 * TODO: 719 * TODO:
721 * - split compressed inline extents. annoying: we need to 720 * - split compressed inline extents. annoying: we need to
@@ -833,7 +832,7 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
833 slot = path->slots[0]; 832 slot = path->slots[0];
834 833
835 btrfs_item_key_to_cpu(leaf, &key, slot); 834 btrfs_item_key_to_cpu(leaf, &key, slot);
836 if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY || 835 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
837 key.objectid != src->i_ino) 836 key.objectid != src->i_ino)
838 break; 837 break;
839 838
@@ -958,56 +957,6 @@ static long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
958 btrfs_mark_buffer_dirty(leaf); 957 btrfs_mark_buffer_dirty(leaf);
959 } 958 }
960 959
961 if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
962 u32 size;
963 struct btrfs_key new_key;
964 u64 coverslen;
965 int coff, clen;
966
967 size = btrfs_item_size_nr(leaf, slot);
968 coverslen = (size / csum_size) <<
969 root->fs_info->sb->s_blocksize_bits;
970 printk("csums for %llu~%llu\n",
971 key.offset, coverslen);
972 if (key.offset + coverslen < off ||
973 key.offset >= off+len)
974 goto next;
975
976 read_extent_buffer(leaf, buf,
977 btrfs_item_ptr_offset(leaf, slot),
978 size);
979 btrfs_release_path(root, path);
980
981 coff = 0;
982 if (off > key.offset)
983 coff = ((off - key.offset) >>
984 root->fs_info->sb->s_blocksize_bits) *
985 csum_size;
986 clen = size - coff;
987 if (key.offset + coverslen > off+len)
988 clen -= ((key.offset+coverslen-off-len) >>
989 root->fs_info->sb->s_blocksize_bits) *
990 csum_size;
991 printk(" will dup %d~%d of %d\n",
992 coff, clen, size);
993
994 memcpy(&new_key, &key, sizeof(new_key));
995 new_key.objectid = inode->i_ino;
996 new_key.offset = key.offset + destoff - off;
997
998 ret = btrfs_insert_empty_item(trans, root, path,
999 &new_key, clen);
1000 if (ret)
1001 goto out;
1002
1003 leaf = path->nodes[0];
1004 slot = path->slots[0];
1005 write_extent_buffer(leaf, buf + coff,
1006 btrfs_item_ptr_offset(leaf, slot),
1007 clen);
1008 btrfs_mark_buffer_dirty(leaf);
1009 }
1010
1011 next: 960 next:
1012 btrfs_release_path(root, path); 961 btrfs_release_path(root, path);
1013 key.offset++; 962 key.offset++;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 027ad6b3839e..d9e232227da4 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -610,7 +610,8 @@ out:
610 * try to find a checksum. This is used because we allow pages to 610 * try to find a checksum. This is used because we allow pages to
611 * be reclaimed before their checksum is actually put into the btree 611 * be reclaimed before their checksum is actually put into the btree
612 */ 612 */
613int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum) 613int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
614 u32 *sum)
614{ 615{
615 struct btrfs_ordered_sum *ordered_sum; 616 struct btrfs_ordered_sum *ordered_sum;
616 struct btrfs_sector_sum *sector_sums; 617 struct btrfs_sector_sum *sector_sums;
@@ -629,11 +630,11 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum)
629 mutex_lock(&tree->mutex); 630 mutex_lock(&tree->mutex);
630 list_for_each_prev(cur, &ordered->list) { 631 list_for_each_prev(cur, &ordered->list) {
631 ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list); 632 ordered_sum = list_entry(cur, struct btrfs_ordered_sum, list);
632 if (offset >= ordered_sum->file_offset) { 633 if (disk_bytenr >= ordered_sum->bytenr) {
633 num_sectors = ordered_sum->len / sectorsize; 634 num_sectors = ordered_sum->len / sectorsize;
634 sector_sums = ordered_sum->sums; 635 sector_sums = ordered_sum->sums;
635 for (i = 0; i < num_sectors; i++) { 636 for (i = 0; i < num_sectors; i++) {
636 if (sector_sums[i].offset == offset) { 637 if (sector_sums[i].bytenr == disk_bytenr) {
637 *sum = sector_sums[i].sum; 638 *sum = sector_sums[i].sum;
638 ret = 0; 639 ret = 0;
639 goto out; 640 goto out;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 260bf95dfe0c..ab66d5e8d6d6 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -33,15 +33,17 @@ struct btrfs_ordered_inode_tree {
33 * the ordered extent are on disk 33 * the ordered extent are on disk
34 */ 34 */
35struct btrfs_sector_sum { 35struct btrfs_sector_sum {
36 u64 offset; 36 /* bytenr on disk */
37 u64 bytenr;
37 u32 sum; 38 u32 sum;
38}; 39};
39 40
40struct btrfs_ordered_sum { 41struct btrfs_ordered_sum {
41 u64 file_offset; 42 /* bytenr is the start of this extent on disk */
43 u64 bytenr;
44
42 /* 45 /*
43 * this is the length in bytes covered by the sums array below. 46 * this is the length in bytes covered by the sums array below.
44 * But, the sums array may not be contiguous in the file.
45 */ 47 */
46 unsigned long len; 48 unsigned long len;
47 struct list_head list; 49 struct list_head list;
@@ -147,7 +149,7 @@ struct btrfs_ordered_extent *
147btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); 149btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
148int btrfs_ordered_update_i_size(struct inode *inode, 150int btrfs_ordered_update_i_size(struct inode *inode,
149 struct btrfs_ordered_extent *ordered); 151 struct btrfs_ordered_extent *ordered);
150int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u32 *sum); 152int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
151int btrfs_wait_on_page_writeback_range(struct address_space *mapping, 153int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
152 pgoff_t start, pgoff_t end); 154 pgoff_t start, pgoff_t end);
153int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, 155int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c766649ad453..08469ec05850 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -934,24 +934,17 @@ static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
934 unsigned long file_bytes; 934 unsigned long file_bytes;
935 struct btrfs_ordered_sum *sums; 935 struct btrfs_ordered_sum *sums;
936 struct btrfs_sector_sum *sector_sum; 936 struct btrfs_sector_sum *sector_sum;
937 struct inode *inode;
938 unsigned long ptr; 937 unsigned long ptr;
939 938
940 file_bytes = (item_size / csum_size) * root->sectorsize; 939 file_bytes = (item_size / csum_size) * root->sectorsize;
941 inode = read_one_inode(root, key->objectid);
942 if (!inode) {
943 return -EIO;
944 }
945
946 sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS); 940 sums = kzalloc(btrfs_ordered_sum_size(root, file_bytes), GFP_NOFS);
947 if (!sums) { 941 if (!sums) {
948 iput(inode);
949 return -ENOMEM; 942 return -ENOMEM;
950 } 943 }
951 944
952 INIT_LIST_HEAD(&sums->list); 945 INIT_LIST_HEAD(&sums->list);
953 sums->len = file_bytes; 946 sums->len = file_bytes;
954 sums->file_offset = key->offset; 947 sums->bytenr = key->offset;
955 948
956 /* 949 /*
957 * copy all the sums into the ordered sum struct 950 * copy all the sums into the ordered sum struct
@@ -960,7 +953,7 @@ static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
960 cur_offset = key->offset; 953 cur_offset = key->offset;
961 ptr = btrfs_item_ptr_offset(eb, slot); 954 ptr = btrfs_item_ptr_offset(eb, slot);
962 while(item_size > 0) { 955 while(item_size > 0) {
963 sector_sum->offset = cur_offset; 956 sector_sum->bytenr = cur_offset;
964 read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size); 957 read_extent_buffer(eb, &sector_sum->sum, ptr, csum_size);
965 sector_sum++; 958 sector_sum++;
966 item_size -= csum_size; 959 item_size -= csum_size;
@@ -969,11 +962,9 @@ static noinline int replay_one_csum(struct btrfs_trans_handle *trans,
969 } 962 }
970 963
971 /* let btrfs_csum_file_blocks add them into the file */ 964 /* let btrfs_csum_file_blocks add them into the file */
972 ret = btrfs_csum_file_blocks(trans, root, inode, sums); 965 ret = btrfs_csum_file_blocks(trans, root->fs_info->csum_root, sums);
973 BUG_ON(ret); 966 BUG_ON(ret);
974 kfree(sums); 967 kfree(sums);
975 iput(inode);
976
977 return 0; 968 return 0;
978} 969}
979/* 970/*
@@ -1670,7 +1661,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1670 ret = replay_one_extent(wc->trans, root, path, 1661 ret = replay_one_extent(wc->trans, root, path,
1671 eb, i, &key); 1662 eb, i, &key);
1672 BUG_ON(ret); 1663 BUG_ON(ret);
1673 } else if (key.type == BTRFS_CSUM_ITEM_KEY) { 1664 } else if (key.type == BTRFS_EXTENT_CSUM_KEY) {
1674 ret = replay_one_csum(wc->trans, root, path, 1665 ret = replay_one_csum(wc->trans, root, path,
1675 eb, i, &key); 1666 eb, i, &key);
1676 BUG_ON(ret); 1667 BUG_ON(ret);
@@ -2466,6 +2457,85 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
2466 return 0; 2457 return 0;
2467} 2458}
2468 2459
2460static noinline int copy_extent_csums(struct btrfs_trans_handle *trans,
2461 struct list_head *list,
2462 struct btrfs_root *root,
2463 u64 disk_bytenr, u64 len)
2464{
2465 struct btrfs_ordered_sum *sums;
2466 struct btrfs_sector_sum *sector_sum;
2467 int ret;
2468 struct btrfs_path *path;
2469 struct btrfs_csum_item *item = NULL;
2470 u64 end = disk_bytenr + len;
2471 u64 item_start_offset = 0;
2472 u64 item_last_offset = 0;
2473 u32 diff;
2474 u32 sum;
2475 u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy);
2476
2477 sums = kzalloc(btrfs_ordered_sum_size(root, len), GFP_NOFS);
2478
2479 sector_sum = sums->sums;
2480 sums->bytenr = disk_bytenr;
2481 sums->len = len;
2482 list_add_tail(&sums->list, list);
2483
2484 path = btrfs_alloc_path();
2485 while(disk_bytenr < end) {
2486 if (!item || disk_bytenr < item_start_offset ||
2487 disk_bytenr >= item_last_offset) {
2488 struct btrfs_key found_key;
2489 u32 item_size;
2490
2491 if (item)
2492 btrfs_release_path(root, path);
2493 item = btrfs_lookup_csum(NULL, root, path,
2494 disk_bytenr, 0);
2495 if (IS_ERR(item)) {
2496 ret = PTR_ERR(item);
2497 if (ret == -ENOENT || ret == -EFBIG)
2498 ret = 0;
2499 sum = 0;
2500 printk("log no csum found for byte %llu\n",
2501 (unsigned long long)disk_bytenr);
2502 item = NULL;
2503 btrfs_release_path(root, path);
2504 goto found;
2505 }
2506 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
2507 path->slots[0]);
2508
2509 item_start_offset = found_key.offset;
2510 item_size = btrfs_item_size_nr(path->nodes[0],
2511 path->slots[0]);
2512 item_last_offset = item_start_offset +
2513 (item_size / csum_size) *
2514 root->sectorsize;
2515 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
2516 struct btrfs_csum_item);
2517 }
2518 /*
2519 * this byte range must be able to fit inside
2520 * a single leaf so it will also fit inside a u32
2521 */
2522 diff = disk_bytenr - item_start_offset;
2523 diff = diff / root->sectorsize;
2524 diff = diff * csum_size;
2525
2526 read_extent_buffer(path->nodes[0], &sum,
2527 ((unsigned long)item) + diff,
2528 csum_size);
2529found:
2530 sector_sum->bytenr = disk_bytenr;
2531 sector_sum->sum = sum;
2532 disk_bytenr += root->sectorsize;
2533 sector_sum++;
2534 }
2535 btrfs_free_path(path);
2536 return 0;
2537}
2538
2469static noinline int copy_items(struct btrfs_trans_handle *trans, 2539static noinline int copy_items(struct btrfs_trans_handle *trans,
2470 struct btrfs_root *log, 2540 struct btrfs_root *log,
2471 struct btrfs_path *dst_path, 2541 struct btrfs_path *dst_path,
@@ -2481,6 +2551,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2481 u32 *ins_sizes; 2551 u32 *ins_sizes;
2482 char *ins_data; 2552 char *ins_data;
2483 int i; 2553 int i;
2554 struct list_head ordered_sums;
2555
2556 INIT_LIST_HEAD(&ordered_sums);
2484 2557
2485 ins_data = kmalloc(nr * sizeof(struct btrfs_key) + 2558 ins_data = kmalloc(nr * sizeof(struct btrfs_key) +
2486 nr * sizeof(u32), GFP_NOFS); 2559 nr * sizeof(u32), GFP_NOFS);
@@ -2535,6 +2608,9 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2535 extent); 2608 extent);
2536 u64 dl = btrfs_file_extent_disk_num_bytes(src, 2609 u64 dl = btrfs_file_extent_disk_num_bytes(src,
2537 extent); 2610 extent);
2611 u64 cs = btrfs_file_extent_offset(src, extent);
2612 u64 cl = btrfs_file_extent_num_bytes(src,
2613 extent);;
2538 /* ds == 0 is a hole */ 2614 /* ds == 0 is a hole */
2539 if (ds != 0) { 2615 if (ds != 0) {
2540 ret = btrfs_inc_extent_ref(trans, log, 2616 ret = btrfs_inc_extent_ref(trans, log,
@@ -2544,6 +2620,11 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2544 trans->transid, 2620 trans->transid,
2545 ins_keys[i].objectid); 2621 ins_keys[i].objectid);
2546 BUG_ON(ret); 2622 BUG_ON(ret);
2623 ret = copy_extent_csums(trans,
2624 &ordered_sums,
2625 log->fs_info->csum_root,
2626 ds + cs, cl);
2627 BUG_ON(ret);
2547 } 2628 }
2548 } 2629 }
2549 } 2630 }
@@ -2553,6 +2634,20 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
2553 btrfs_mark_buffer_dirty(dst_path->nodes[0]); 2634 btrfs_mark_buffer_dirty(dst_path->nodes[0]);
2554 btrfs_release_path(log, dst_path); 2635 btrfs_release_path(log, dst_path);
2555 kfree(ins_data); 2636 kfree(ins_data);
2637
2638 /*
2639 * we have to do this after the loop above to avoid changing the
2640 * log tree while trying to change the log tree.
2641 */
2642 while(!list_empty(&ordered_sums)) {
2643 struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next,
2644 struct btrfs_ordered_sum,
2645 list);
2646 ret = btrfs_csum_file_blocks(trans, log, sums);
2647 BUG_ON(ret);
2648 list_del(&sums->list);
2649 kfree(sums);
2650 }
2556 return 0; 2651 return 0;
2557} 2652}
2558 2653
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 6c523b3360f6..2049d179ccd5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2771,6 +2771,7 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
2771 device->work.func = pending_bios_fn; 2771 device->work.func = pending_bios_fn;
2772 fs_devices->num_devices++; 2772 fs_devices->num_devices++;
2773 spin_lock_init(&device->io_lock); 2773 spin_lock_init(&device->io_lock);
2774 INIT_LIST_HEAD(&device->dev_alloc_list);
2774 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); 2775 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
2775 return device; 2776 return device;
2776} 2777}