diff options
author | Josef Bacik <josef@redhat.com> | 2011-06-10 15:31:13 -0400 |
---|---|---|
committer | Josef Bacik <josef@redhat.com> | 2011-07-11 09:58:49 -0400 |
commit | 2f356126c589d562f98e2287f9c7b983388dc62f (patch) | |
tree | 8674f29e4c8ccc2d536472f5fef5755b252093a3 /fs/btrfs/free-space-cache.c | |
parent | fdb5effd5c2a7e01dc3a4217bb194e2d3a5b160f (diff) |
Btrfs: use the normal checksumming infrastructure for free space cache
We used to store the checksums of the space cache directly in the space cache;
however, that doesn't work out too well if the cache has more pages than we can
fit checksums for in the first page. So instead, use the normal checksumming
infrastructure. There were problems with doing this originally, but those
problems don't exist now, so this works out fine. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r-- | fs/btrfs/free-space-cache.c | 169 |
1 files changed, 59 insertions, 110 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index bf0d61567f3d..fd7fa2a74f06 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
98 | return inode; | 98 | return inode; |
99 | 99 | ||
100 | spin_lock(&block_group->lock); | 100 | spin_lock(&block_group->lock); |
101 | if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) { | ||
102 | printk(KERN_INFO "Old style space inode found, converting.\n"); | ||
103 | BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM; | ||
104 | block_group->disk_cache_state = BTRFS_DC_CLEAR; | ||
105 | } | ||
106 | |||
101 | if (!btrfs_fs_closing(root->fs_info)) { | 107 | if (!btrfs_fs_closing(root->fs_info)) { |
102 | block_group->inode = igrab(inode); | 108 | block_group->inode = igrab(inode); |
103 | block_group->iref = 1; | 109 | block_group->iref = 1; |
@@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root, | |||
135 | btrfs_set_inode_gid(leaf, inode_item, 0); | 141 | btrfs_set_inode_gid(leaf, inode_item, 0); |
136 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); | 142 | btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); |
137 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | | 143 | btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | |
138 | BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); | 144 | BTRFS_INODE_PREALLOC); |
139 | btrfs_set_inode_nlink(leaf, inode_item, 1); | 145 | btrfs_set_inode_nlink(leaf, inode_item, 1); |
140 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); | 146 | btrfs_set_inode_transid(leaf, inode_item, trans->transid); |
141 | btrfs_set_inode_block_group(leaf, inode_item, offset); | 147 | btrfs_set_inode_block_group(leaf, inode_item, offset); |
@@ -239,17 +245,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
239 | struct btrfs_free_space_header *header; | 245 | struct btrfs_free_space_header *header; |
240 | struct extent_buffer *leaf; | 246 | struct extent_buffer *leaf; |
241 | struct page *page; | 247 | struct page *page; |
242 | u32 *checksums = NULL, *crc; | ||
243 | char *disk_crcs = NULL; | ||
244 | struct btrfs_key key; | 248 | struct btrfs_key key; |
245 | struct list_head bitmaps; | 249 | struct list_head bitmaps; |
246 | u64 num_entries; | 250 | u64 num_entries; |
247 | u64 num_bitmaps; | 251 | u64 num_bitmaps; |
248 | u64 generation; | 252 | u64 generation; |
249 | u32 cur_crc = ~(u32)0; | ||
250 | pgoff_t index = 0; | 253 | pgoff_t index = 0; |
251 | unsigned long first_page_offset; | ||
252 | int num_checksums; | ||
253 | int ret = 0; | 254 | int ret = 0; |
254 | 255 | ||
255 | INIT_LIST_HEAD(&bitmaps); | 256 | INIT_LIST_HEAD(&bitmaps); |
@@ -292,16 +293,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
292 | if (!num_entries) | 293 | if (!num_entries) |
293 | goto out; | 294 | goto out; |
294 | 295 | ||
295 | /* Setup everything for doing checksumming */ | ||
296 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | ||
297 | checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
298 | if (!checksums) | ||
299 | goto out; | ||
300 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | ||
301 | disk_crcs = kzalloc(first_page_offset, GFP_NOFS); | ||
302 | if (!disk_crcs) | ||
303 | goto out; | ||
304 | |||
305 | ret = readahead_cache(inode); | 296 | ret = readahead_cache(inode); |
306 | if (ret) | 297 | if (ret) |
307 | goto out; | 298 | goto out; |
@@ -311,17 +302,11 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
311 | struct btrfs_free_space *e; | 302 | struct btrfs_free_space *e; |
312 | void *addr; | 303 | void *addr; |
313 | unsigned long offset = 0; | 304 | unsigned long offset = 0; |
314 | unsigned long start_offset = 0; | ||
315 | int need_loop = 0; | 305 | int need_loop = 0; |
316 | 306 | ||
317 | if (!num_entries && !num_bitmaps) | 307 | if (!num_entries && !num_bitmaps) |
318 | break; | 308 | break; |
319 | 309 | ||
320 | if (index == 0) { | ||
321 | start_offset = first_page_offset; | ||
322 | offset = start_offset; | ||
323 | } | ||
324 | |||
325 | page = grab_cache_page(inode->i_mapping, index); | 310 | page = grab_cache_page(inode->i_mapping, index); |
326 | if (!page) | 311 | if (!page) |
327 | goto free_cache; | 312 | goto free_cache; |
@@ -342,8 +327,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
342 | if (index == 0) { | 327 | if (index == 0) { |
343 | u64 *gen; | 328 | u64 *gen; |
344 | 329 | ||
345 | memcpy(disk_crcs, addr, first_page_offset); | 330 | /* |
346 | gen = addr + (sizeof(u32) * num_checksums); | 331 | * We put a bogus crc in the front of the first page in |
332 | * case old kernels try to mount a fs with the new | ||
333 | * format to make sure they discard the cache. | ||
334 | */ | ||
335 | addr += sizeof(u64); | ||
336 | offset += sizeof(u64); | ||
337 | |||
338 | gen = addr; | ||
347 | if (*gen != BTRFS_I(inode)->generation) { | 339 | if (*gen != BTRFS_I(inode)->generation) { |
348 | printk(KERN_ERR "btrfs: space cache generation" | 340 | printk(KERN_ERR "btrfs: space cache generation" |
349 | " (%llu) does not match inode (%llu)\n", | 341 | " (%llu) does not match inode (%llu)\n", |
@@ -355,24 +347,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, | |||
355 | page_cache_release(page); | 347 | page_cache_release(page); |
356 | goto free_cache; | 348 | goto free_cache; |
357 | } | 349 | } |
358 | crc = (u32 *)disk_crcs; | 350 | addr += sizeof(u64); |
359 | } | 351 | offset += sizeof(u64); |
360 | entry = addr + start_offset; | ||
361 | |||
362 | /* First lets check our crc before we do anything fun */ | ||
363 | cur_crc = ~(u32)0; | ||
364 | cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, | ||
365 | PAGE_CACHE_SIZE - start_offset); | ||
366 | btrfs_csum_final(cur_crc, (char *)&cur_crc); | ||
367 | if (cur_crc != *crc) { | ||
368 | printk(KERN_ERR "btrfs: crc mismatch for page %lu\n", | ||
369 | index); | ||
370 | kunmap(page); | ||
371 | unlock_page(page); | ||
372 | page_cache_release(page); | ||
373 | goto free_cache; | ||
374 | } | 352 | } |
375 | crc++; | 353 | entry = addr; |
376 | 354 | ||
377 | while (1) { | 355 | while (1) { |
378 | if (!num_entries) | 356 | if (!num_entries) |
@@ -470,8 +448,6 @@ next: | |||
470 | 448 | ||
471 | ret = 1; | 449 | ret = 1; |
472 | out: | 450 | out: |
473 | kfree(checksums); | ||
474 | kfree(disk_crcs); | ||
475 | return ret; | 451 | return ret; |
476 | free_cache: | 452 | free_cache: |
477 | __btrfs_remove_free_space_cache(ctl); | 453 | __btrfs_remove_free_space_cache(ctl); |
@@ -569,8 +545,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
569 | struct btrfs_key key; | 545 | struct btrfs_key key; |
570 | u64 start, end, len; | 546 | u64 start, end, len; |
571 | u64 bytes = 0; | 547 | u64 bytes = 0; |
572 | u32 *crc, *checksums; | 548 | u32 crc = ~(u32)0; |
573 | unsigned long first_page_offset; | ||
574 | int index = 0, num_pages = 0; | 549 | int index = 0, num_pages = 0; |
575 | int entries = 0; | 550 | int entries = 0; |
576 | int bitmaps = 0; | 551 | int bitmaps = 0; |
@@ -590,34 +565,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
590 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> | 565 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
591 | PAGE_CACHE_SHIFT; | 566 | PAGE_CACHE_SHIFT; |
592 | 567 | ||
593 | /* Since the first page has all of our checksums and our generation we | ||
594 | * need to calculate the offset into the page that we can start writing | ||
595 | * our entries. | ||
596 | */ | ||
597 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); | ||
598 | |||
599 | filemap_write_and_wait(inode->i_mapping); | 568 | filemap_write_and_wait(inode->i_mapping); |
600 | btrfs_wait_ordered_range(inode, inode->i_size & | 569 | btrfs_wait_ordered_range(inode, inode->i_size & |
601 | ~(root->sectorsize - 1), (u64)-1); | 570 | ~(root->sectorsize - 1), (u64)-1); |
602 | 571 | ||
603 | /* make sure we don't overflow that first page */ | ||
604 | if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { | ||
605 | /* this is really the same as running out of space, where we also return 0 */ | ||
606 | printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); | ||
607 | ret = 0; | ||
608 | goto out_update; | ||
609 | } | ||
610 | |||
611 | /* We need a checksum per page. */ | ||
612 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); | ||
613 | if (!crc) | ||
614 | return -1; | ||
615 | |||
616 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | 572 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); |
617 | if (!pages) { | 573 | if (!pages) |
618 | kfree(crc); | ||
619 | return -1; | 574 | return -1; |
620 | } | ||
621 | 575 | ||
622 | /* Get the cluster for this block_group if it exists */ | 576 | /* Get the cluster for this block_group if it exists */ |
623 | if (block_group && !list_empty(&block_group->cluster_list)) | 577 | if (block_group && !list_empty(&block_group->cluster_list)) |
@@ -648,7 +602,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
648 | unlock_page(pages[i]); | 602 | unlock_page(pages[i]); |
649 | page_cache_release(pages[i]); | 603 | page_cache_release(pages[i]); |
650 | } | 604 | } |
651 | goto out_free; | 605 | goto out; |
652 | } | 606 | } |
653 | pages[index] = page; | 607 | pages[index] = page; |
654 | index++; | 608 | index++; |
@@ -668,17 +622,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
668 | /* Write out the extent entries */ | 622 | /* Write out the extent entries */ |
669 | do { | 623 | do { |
670 | struct btrfs_free_space_entry *entry; | 624 | struct btrfs_free_space_entry *entry; |
671 | void *addr; | 625 | void *addr, *orig; |
672 | unsigned long offset = 0; | 626 | unsigned long offset = 0; |
673 | unsigned long start_offset = 0; | ||
674 | 627 | ||
675 | next_page = false; | 628 | next_page = false; |
676 | 629 | ||
677 | if (index == 0) { | ||
678 | start_offset = first_page_offset; | ||
679 | offset = start_offset; | ||
680 | } | ||
681 | |||
682 | if (index >= num_pages) { | 630 | if (index >= num_pages) { |
683 | out_of_space = true; | 631 | out_of_space = true; |
684 | break; | 632 | break; |
@@ -686,10 +634,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
686 | 634 | ||
687 | page = pages[index]; | 635 | page = pages[index]; |
688 | 636 | ||
689 | addr = kmap(page); | 637 | orig = addr = kmap(page); |
690 | entry = addr + start_offset; | 638 | if (index == 0) { |
639 | u64 *gen; | ||
691 | 640 | ||
692 | memset(addr, 0, PAGE_CACHE_SIZE); | 641 | /* |
642 | * We're going to put in a bogus crc for this page to | ||
643 | * make sure that old kernels who aren't aware of this | ||
644 | * format will be sure to discard the cache. | ||
645 | */ | ||
646 | addr += sizeof(u64); | ||
647 | offset += sizeof(u64); | ||
648 | |||
649 | gen = addr; | ||
650 | *gen = trans->transid; | ||
651 | addr += sizeof(u64); | ||
652 | offset += sizeof(u64); | ||
653 | } | ||
654 | entry = addr; | ||
655 | |||
656 | memset(addr, 0, PAGE_CACHE_SIZE - offset); | ||
693 | while (node && !next_page) { | 657 | while (node && !next_page) { |
694 | struct btrfs_free_space *e; | 658 | struct btrfs_free_space *e; |
695 | 659 | ||
@@ -752,13 +716,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
752 | next_page = true; | 716 | next_page = true; |
753 | entry++; | 717 | entry++; |
754 | } | 718 | } |
755 | *crc = ~(u32)0; | ||
756 | *crc = btrfs_csum_data(root, addr + start_offset, *crc, | ||
757 | PAGE_CACHE_SIZE - start_offset); | ||
758 | kunmap(page); | ||
759 | 719 | ||
760 | btrfs_csum_final(*crc, (char *)crc); | 720 | /* Generate bogus crc value */ |
761 | crc++; | 721 | if (index == 0) { |
722 | u32 *tmp; | ||
723 | crc = btrfs_csum_data(root, orig + sizeof(u64), crc, | ||
724 | PAGE_CACHE_SIZE - sizeof(u64)); | ||
725 | btrfs_csum_final(crc, (char *)&crc); | ||
726 | crc++; | ||
727 | tmp = orig; | ||
728 | *tmp = crc; | ||
729 | } | ||
730 | |||
731 | kunmap(page); | ||
762 | 732 | ||
763 | bytes += PAGE_CACHE_SIZE; | 733 | bytes += PAGE_CACHE_SIZE; |
764 | 734 | ||
@@ -779,11 +749,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
779 | 749 | ||
780 | addr = kmap(page); | 750 | addr = kmap(page); |
781 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | 751 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); |
782 | *crc = ~(u32)0; | ||
783 | *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE); | ||
784 | kunmap(page); | 752 | kunmap(page); |
785 | btrfs_csum_final(*crc, (char *)crc); | ||
786 | crc++; | ||
787 | bytes += PAGE_CACHE_SIZE; | 753 | bytes += PAGE_CACHE_SIZE; |
788 | 754 | ||
789 | list_del_init(&entry->list); | 755 | list_del_init(&entry->list); |
@@ -796,7 +762,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
796 | i_size_read(inode) - 1, &cached_state, | 762 | i_size_read(inode) - 1, &cached_state, |
797 | GFP_NOFS); | 763 | GFP_NOFS); |
798 | ret = 0; | 764 | ret = 0; |
799 | goto out_free; | 765 | goto out; |
800 | } | 766 | } |
801 | 767 | ||
802 | /* Zero out the rest of the pages just to make sure */ | 768 | /* Zero out the rest of the pages just to make sure */ |
@@ -811,20 +777,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
811 | index++; | 777 | index++; |
812 | } | 778 | } |
813 | 779 | ||
814 | /* Write the checksums and trans id to the first page */ | ||
815 | { | ||
816 | void *addr; | ||
817 | u64 *gen; | ||
818 | |||
819 | page = pages[0]; | ||
820 | |||
821 | addr = kmap(page); | ||
822 | memcpy(addr, checksums, sizeof(u32) * num_pages); | ||
823 | gen = addr + (sizeof(u32) * num_pages); | ||
824 | *gen = trans->transid; | ||
825 | kunmap(page); | ||
826 | } | ||
827 | |||
828 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | 780 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, |
829 | bytes, &cached_state); | 781 | bytes, &cached_state); |
830 | btrfs_drop_pages(pages, num_pages); | 782 | btrfs_drop_pages(pages, num_pages); |
@@ -833,7 +785,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
833 | 785 | ||
834 | if (ret) { | 786 | if (ret) { |
835 | ret = 0; | 787 | ret = 0; |
836 | goto out_free; | 788 | goto out; |
837 | } | 789 | } |
838 | 790 | ||
839 | BTRFS_I(inode)->generation = trans->transid; | 791 | BTRFS_I(inode)->generation = trans->transid; |
@@ -850,7 +802,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
850 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, | 802 | clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, |
851 | EXTENT_DIRTY | EXTENT_DELALLOC | | 803 | EXTENT_DIRTY | EXTENT_DELALLOC | |
852 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); | 804 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); |
853 | goto out_free; | 805 | goto out; |
854 | } | 806 | } |
855 | leaf = path->nodes[0]; | 807 | leaf = path->nodes[0]; |
856 | if (ret > 0) { | 808 | if (ret > 0) { |
@@ -866,7 +818,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
866 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, | 818 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, |
867 | GFP_NOFS); | 819 | GFP_NOFS); |
868 | btrfs_release_path(path); | 820 | btrfs_release_path(path); |
869 | goto out_free; | 821 | goto out; |
870 | } | 822 | } |
871 | } | 823 | } |
872 | header = btrfs_item_ptr(leaf, path->slots[0], | 824 | header = btrfs_item_ptr(leaf, path->slots[0], |
@@ -879,11 +831,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, | |||
879 | 831 | ||
880 | ret = 1; | 832 | ret = 1; |
881 | 833 | ||
882 | out_free: | 834 | out: |
883 | kfree(checksums); | ||
884 | kfree(pages); | 835 | kfree(pages); |
885 | |||
886 | out_update: | ||
887 | if (ret != 1) { | 836 | if (ret != 1) { |
888 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); | 837 | invalidate_inode_pages2_range(inode->i_mapping, 0, index); |
889 | BTRFS_I(inode)->generation = 0; | 838 | BTRFS_I(inode)->generation = 0; |