diff options
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r-- | fs/btrfs/free-space-cache.c | 203 |
1 file changed, 136 insertions, 67 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0037427d8a9d..11d2e9cea09e 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "free-space-cache.h" | 24 | #include "free-space-cache.h" |
25 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "disk-io.h" | 26 | #include "disk-io.h" |
27 | #include "extent_io.h" | ||
27 | 28 | ||
28 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) | 29 | #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) |
29 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) | 30 | #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) |
@@ -81,6 +82,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, | |||
81 | return ERR_PTR(-ENOENT); | 82 | return ERR_PTR(-ENOENT); |
82 | } | 83 | } |
83 | 84 | ||
85 | inode->i_mapping->flags &= ~__GFP_FS; | ||
86 | |||
84 | spin_lock(&block_group->lock); | 87 | spin_lock(&block_group->lock); |
85 | if (!root->fs_info->closing) { | 88 | if (!root->fs_info->closing) { |
86 | block_group->inode = igrab(inode); | 89 | block_group->inode = igrab(inode); |
@@ -222,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, | |||
222 | u64 num_entries; | 225 | u64 num_entries; |
223 | u64 num_bitmaps; | 226 | u64 num_bitmaps; |
224 | u64 generation; | 227 | u64 generation; |
228 | u64 used = btrfs_block_group_used(&block_group->item); | ||
225 | u32 cur_crc = ~(u32)0; | 229 | u32 cur_crc = ~(u32)0; |
226 | pgoff_t index = 0; | 230 | pgoff_t index = 0; |
227 | unsigned long first_page_offset; | 231 | unsigned long first_page_offset; |
@@ -467,6 +471,17 @@ next: | |||
467 | index++; | 471 | index++; |
468 | } | 472 | } |
469 | 473 | ||
474 | spin_lock(&block_group->tree_lock); | ||
475 | if (block_group->free_space != (block_group->key.offset - used - | ||
476 | block_group->bytes_super)) { | ||
477 | spin_unlock(&block_group->tree_lock); | ||
478 | printk(KERN_ERR "block group %llu has an wrong amount of free " | ||
479 | "space\n", block_group->key.objectid); | ||
480 | ret = 0; | ||
481 | goto free_cache; | ||
482 | } | ||
483 | spin_unlock(&block_group->tree_lock); | ||
484 | |||
470 | ret = 1; | 485 | ret = 1; |
471 | out: | 486 | out: |
472 | kfree(checksums); | 487 | kfree(checksums); |
@@ -493,18 +508,23 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
493 | struct inode *inode; | 508 | struct inode *inode; |
494 | struct rb_node *node; | 509 | struct rb_node *node; |
495 | struct list_head *pos, *n; | 510 | struct list_head *pos, *n; |
511 | struct page **pages; | ||
496 | struct page *page; | 512 | struct page *page; |
497 | struct extent_state *cached_state = NULL; | 513 | struct extent_state *cached_state = NULL; |
514 | struct btrfs_free_cluster *cluster = NULL; | ||
515 | struct extent_io_tree *unpin = NULL; | ||
498 | struct list_head bitmap_list; | 516 | struct list_head bitmap_list; |
499 | struct btrfs_key key; | 517 | struct btrfs_key key; |
518 | u64 start, end, len; | ||
500 | u64 bytes = 0; | 519 | u64 bytes = 0; |
501 | u32 *crc, *checksums; | 520 | u32 *crc, *checksums; |
502 | pgoff_t index = 0, last_index = 0; | ||
503 | unsigned long first_page_offset; | 521 | unsigned long first_page_offset; |
504 | int num_checksums; | 522 | int index = 0, num_pages = 0; |
505 | int entries = 0; | 523 | int entries = 0; |
506 | int bitmaps = 0; | 524 | int bitmaps = 0; |
507 | int ret = 0; | 525 | int ret = 0; |
526 | bool next_page = false; | ||
527 | bool out_of_space = false; | ||
508 | 528 | ||
509 | root = root->fs_info->tree_root; | 529 | root = root->fs_info->tree_root; |
510 | 530 | ||
@@ -532,24 +552,43 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
532 | return 0; | 552 | return 0; |
533 | } | 553 | } |
534 | 554 | ||
535 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | 555 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
556 | PAGE_CACHE_SHIFT; | ||
536 | filemap_write_and_wait(inode->i_mapping); | 557 | filemap_write_and_wait(inode->i_mapping); |
537 | btrfs_wait_ordered_range(inode, inode->i_size & | 558 | btrfs_wait_ordered_range(inode, inode->i_size & |
538 | ~(root->sectorsize - 1), (u64)-1); | 559 | ~(root->sectorsize - 1), (u64)-1); |
539 | 560 | ||
540 | /* We need a checksum per page. */ | 561 | /* We need a checksum per page. */ |
541 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | 562 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); |
542 | crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
543 | if (!crc) { | 563 | if (!crc) { |
544 | iput(inode); | 564 | iput(inode); |
545 | return 0; | 565 | return 0; |
546 | } | 566 | } |
547 | 567 | ||
568 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | ||
569 | if (!pages) { | ||
570 | kfree(crc); | ||
571 | iput(inode); | ||
572 | return 0; | ||
573 | } | ||
574 | |||
548 | /* Since the first page has all of our checksums and our generation we | 575 | /* Since the first page has all of our checksums and our generation we |
549 | * need to calculate the offset into the page that we can start writing | 576 | * need to calculate the offset into the page that we can start writing |
550 | * our entries. | 577 | * our entries. |
551 | */ | 578 | */ |
552 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | 579 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); |
580 | |||
581 | /* Get the cluster for this block_group if it exists */ | ||
582 | if (!list_empty(&block_group->cluster_list)) | ||
583 | cluster = list_entry(block_group->cluster_list.next, | ||
584 | struct btrfs_free_cluster, | ||
585 | block_group_list); | ||
586 | |||
587 | /* | ||
588 | * We shouldn't have switched the pinned extents yet so this is the | ||
589 | * right one | ||
590 | */ | ||
591 | unpin = root->fs_info->pinned_extents; | ||
553 | 592 | ||
554 | /* | 593 | /* |
555 | * Lock all pages first so we can lock the extent safely. | 594 | * Lock all pages first so we can lock the extent safely. |
@@ -559,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
559 | * after find_get_page at this point. Just putting this here so people | 598 | * after find_get_page at this point. Just putting this here so people |
560 | * know and don't freak out. | 599 | * know and don't freak out. |
561 | */ | 600 | */ |
562 | while (index <= last_index) { | 601 | while (index < num_pages) { |
563 | page = grab_cache_page(inode->i_mapping, index); | 602 | page = grab_cache_page(inode->i_mapping, index); |
564 | if (!page) { | 603 | if (!page) { |
565 | pgoff_t i = 0; | 604 | int i; |
566 | 605 | ||
567 | while (i < index) { | 606 | for (i = 0; i < num_pages; i++) { |
568 | page = find_get_page(inode->i_mapping, i); | 607 | unlock_page(pages[i]); |
569 | unlock_page(page); | 608 | page_cache_release(pages[i]); |
570 | page_cache_release(page); | ||
571 | page_cache_release(page); | ||
572 | i++; | ||
573 | } | 609 | } |
574 | goto out_free; | 610 | goto out_free; |
575 | } | 611 | } |
612 | pages[index] = page; | ||
576 | index++; | 613 | index++; |
577 | } | 614 | } |
578 | 615 | ||
@@ -580,6 +617,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
580 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, | 617 | lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, |
581 | 0, &cached_state, GFP_NOFS); | 618 | 0, &cached_state, GFP_NOFS); |
582 | 619 | ||
620 | /* | ||
621 | * When searching for pinned extents, we need to start at our start | ||
622 | * offset. | ||
623 | */ | ||
624 | start = block_group->key.objectid; | ||
625 | |||
583 | /* Write out the extent entries */ | 626 | /* Write out the extent entries */ |
584 | do { | 627 | do { |
585 | struct btrfs_free_space_entry *entry; | 628 | struct btrfs_free_space_entry *entry; |
@@ -587,18 +630,25 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
587 | unsigned long offset = 0; | 630 | unsigned long offset = 0; |
588 | unsigned long start_offset = 0; | 631 | unsigned long start_offset = 0; |
589 | 632 | ||
633 | next_page = false; | ||
634 | |||
590 | if (index == 0) { | 635 | if (index == 0) { |
591 | start_offset = first_page_offset; | 636 | start_offset = first_page_offset; |
592 | offset = start_offset; | 637 | offset = start_offset; |
593 | } | 638 | } |
594 | 639 | ||
595 | page = find_get_page(inode->i_mapping, index); | 640 | if (index >= num_pages) { |
641 | out_of_space = true; | ||
642 | break; | ||
643 | } | ||
644 | |||
645 | page = pages[index]; | ||
596 | 646 | ||
597 | addr = kmap(page); | 647 | addr = kmap(page); |
598 | entry = addr + start_offset; | 648 | entry = addr + start_offset; |
599 | 649 | ||
600 | memset(addr, 0, PAGE_CACHE_SIZE); | 650 | memset(addr, 0, PAGE_CACHE_SIZE); |
601 | while (1) { | 651 | while (node && !next_page) { |
602 | struct btrfs_free_space *e; | 652 | struct btrfs_free_space *e; |
603 | 653 | ||
604 | e = rb_entry(node, struct btrfs_free_space, offset_index); | 654 | e = rb_entry(node, struct btrfs_free_space, offset_index); |
@@ -614,12 +664,49 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
614 | entry->type = BTRFS_FREE_SPACE_EXTENT; | 664 | entry->type = BTRFS_FREE_SPACE_EXTENT; |
615 | } | 665 | } |
616 | node = rb_next(node); | 666 | node = rb_next(node); |
617 | if (!node) | 667 | if (!node && cluster) { |
618 | break; | 668 | node = rb_first(&cluster->root); |
669 | cluster = NULL; | ||
670 | } | ||
619 | offset += sizeof(struct btrfs_free_space_entry); | 671 | offset += sizeof(struct btrfs_free_space_entry); |
620 | if (offset + sizeof(struct btrfs_free_space_entry) >= | 672 | if (offset + sizeof(struct btrfs_free_space_entry) >= |
621 | PAGE_CACHE_SIZE) | 673 | PAGE_CACHE_SIZE) |
674 | next_page = true; | ||
675 | entry++; | ||
676 | } | ||
677 | |||
678 | /* | ||
679 | * We want to add any pinned extents to our free space cache | ||
680 | * so we don't leak the space | ||
681 | */ | ||
682 | while (!next_page && (start < block_group->key.objectid + | ||
683 | block_group->key.offset)) { | ||
684 | ret = find_first_extent_bit(unpin, start, &start, &end, | ||
685 | EXTENT_DIRTY); | ||
686 | if (ret) { | ||
687 | ret = 0; | ||
622 | break; | 688 | break; |
689 | } | ||
690 | |||
691 | /* This pinned extent is out of our range */ | ||
692 | if (start >= block_group->key.objectid + | ||
693 | block_group->key.offset) | ||
694 | break; | ||
695 | |||
696 | len = block_group->key.objectid + | ||
697 | block_group->key.offset - start; | ||
698 | len = min(len, end + 1 - start); | ||
699 | |||
700 | entries++; | ||
701 | entry->offset = cpu_to_le64(start); | ||
702 | entry->bytes = cpu_to_le64(len); | ||
703 | entry->type = BTRFS_FREE_SPACE_EXTENT; | ||
704 | |||
705 | start = end + 1; | ||
706 | offset += sizeof(struct btrfs_free_space_entry); | ||
707 | if (offset + sizeof(struct btrfs_free_space_entry) >= | ||
708 | PAGE_CACHE_SIZE) | ||
709 | next_page = true; | ||
623 | entry++; | 710 | entry++; |
624 | } | 711 | } |
625 | *crc = ~(u32)0; | 712 | *crc = ~(u32)0; |
@@ -632,25 +719,8 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
632 | 719 | ||
633 | bytes += PAGE_CACHE_SIZE; | 720 | bytes += PAGE_CACHE_SIZE; |
634 | 721 | ||
635 | ClearPageChecked(page); | ||
636 | set_page_extent_mapped(page); | ||
637 | SetPageUptodate(page); | ||
638 | set_page_dirty(page); | ||
639 | |||
640 | /* | ||
641 | * We need to release our reference we got for grab_cache_page, | ||
642 | * except for the first page which will hold our checksums, we | ||
643 | * do that below. | ||
644 | */ | ||
645 | if (index != 0) { | ||
646 | unlock_page(page); | ||
647 | page_cache_release(page); | ||
648 | } | ||
649 | |||
650 | page_cache_release(page); | ||
651 | |||
652 | index++; | 722 | index++; |
653 | } while (node); | 723 | } while (node || next_page); |
654 | 724 | ||
655 | /* Write out the bitmaps */ | 725 | /* Write out the bitmaps */ |
656 | list_for_each_safe(pos, n, &bitmap_list) { | 726 | list_for_each_safe(pos, n, &bitmap_list) { |
@@ -658,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
658 | struct btrfs_free_space *entry = | 728 | struct btrfs_free_space *entry = |
659 | list_entry(pos, struct btrfs_free_space, list); | 729 | list_entry(pos, struct btrfs_free_space, list); |
660 | 730 | ||
661 | page = find_get_page(inode->i_mapping, index); | 731 | if (index >= num_pages) { |
732 | out_of_space = true; | ||
733 | break; | ||
734 | } | ||
735 | page = pages[index]; | ||
662 | 736 | ||
663 | addr = kmap(page); | 737 | addr = kmap(page); |
664 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); | 738 | memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); |
@@ -669,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
669 | crc++; | 743 | crc++; |
670 | bytes += PAGE_CACHE_SIZE; | 744 | bytes += PAGE_CACHE_SIZE; |
671 | 745 | ||
672 | ClearPageChecked(page); | ||
673 | set_page_extent_mapped(page); | ||
674 | SetPageUptodate(page); | ||
675 | set_page_dirty(page); | ||
676 | unlock_page(page); | ||
677 | page_cache_release(page); | ||
678 | page_cache_release(page); | ||
679 | list_del_init(&entry->list); | 746 | list_del_init(&entry->list); |
680 | index++; | 747 | index++; |
681 | } | 748 | } |
682 | 749 | ||
750 | if (out_of_space) { | ||
751 | btrfs_drop_pages(pages, num_pages); | ||
752 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
753 | i_size_read(inode) - 1, &cached_state, | ||
754 | GFP_NOFS); | ||
755 | ret = 0; | ||
756 | goto out_free; | ||
757 | } | ||
758 | |||
683 | /* Zero out the rest of the pages just to make sure */ | 759 | /* Zero out the rest of the pages just to make sure */ |
684 | while (index <= last_index) { | 760 | while (index < num_pages) { |
685 | void *addr; | 761 | void *addr; |
686 | 762 | ||
687 | page = find_get_page(inode->i_mapping, index); | 763 | page = pages[index]; |
688 | |||
689 | addr = kmap(page); | 764 | addr = kmap(page); |
690 | memset(addr, 0, PAGE_CACHE_SIZE); | 765 | memset(addr, 0, PAGE_CACHE_SIZE); |
691 | kunmap(page); | 766 | kunmap(page); |
692 | ClearPageChecked(page); | ||
693 | set_page_extent_mapped(page); | ||
694 | SetPageUptodate(page); | ||
695 | set_page_dirty(page); | ||
696 | unlock_page(page); | ||
697 | page_cache_release(page); | ||
698 | page_cache_release(page); | ||
699 | bytes += PAGE_CACHE_SIZE; | 767 | bytes += PAGE_CACHE_SIZE; |
700 | index++; | 768 | index++; |
701 | } | 769 | } |
702 | 770 | ||
703 | btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state); | ||
704 | |||
705 | /* Write the checksums and trans id to the first page */ | 771 | /* Write the checksums and trans id to the first page */ |
706 | { | 772 | { |
707 | void *addr; | 773 | void *addr; |
708 | u64 *gen; | 774 | u64 *gen; |
709 | 775 | ||
710 | page = find_get_page(inode->i_mapping, 0); | 776 | page = pages[0]; |
711 | 777 | ||
712 | addr = kmap(page); | 778 | addr = kmap(page); |
713 | memcpy(addr, checksums, sizeof(u32) * num_checksums); | 779 | memcpy(addr, checksums, sizeof(u32) * num_pages); |
714 | gen = addr + (sizeof(u32) * num_checksums); | 780 | gen = addr + (sizeof(u32) * num_pages); |
715 | *gen = trans->transid; | 781 | *gen = trans->transid; |
716 | kunmap(page); | 782 | kunmap(page); |
717 | ClearPageChecked(page); | ||
718 | set_page_extent_mapped(page); | ||
719 | SetPageUptodate(page); | ||
720 | set_page_dirty(page); | ||
721 | unlock_page(page); | ||
722 | page_cache_release(page); | ||
723 | page_cache_release(page); | ||
724 | } | 783 | } |
725 | BTRFS_I(inode)->generation = trans->transid; | ||
726 | 784 | ||
785 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | ||
786 | bytes, &cached_state); | ||
787 | btrfs_drop_pages(pages, num_pages); | ||
727 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | 788 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, |
728 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | 789 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); |
729 | 790 | ||
791 | if (ret) { | ||
792 | ret = 0; | ||
793 | goto out_free; | ||
794 | } | ||
795 | |||
796 | BTRFS_I(inode)->generation = trans->transid; | ||
797 | |||
730 | filemap_write_and_wait(inode->i_mapping); | 798 | filemap_write_and_wait(inode->i_mapping); |
731 | 799 | ||
732 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 800 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
@@ -777,6 +845,7 @@ out_free: | |||
777 | BTRFS_I(inode)->generation = 0; | 845 | BTRFS_I(inode)->generation = 0; |
778 | } | 846 | } |
779 | kfree(checksums); | 847 | kfree(checksums); |
848 | kfree(pages); | ||
780 | btrfs_update_inode(trans, root, inode); | 849 | btrfs_update_inode(trans, root, inode); |
781 | iput(inode); | 850 | iput(inode); |
782 | return ret; | 851 | return ret; |