diff options
author | Josef Bacik <josef@redhat.com> | 2011-04-06 13:05:22 -0400 |
---|---|---|
committer | Josef Bacik <josef@redhat.com> | 2011-04-08 13:00:27 -0400 |
commit | be1a12a0dfed06cf1e62e35bf91620dc610a451a (patch) | |
tree | f2cd5976aae8a466b926f9fd7c0e10a082b5a11a /fs | |
parent | c9ddec74aa950a220cc4caa5215cfc5d886050b7 (diff) |
Btrfs: deal with the case that we run out of space in the cache
Currently we don't handle running out of space in the cache, so to fix this we
keep track of how far in the cache we are. Then we only dirty the pages if we
successfully modify all of them, otherwise if we have an error or run out of
space we can just drop them and not worry about the vm writing them out.
Thanks,
Tested-by Johannes Hirte <johannes.hirte@fem.tu-ilmenau.de>
Signed-off-by: Josef Bacik <josef@redhat.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ctree.h | 5 | ||||
-rw-r--r-- | fs/btrfs/file.c | 21 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 117 |
3 files changed, 69 insertions, 74 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3458b5725540..0d00a07b5b29 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -2576,6 +2576,11 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, | |||
2576 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, | 2576 | int btrfs_mark_extent_written(struct btrfs_trans_handle *trans, |
2577 | struct inode *inode, u64 start, u64 end); | 2577 | struct inode *inode, u64 start, u64 end); |
2578 | int btrfs_release_file(struct inode *inode, struct file *file); | 2578 | int btrfs_release_file(struct inode *inode, struct file *file); |
2579 | void btrfs_drop_pages(struct page **pages, size_t num_pages); | ||
2580 | int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, | ||
2581 | struct page **pages, size_t num_pages, | ||
2582 | loff_t pos, size_t write_bytes, | ||
2583 | struct extent_state **cached); | ||
2579 | 2584 | ||
2580 | /* tree-defrag.c */ | 2585 | /* tree-defrag.c */ |
2581 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | 2586 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e621ea54a3fd..75899a01dded 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -104,7 +104,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | |||
104 | /* | 104 | /* |
105 | * unlocks pages after btrfs_file_write is done with them | 105 | * unlocks pages after btrfs_file_write is done with them |
106 | */ | 106 | */ |
107 | static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) | 107 | void btrfs_drop_pages(struct page **pages, size_t num_pages) |
108 | { | 108 | { |
109 | size_t i; | 109 | size_t i; |
110 | for (i = 0; i < num_pages; i++) { | 110 | for (i = 0; i < num_pages; i++) { |
@@ -127,16 +127,13 @@ static noinline void btrfs_drop_pages(struct page **pages, size_t num_pages) | |||
127 | * this also makes the decision about creating an inline extent vs | 127 | * this also makes the decision about creating an inline extent vs |
128 | * doing real data extents, marking pages dirty and delalloc as required. | 128 | * doing real data extents, marking pages dirty and delalloc as required. |
129 | */ | 129 | */ |
130 | static noinline int dirty_and_release_pages(struct btrfs_root *root, | 130 | int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, |
131 | struct file *file, | 131 | struct page **pages, size_t num_pages, |
132 | struct page **pages, | 132 | loff_t pos, size_t write_bytes, |
133 | size_t num_pages, | 133 | struct extent_state **cached) |
134 | loff_t pos, | ||
135 | size_t write_bytes) | ||
136 | { | 134 | { |
137 | int err = 0; | 135 | int err = 0; |
138 | int i; | 136 | int i; |
139 | struct inode *inode = fdentry(file)->d_inode; | ||
140 | u64 num_bytes; | 137 | u64 num_bytes; |
141 | u64 start_pos; | 138 | u64 start_pos; |
142 | u64 end_of_last_block; | 139 | u64 end_of_last_block; |
@@ -149,7 +146,7 @@ static noinline int dirty_and_release_pages(struct btrfs_root *root, | |||
149 | 146 | ||
150 | end_of_last_block = start_pos + num_bytes - 1; | 147 | end_of_last_block = start_pos + num_bytes - 1; |
151 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 148 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
152 | NULL); | 149 | cached); |
153 | if (err) | 150 | if (err) |
154 | return err; | 151 | return err; |
155 | 152 | ||
@@ -992,9 +989,9 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
992 | } | 989 | } |
993 | 990 | ||
994 | if (copied > 0) { | 991 | if (copied > 0) { |
995 | ret = dirty_and_release_pages(root, file, pages, | 992 | ret = btrfs_dirty_pages(root, inode, pages, |
996 | dirty_pages, pos, | 993 | dirty_pages, pos, copied, |
997 | copied); | 994 | NULL); |
998 | if (ret) { | 995 | if (ret) { |
999 | btrfs_delalloc_release_space(inode, | 996 | btrfs_delalloc_release_space(inode, |
1000 | dirty_pages << PAGE_CACHE_SHIFT); | 997 | dirty_pages << PAGE_CACHE_SHIFT); |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index f561c953205b..a3f420def0e9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -508,6 +508,7 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
508 | struct inode *inode; | 508 | struct inode *inode; |
509 | struct rb_node *node; | 509 | struct rb_node *node; |
510 | struct list_head *pos, *n; | 510 | struct list_head *pos, *n; |
511 | struct page **pages; | ||
511 | struct page *page; | 512 | struct page *page; |
512 | struct extent_state *cached_state = NULL; | 513 | struct extent_state *cached_state = NULL; |
513 | struct btrfs_free_cluster *cluster = NULL; | 514 | struct btrfs_free_cluster *cluster = NULL; |
@@ -517,13 +518,13 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
517 | u64 start, end, len; | 518 | u64 start, end, len; |
518 | u64 bytes = 0; | 519 | u64 bytes = 0; |
519 | u32 *crc, *checksums; | 520 | u32 *crc, *checksums; |
520 | pgoff_t index = 0, last_index = 0; | ||
521 | unsigned long first_page_offset; | 521 | unsigned long first_page_offset; |
522 | int num_checksums; | 522 | int index = 0, num_pages = 0; |
523 | int entries = 0; | 523 | int entries = 0; |
524 | int bitmaps = 0; | 524 | int bitmaps = 0; |
525 | int ret = 0; | 525 | int ret = 0; |
526 | bool next_page = false; | 526 | bool next_page = false; |
527 | bool out_of_space = false; | ||
527 | 528 | ||
528 | root = root->fs_info->tree_root; | 529 | root = root->fs_info->tree_root; |
529 | 530 | ||
@@ -551,24 +552,31 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
551 | return 0; | 552 | return 0; |
552 | } | 553 | } |
553 | 554 | ||
554 | last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; | 555 | num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> |
556 | PAGE_CACHE_SHIFT; | ||
555 | filemap_write_and_wait(inode->i_mapping); | 557 | filemap_write_and_wait(inode->i_mapping); |
556 | btrfs_wait_ordered_range(inode, inode->i_size & | 558 | btrfs_wait_ordered_range(inode, inode->i_size & |
557 | ~(root->sectorsize - 1), (u64)-1); | 559 | ~(root->sectorsize - 1), (u64)-1); |
558 | 560 | ||
559 | /* We need a checksum per page. */ | 561 | /* We need a checksum per page. */ |
560 | num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; | 562 | crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); |
561 | crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); | ||
562 | if (!crc) { | 563 | if (!crc) { |
563 | iput(inode); | 564 | iput(inode); |
564 | return 0; | 565 | return 0; |
565 | } | 566 | } |
566 | 567 | ||
568 | pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); | ||
569 | if (!pages) { | ||
570 | kfree(crc); | ||
571 | iput(inode); | ||
572 | return 0; | ||
573 | } | ||
574 | |||
567 | /* Since the first page has all of our checksums and our generation we | 575 | /* Since the first page has all of our checksums and our generation we |
568 | * need to calculate the offset into the page that we can start writing | 576 | * need to calculate the offset into the page that we can start writing |
569 | * our entries. | 577 | * our entries. |
570 | */ | 578 | */ |
571 | first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); | 579 | first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); |
572 | 580 | ||
573 | /* Get the cluster for this block_group if it exists */ | 581 | /* Get the cluster for this block_group if it exists */ |
574 | if (!list_empty(&block_group->cluster_list)) | 582 | if (!list_empty(&block_group->cluster_list)) |
@@ -590,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
590 | * after find_get_page at this point. Just putting this here so people | 598 | * after find_get_page at this point. Just putting this here so people |
591 | * know and don't freak out. | 599 | * know and don't freak out. |
592 | */ | 600 | */ |
593 | while (index <= last_index) { | 601 | while (index < num_pages) { |
594 | page = grab_cache_page(inode->i_mapping, index); | 602 | page = grab_cache_page(inode->i_mapping, index); |
595 | if (!page) { | 603 | if (!page) { |
596 | pgoff_t i = 0; | 604 | int i; |
597 | 605 | ||
598 | while (i < index) { | 606 | for (i = 0; i < num_pages; i++) { |
599 | page = find_get_page(inode->i_mapping, i); | 607 | unlock_page(pages[i]); |
600 | unlock_page(page); | 608 | page_cache_release(pages[i]); |
601 | page_cache_release(page); | ||
602 | page_cache_release(page); | ||
603 | i++; | ||
604 | } | 609 | } |
605 | goto out_free; | 610 | goto out_free; |
606 | } | 611 | } |
612 | pages[index] = page; | ||
607 | index++; | 613 | index++; |
608 | } | 614 | } |
609 | 615 | ||
@@ -631,7 +637,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
631 | offset = start_offset; | 637 | offset = start_offset; |
632 | } | 638 | } |
633 | 639 | ||
634 | page = find_get_page(inode->i_mapping, index); | 640 | if (index >= num_pages) { |
641 | out_of_space = true; | ||
642 | break; | ||
643 | } | ||
644 | |||
645 | page = pages[index]; | ||
635 | 646 | ||
636 | addr = kmap(page); | 647 | addr = kmap(page); |
637 | entry = addr + start_offset; | 648 | entry = addr + start_offset; |
@@ -708,23 +719,6 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
708 | 719 | ||
709 | bytes += PAGE_CACHE_SIZE; | 720 | bytes += PAGE_CACHE_SIZE; |
710 | 721 | ||
711 | ClearPageChecked(page); | ||
712 | set_page_extent_mapped(page); | ||
713 | SetPageUptodate(page); | ||
714 | set_page_dirty(page); | ||
715 | |||
716 | /* | ||
717 | * We need to release our reference we got for grab_cache_page, | ||
718 | * except for the first page which will hold our checksums, we | ||
719 | * do that below. | ||
720 | */ | ||
721 | if (index != 0) { | ||
722 | unlock_page(page); | ||
723 | page_cache_release(page); | ||
724 | } | ||
725 | |||
726 | page_cache_release(page); | ||
727 | |||
728 | index++; | 722 | index++; |
729 | } while (node || next_page); | 723 | } while (node || next_page); |
730 | 724 | ||
@@ -734,6 +728,10 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
734 | struct btrfs_free_space *entry = | 728 | struct btrfs_free_space *entry = |
735 | list_entry(pos, struct btrfs_free_space, list); | 729 | list_entry(pos, struct btrfs_free_space, list); |
736 | 730 | ||
731 | if (index >= num_pages) { | ||
732 | out_of_space = true; | ||
733 | break; | ||
734 | } | ||
737 | page = find_get_page(inode->i_mapping, index); | 735 | page = find_get_page(inode->i_mapping, index); |
738 | 736 | ||
739 | addr = kmap(page); | 737 | addr = kmap(page); |
@@ -745,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root, | |||
745 | crc++; | 743 | crc++; |
746 | bytes += PAGE_CACHE_SIZE; | 744 | bytes += PAGE_CACHE_SIZE; |
747 | 745 | ||
748 | ClearPageChecked(page); | ||
749 | set_page_extent_mapped(page); | ||
750 | SetPageUptodate(page); | ||
751 | set_page_dirty(page); | ||
752 | unlock_page(page); | ||
753 | page_cache_release(page); | ||
754 | page_cache_release(page); | ||
755 | list_del_init(&entry->list); | 746 | list_del_init(&entry->list); |
756 | index++; | 747 | index++; |
757 | } | 748 | } |
758 | 749 | ||
750 | if (out_of_space) { | ||
751 | btrfs_drop_pages(pages, num_pages); | ||
752 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | ||
753 | i_size_read(inode) - 1, &cached_state, | ||
754 | GFP_NOFS); | ||
755 | ret = 0; | ||
756 | goto out_free; | ||
757 | } | ||
758 | |||
759 | /* Zero out the rest of the pages just to make sure */ | 759 | /* Zero out the rest of the pages just to make sure */ |
760 | while (index <= last_index) { | 760 | while (index < num_pages) { |
761 | void *addr; | 761 | void *addr; |
762 | 762 | ||
763 | page = find_get_page(inode->i_mapping, index); | 763 | page = pages[index]; |
764 | |||
765 | addr = kmap(page); | 764 | addr = kmap(page); |
766 | memset(addr, 0, PAGE_CACHE_SIZE); | 765 | memset(addr, 0, PAGE_CACHE_SIZE); |
767 | kunmap(page); | 766 | kunmap(page); |
768 | ClearPageChecked(page); | ||
769 | set_page_extent_mapped(page); | ||
770 | SetPageUptodate(page); | ||
771 | set_page_dirty(page); | ||
772 | unlock_page(page); | ||
773 | page_cache_release(page); | ||
774 | page_cache_release(page); | ||
775 | bytes += PAGE_CACHE_SIZE; | 767 | bytes += PAGE_CACHE_SIZE; |
776 | index++; | 768 | index++; |
777 | } | 769 | } |
778 | 770 | ||
779 | btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state); | ||
780 | |||
781 | /* Write the checksums and trans id to the first page */ | 771 | /* Write the checksums and trans id to the first page */ |
782 | { | 772 | { |
783 | void *addr; | 773 | void *addr; |
784 | u64 *gen; | 774 | u64 *gen; |
785 | 775 | ||
786 | page = find_get_page(inode->i_mapping, 0); | 776 | page = pages[0]; |
787 | 777 | ||
788 | addr = kmap(page); | 778 | addr = kmap(page); |
789 | memcpy(addr, checksums, sizeof(u32) * num_checksums); | 779 | memcpy(addr, checksums, sizeof(u32) * num_pages); |
790 | gen = addr + (sizeof(u32) * num_checksums); | 780 | gen = addr + (sizeof(u32) * num_pages); |
791 | *gen = trans->transid; | 781 | *gen = trans->transid; |
792 | kunmap(page); | 782 | kunmap(page); |
793 | ClearPageChecked(page); | ||
794 | set_page_extent_mapped(page); | ||
795 | SetPageUptodate(page); | ||
796 | set_page_dirty(page); | ||
797 | unlock_page(page); | ||
798 | page_cache_release(page); | ||
799 | page_cache_release(page); | ||
800 | } | 783 | } |
801 | BTRFS_I(inode)->generation = trans->transid; | ||
802 | 784 | ||
785 | ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, | ||
786 | bytes, &cached_state); | ||
787 | btrfs_drop_pages(pages, num_pages); | ||
803 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, | 788 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, |
804 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); | 789 | i_size_read(inode) - 1, &cached_state, GFP_NOFS); |
805 | 790 | ||
791 | if (ret) { | ||
792 | ret = 0; | ||
793 | goto out_free; | ||
794 | } | ||
795 | |||
796 | BTRFS_I(inode)->generation = trans->transid; | ||
797 | |||
806 | filemap_write_and_wait(inode->i_mapping); | 798 | filemap_write_and_wait(inode->i_mapping); |
807 | 799 | ||
808 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; | 800 | key.objectid = BTRFS_FREE_SPACE_OBJECTID; |
@@ -853,6 +845,7 @@ out_free: | |||
853 | BTRFS_I(inode)->generation = 0; | 845 | BTRFS_I(inode)->generation = 0; |
854 | } | 846 | } |
855 | kfree(checksums); | 847 | kfree(checksums); |
848 | kfree(pages); | ||
856 | btrfs_update_inode(trans, root, inode); | 849 | btrfs_update_inode(trans, root, inode); |
857 | iput(inode); | 850 | iput(inode); |
858 | return ret; | 851 | return ret; |