diff options
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r-- | fs/btrfs/ordered-data.c | 127 |
1 files changed, 21 insertions, 106 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index d6f0806c682f..897fba835f89 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
159 | * | 159 | * |
160 | * len is the length of the extent | 160 | * len is the length of the extent |
161 | * | 161 | * |
162 | * This also sets the EXTENT_ORDERED bit on the range in the inode. | ||
163 | * | ||
164 | * The tree is given a single reference on the ordered extent that was | 162 | * The tree is given a single reference on the ordered extent that was |
165 | * inserted. | 163 | * inserted. |
166 | */ | 164 | */ |
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
181 | entry->start = start; | 179 | entry->start = start; |
182 | entry->len = len; | 180 | entry->len = len; |
183 | entry->disk_len = disk_len; | 181 | entry->disk_len = disk_len; |
182 | entry->bytes_left = len; | ||
184 | entry->inode = inode; | 183 | entry->inode = inode; |
185 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 184 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
186 | set_bit(type, &entry->flags); | 185 | set_bit(type, &entry->flags); |
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
195 | &entry->rb_node); | 194 | &entry->rb_node); |
196 | BUG_ON(node); | 195 | BUG_ON(node); |
197 | 196 | ||
198 | set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset, | ||
199 | entry_end(entry) - 1, GFP_NOFS); | ||
200 | |||
201 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 197 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
202 | list_add_tail(&entry->root_extent_list, | 198 | list_add_tail(&entry->root_extent_list, |
203 | &BTRFS_I(inode)->root->fs_info->ordered_extents); | 199 | &BTRFS_I(inode)->root->fs_info->ordered_extents); |
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
241 | struct btrfs_ordered_inode_tree *tree; | 237 | struct btrfs_ordered_inode_tree *tree; |
242 | struct rb_node *node; | 238 | struct rb_node *node; |
243 | struct btrfs_ordered_extent *entry; | 239 | struct btrfs_ordered_extent *entry; |
244 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
245 | int ret; | 240 | int ret; |
246 | 241 | ||
247 | tree = &BTRFS_I(inode)->ordered_tree; | 242 | tree = &BTRFS_I(inode)->ordered_tree; |
248 | mutex_lock(&tree->mutex); | 243 | mutex_lock(&tree->mutex); |
249 | clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1, | ||
250 | GFP_NOFS); | ||
251 | node = tree_search(tree, file_offset); | 244 | node = tree_search(tree, file_offset); |
252 | if (!node) { | 245 | if (!node) { |
253 | ret = 1; | 246 | ret = 1; |
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
260 | goto out; | 253 | goto out; |
261 | } | 254 | } |
262 | 255 | ||
263 | ret = test_range_bit(io_tree, entry->file_offset, | 256 | if (io_size > entry->bytes_left) { |
264 | entry->file_offset + entry->len - 1, | 257 | printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n", |
265 | EXTENT_ORDERED, 0); | 258 | (unsigned long long)entry->bytes_left, |
266 | if (ret == 0) | 259 | (unsigned long long)io_size); |
260 | } | ||
261 | entry->bytes_left -= io_size; | ||
262 | if (entry->bytes_left == 0) | ||
267 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 263 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
264 | else | ||
265 | ret = 1; | ||
268 | out: | 266 | out: |
269 | mutex_unlock(&tree->mutex); | 267 | mutex_unlock(&tree->mutex); |
270 | return ret == 0; | 268 | return ret == 0; |
@@ -460,7 +458,7 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
460 | * start IO on any dirty ones so the wait doesn't stall waiting | 458 | * start IO on any dirty ones so the wait doesn't stall waiting |
461 | * for pdflush to find them | 459 | * for pdflush to find them |
462 | */ | 460 | */ |
463 | btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL); | 461 | filemap_fdatawrite_range(inode->i_mapping, start, end); |
464 | if (wait) { | 462 | if (wait) { |
465 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 463 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
466 | &entry->flags)); | 464 | &entry->flags)); |
@@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) | |||
476 | u64 orig_end; | 474 | u64 orig_end; |
477 | u64 wait_end; | 475 | u64 wait_end; |
478 | struct btrfs_ordered_extent *ordered; | 476 | struct btrfs_ordered_extent *ordered; |
477 | int found; | ||
479 | 478 | ||
480 | if (start + len < start) { | 479 | if (start + len < start) { |
481 | orig_end = INT_LIMIT(loff_t); | 480 | orig_end = INT_LIMIT(loff_t); |
@@ -489,19 +488,18 @@ again: | |||
489 | /* start IO across the range first to instantiate any delalloc | 488 | /* start IO across the range first to instantiate any delalloc |
490 | * extents | 489 | * extents |
491 | */ | 490 | */ |
492 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | 491 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); |
493 | 492 | ||
494 | /* The compression code will leave pages locked but return from | 493 | /* The compression code will leave pages locked but return from |
495 | * writepage without setting the page writeback. Starting again | 494 | * writepage without setting the page writeback. Starting again |
496 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. | 495 | * with WB_SYNC_ALL will end up waiting for the IO to actually start. |
497 | */ | 496 | */ |
498 | btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); | 497 | filemap_fdatawrite_range(inode->i_mapping, start, orig_end); |
499 | 498 | ||
500 | btrfs_wait_on_page_writeback_range(inode->i_mapping, | 499 | filemap_fdatawait_range(inode->i_mapping, start, orig_end); |
501 | start >> PAGE_CACHE_SHIFT, | ||
502 | orig_end >> PAGE_CACHE_SHIFT); | ||
503 | 500 | ||
504 | end = orig_end; | 501 | end = orig_end; |
502 | found = 0; | ||
505 | while (1) { | 503 | while (1) { |
506 | ordered = btrfs_lookup_first_ordered_extent(inode, end); | 504 | ordered = btrfs_lookup_first_ordered_extent(inode, end); |
507 | if (!ordered) | 505 | if (!ordered) |
@@ -514,6 +512,7 @@ again: | |||
514 | btrfs_put_ordered_extent(ordered); | 512 | btrfs_put_ordered_extent(ordered); |
515 | break; | 513 | break; |
516 | } | 514 | } |
515 | found++; | ||
517 | btrfs_start_ordered_extent(inode, ordered, 1); | 516 | btrfs_start_ordered_extent(inode, ordered, 1); |
518 | end = ordered->file_offset; | 517 | end = ordered->file_offset; |
519 | btrfs_put_ordered_extent(ordered); | 518 | btrfs_put_ordered_extent(ordered); |
@@ -521,8 +520,8 @@ again: | |||
521 | break; | 520 | break; |
522 | end--; | 521 | end--; |
523 | } | 522 | } |
524 | if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, | 523 | if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, |
525 | EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { | 524 | EXTENT_DELALLOC, 0, NULL)) { |
526 | schedule_timeout(1); | 525 | schedule_timeout(1); |
527 | goto again; | 526 | goto again; |
528 | } | 527 | } |
@@ -613,7 +612,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
613 | */ | 612 | */ |
614 | if (test_range_bit(io_tree, disk_i_size, | 613 | if (test_range_bit(io_tree, disk_i_size, |
615 | ordered->file_offset + ordered->len - 1, | 614 | ordered->file_offset + ordered->len - 1, |
616 | EXTENT_DELALLOC, 0)) { | 615 | EXTENT_DELALLOC, 0, NULL)) { |
617 | goto out; | 616 | goto out; |
618 | } | 617 | } |
619 | /* | 618 | /* |
@@ -664,7 +663,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, | |||
664 | */ | 663 | */ |
665 | if (i_size_test > entry_end(ordered) && | 664 | if (i_size_test > entry_end(ordered) && |
666 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, | 665 | !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, |
667 | EXTENT_DELALLOC, 0)) { | 666 | EXTENT_DELALLOC, 0, NULL)) { |
668 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); | 667 | new_i_size = min_t(u64, i_size_test, i_size_read(inode)); |
669 | } | 668 | } |
670 | BTRFS_I(inode)->disk_i_size = new_i_size; | 669 | BTRFS_I(inode)->disk_i_size = new_i_size; |
@@ -715,90 +714,6 @@ out: | |||
715 | } | 714 | } |
716 | 715 | ||
717 | 716 | ||
718 | /** | ||
719 | * taken from mm/filemap.c because it isn't exported | ||
720 | * | ||
721 | * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range | ||
722 | * @mapping: address space structure to write | ||
723 | * @start: offset in bytes where the range starts | ||
724 | * @end: offset in bytes where the range ends (inclusive) | ||
725 | * @sync_mode: enable synchronous operation | ||
726 | * | ||
727 | * Start writeback against all of a mapping's dirty pages that lie | ||
728 | * within the byte offsets <start, end> inclusive. | ||
729 | * | ||
730 | * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as | ||
731 | * opposed to a regular memory cleansing writeback. The difference between | ||
732 | * these two operations is that if a dirty page/buffer is encountered, it must | ||
733 | * be waited upon, and not just skipped over. | ||
734 | */ | ||
735 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | ||
736 | loff_t end, int sync_mode) | ||
737 | { | ||
738 | struct writeback_control wbc = { | ||
739 | .sync_mode = sync_mode, | ||
740 | .nr_to_write = mapping->nrpages * 2, | ||
741 | .range_start = start, | ||
742 | .range_end = end, | ||
743 | .for_writepages = 1, | ||
744 | }; | ||
745 | return btrfs_writepages(mapping, &wbc); | ||
746 | } | ||
747 | |||
748 | /** | ||
749 | * taken from mm/filemap.c because it isn't exported | ||
750 | * | ||
751 | * wait_on_page_writeback_range - wait for writeback to complete | ||
752 | * @mapping: target address_space | ||
753 | * @start: beginning page index | ||
754 | * @end: ending page index | ||
755 | * | ||
756 | * Wait for writeback to complete against pages indexed by start->end | ||
757 | * inclusive | ||
758 | */ | ||
759 | int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | ||
760 | pgoff_t start, pgoff_t end) | ||
761 | { | ||
762 | struct pagevec pvec; | ||
763 | int nr_pages; | ||
764 | int ret = 0; | ||
765 | pgoff_t index; | ||
766 | |||
767 | if (end < start) | ||
768 | return 0; | ||
769 | |||
770 | pagevec_init(&pvec, 0); | ||
771 | index = start; | ||
772 | while ((index <= end) && | ||
773 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
774 | PAGECACHE_TAG_WRITEBACK, | ||
775 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) { | ||
776 | unsigned i; | ||
777 | |||
778 | for (i = 0; i < nr_pages; i++) { | ||
779 | struct page *page = pvec.pages[i]; | ||
780 | |||
781 | /* until radix tree lookup accepts end_index */ | ||
782 | if (page->index > end) | ||
783 | continue; | ||
784 | |||
785 | wait_on_page_writeback(page); | ||
786 | if (PageError(page)) | ||
787 | ret = -EIO; | ||
788 | } | ||
789 | pagevec_release(&pvec); | ||
790 | cond_resched(); | ||
791 | } | ||
792 | |||
793 | /* Check for outstanding write errors */ | ||
794 | if (test_and_clear_bit(AS_ENOSPC, &mapping->flags)) | ||
795 | ret = -ENOSPC; | ||
796 | if (test_and_clear_bit(AS_EIO, &mapping->flags)) | ||
797 | ret = -EIO; | ||
798 | |||
799 | return ret; | ||
800 | } | ||
801 | |||
802 | /* | 717 | /* |
803 | * add a given inode to the list of inodes that must be fully on | 718 | * add a given inode to the list of inodes that must be fully on |
804 | * disk before a transaction commit finishes. | 719 | * disk before a transaction commit finishes. |