Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--   fs/btrfs/ordered-data.c | 133
1 file changed, 27 insertions, 106 deletions
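In short, this patch stops tracking ordered-extent completion with the EXTENT_ORDERED bit in the inode's io_tree. Each ordered extent instead carries a bytes_left counter, initialized to the extent length in btrfs_add_ordered_extent() and decremented as IO completes; BTRFS_ORDERED_IO_DONE is set only once the counter reaches zero. The local copies of the filemap helpers are also dropped in favour of the exported filemap_fdatawrite_range()/filemap_fdatawait_range(). Below is a minimal userspace sketch of the bytes_left accounting pattern only; struct ordered_extent, dec_test_ordered_pending(), the sizes, and the boolean return convention are simplified stand-ins for illustration, not the kernel code.

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

/* Simplified stand-in for struct btrfs_ordered_extent: only the fields
 * needed to show the completion accounting added by this patch. */
struct ordered_extent {
        uint64_t len;        /* total length of the ordered extent */
        uint64_t bytes_left; /* bytes of IO still outstanding */
        bool io_done;        /* stand-in for the BTRFS_ORDERED_IO_DONE flag */
};

/* Mirrors the shape of the new btrfs_dec_test_ordered_pending() logic:
 * warn if more IO completes than was outstanding, subtract the completed
 * size, and report the extent as done only once bytes_left hits zero.
 * (The kernel helper returns the inverse and runs under the tree mutex.) */
static bool dec_test_ordered_pending(struct ordered_extent *entry,
                                     uint64_t io_size)
{
        if (io_size > entry->bytes_left)
                fprintf(stderr, "bad ordered accounting left %llu size %llu\n",
                        (unsigned long long)entry->bytes_left,
                        (unsigned long long)io_size);
        entry->bytes_left -= io_size;
        if (entry->bytes_left == 0)
                entry->io_done = true;
        return entry->io_done;
}

int main(void)
{
        /* bytes_left starts equal to len, as in btrfs_add_ordered_extent() */
        struct ordered_extent oe = { .len = 8192, .bytes_left = 8192 };

        printf("after first 4k: done=%d\n", dec_test_ordered_pending(&oe, 4096));
        printf("after last 4k:  done=%d\n", dec_test_ordered_pending(&oe, 4096));
        return 0;
}

The real helper performs this update under the ordered tree's mutex; the sketch leaves locking out so the accounting itself stays visible.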
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index d6f0806c682f..5799bc46a309 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
  *
  * len is the length of the extent
  *
- * This also sets the EXTENT_ORDERED bit on the range in the inode.
- *
  * The tree is given a single reference on the ordered extent that was
  * inserted.
  */
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
         entry->start = start;
         entry->len = len;
         entry->disk_len = disk_len;
+        entry->bytes_left = len;
         entry->inode = inode;
         if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
                 set_bit(type, &entry->flags);
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
                    &entry->rb_node);
         BUG_ON(node);

-        set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
-                           entry_end(entry) - 1, GFP_NOFS);
-
         spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
         list_add_tail(&entry->root_extent_list,
                       &BTRFS_I(inode)->root->fs_info->ordered_extents);
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
         struct btrfs_ordered_inode_tree *tree;
         struct rb_node *node;
         struct btrfs_ordered_extent *entry;
-        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
         int ret;

         tree = &BTRFS_I(inode)->ordered_tree;
         mutex_lock(&tree->mutex);
-        clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
-                             GFP_NOFS);
         node = tree_search(tree, file_offset);
         if (!node) {
                 ret = 1;
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
                 goto out;
         }

-        ret = test_range_bit(io_tree, entry->file_offset,
-                             entry->file_offset + entry->len - 1,
-                             EXTENT_ORDERED, 0);
-        if (ret == 0)
+        if (io_size > entry->bytes_left) {
+                printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+                       (unsigned long long)entry->bytes_left,
+                       (unsigned long long)io_size);
+        }
+        entry->bytes_left -= io_size;
+        if (entry->bytes_left == 0)
                 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+        else
+                ret = 1;
 out:
         mutex_unlock(&tree->mutex);
         return ret == 0;
@@ -308,6 +306,12 @@ int btrfs_remove_ordered_extent(struct inode *inode,
         tree->last = NULL;
         set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);

+        spin_lock(&BTRFS_I(inode)->accounting_lock);
+        BTRFS_I(inode)->outstanding_extents--;
+        spin_unlock(&BTRFS_I(inode)->accounting_lock);
+        btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
+                                              inode, 1);
+
         spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
         list_del_init(&entry->root_extent_list);

@@ -460,7 +464,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
          * start IO on any dirty ones so the wait doesn't stall waiting
          * for pdflush to find them
          */
-        btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL);
+        filemap_fdatawrite_range(inode->i_mapping, start, end);
         if (wait) {
                 wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
                                                  &entry->flags));
@@ -476,6 +480,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
         u64 orig_end;
         u64 wait_end;
         struct btrfs_ordered_extent *ordered;
+        int found;

         if (start + len < start) {
                 orig_end = INT_LIMIT(loff_t);
@@ -489,19 +494,18 @@ again:
         /* start IO across the range first to instantiate any delalloc
          * extents
          */
-        btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
+        filemap_fdatawrite_range(inode->i_mapping, start, orig_end);

         /* The compression code will leave pages locked but return from
          * writepage without setting the page writeback. Starting again
          * with WB_SYNC_ALL will end up waiting for the IO to actually start.
          */
-        btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL);
+        filemap_fdatawrite_range(inode->i_mapping, start, orig_end);

-        btrfs_wait_on_page_writeback_range(inode->i_mapping,
-                                           start >> PAGE_CACHE_SHIFT,
-                                           orig_end >> PAGE_CACHE_SHIFT);
+        filemap_fdatawait_range(inode->i_mapping, start, orig_end);

         end = orig_end;
+        found = 0;
         while (1) {
                 ordered = btrfs_lookup_first_ordered_extent(inode, end);
                 if (!ordered)
@@ -514,6 +518,7 @@ again:
                         btrfs_put_ordered_extent(ordered);
                         break;
                 }
+                found++;
                 btrfs_start_ordered_extent(inode, ordered, 1);
                 end = ordered->file_offset;
                 btrfs_put_ordered_extent(ordered);
@@ -521,8 +526,8 @@ again:
                         break;
                 end--;
         }
-        if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
-                           EXTENT_ORDERED | EXTENT_DELALLOC, 0)) {
+        if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
+                           EXTENT_DELALLOC, 0, NULL)) {
                 schedule_timeout(1);
                 goto again;
         }
@@ -613,7 +618,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
          */
         if (test_range_bit(io_tree, disk_i_size,
                            ordered->file_offset + ordered->len - 1,
-                           EXTENT_DELALLOC, 0)) {
+                           EXTENT_DELALLOC, 0, NULL)) {
                 goto out;
         }
         /*
@@ -664,7 +669,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
          */
         if (i_size_test > entry_end(ordered) &&
             !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
-                            EXTENT_DELALLOC, 0)) {
+                            EXTENT_DELALLOC, 0, NULL)) {
                 new_i_size = min_t(u64, i_size_test, i_size_read(inode));
         }
         BTRFS_I(inode)->disk_i_size = new_i_size;
@@ -715,90 +720,6 @@ out:
 }


-/**
- * taken from mm/filemap.c because it isn't exported
- *
- * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
- * @mapping:   address space structure to write
- * @start:     offset in bytes where the range starts
- * @end:       offset in bytes where the range ends (inclusive)
- * @sync_mode: enable synchronous operation
- *
- * Start writeback against all of a mapping's dirty pages that lie
- * within the byte offsets <start, end> inclusive.
- *
- * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
- * opposed to a regular memory cleansing writeback. The difference between
- * these two operations is that if a dirty page/buffer is encountered, it must
- * be waited upon, and not just skipped over.
- */
-int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
-                           loff_t end, int sync_mode)
-{
-        struct writeback_control wbc = {
-                .sync_mode = sync_mode,
-                .nr_to_write = mapping->nrpages * 2,
-                .range_start = start,
-                .range_end = end,
-                .for_writepages = 1,
-        };
-        return btrfs_writepages(mapping, &wbc);
-}
-
-/**
- * taken from mm/filemap.c because it isn't exported
- *
- * wait_on_page_writeback_range - wait for writeback to complete
- * @mapping:   target address_space
- * @start:     beginning page index
- * @end:       ending page index
- *
- * Wait for writeback to complete against pages indexed by start->end
- * inclusive
- */
-int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
-                                       pgoff_t start, pgoff_t end)
-{
-        struct pagevec pvec;
-        int nr_pages;
-        int ret = 0;
-        pgoff_t index;
-
-        if (end < start)
-                return 0;
-
-        pagevec_init(&pvec, 0);
-        index = start;
-        while ((index <= end) &&
-               (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-                        PAGECACHE_TAG_WRITEBACK,
-                        min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
-                unsigned i;
-
-                for (i = 0; i < nr_pages; i++) {
-                        struct page *page = pvec.pages[i];
-
-                        /* until radix tree lookup accepts end_index */
-                        if (page->index > end)
-                                continue;
-
-                        wait_on_page_writeback(page);
-                        if (PageError(page))
-                                ret = -EIO;
-                }
-                pagevec_release(&pvec);
-                cond_resched();
-        }
-
-        /* Check for outstanding write errors */
-        if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
-                ret = -ENOSPC;
-        if (test_and_clear_bit(AS_EIO, &mapping->flags))
-                ret = -EIO;
-
-        return ret;
-}
-
 /*
  * add a given inode to the list of inodes that must be fully on
  * disk before a transaction commit finishes.