aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ordered-data.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r-- fs/btrfs/ordered-data.c 127
1 file changed, 21 insertions, 106 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index d6f0806c682f..897fba835f89 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -159,8 +159,6 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
159 * 159 *
160 * len is the length of the extent 160 * len is the length of the extent
161 * 161 *
162 * This also sets the EXTENT_ORDERED bit on the range in the inode.
163 *
164 * The tree is given a single reference on the ordered extent that was 162 * The tree is given a single reference on the ordered extent that was
165 * inserted. 163 * inserted.
166 */ 164 */
@@ -181,6 +179,7 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
181 entry->start = start; 179 entry->start = start;
182 entry->len = len; 180 entry->len = len;
183 entry->disk_len = disk_len; 181 entry->disk_len = disk_len;
182 entry->bytes_left = len;
184 entry->inode = inode; 183 entry->inode = inode;
185 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 184 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
186 set_bit(type, &entry->flags); 185 set_bit(type, &entry->flags);
@@ -195,9 +194,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
195 &entry->rb_node); 194 &entry->rb_node);
196 BUG_ON(node); 195 BUG_ON(node);
197 196
198 set_extent_ordered(&BTRFS_I(inode)->io_tree, file_offset,
199 entry_end(entry) - 1, GFP_NOFS);
200
201 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
202 list_add_tail(&entry->root_extent_list, 198 list_add_tail(&entry->root_extent_list,
203 &BTRFS_I(inode)->root->fs_info->ordered_extents); 199 &BTRFS_I(inode)->root->fs_info->ordered_extents);
@@ -241,13 +237,10 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
241 struct btrfs_ordered_inode_tree *tree; 237 struct btrfs_ordered_inode_tree *tree;
242 struct rb_node *node; 238 struct rb_node *node;
243 struct btrfs_ordered_extent *entry; 239 struct btrfs_ordered_extent *entry;
244 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
245 int ret; 240 int ret;
246 241
247 tree = &BTRFS_I(inode)->ordered_tree; 242 tree = &BTRFS_I(inode)->ordered_tree;
248 mutex_lock(&tree->mutex); 243 mutex_lock(&tree->mutex);
249 clear_extent_ordered(io_tree, file_offset, file_offset + io_size - 1,
250 GFP_NOFS);
251 node = tree_search(tree, file_offset); 244 node = tree_search(tree, file_offset);
252 if (!node) { 245 if (!node) {
253 ret = 1; 246 ret = 1;
@@ -260,11 +253,16 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
260 goto out; 253 goto out;
261 } 254 }
262 255
263 ret = test_range_bit(io_tree, entry->file_offset, 256 if (io_size > entry->bytes_left) {
264 entry->file_offset + entry->len - 1, 257 printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
265 EXTENT_ORDERED, 0); 258 (unsigned long long)entry->bytes_left,
266 if (ret == 0) 259 (unsigned long long)io_size);
260 }
261 entry->bytes_left -= io_size;
262 if (entry->bytes_left == 0)
267 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 263 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
264 else
265 ret = 1;
268out: 266out:
269 mutex_unlock(&tree->mutex); 267 mutex_unlock(&tree->mutex);
270 return ret == 0; 268 return ret == 0;
@@ -460,7 +458,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
460 * start IO on any dirty ones so the wait doesn't stall waiting 458 * start IO on any dirty ones so the wait doesn't stall waiting
461 * for pdflush to find them 459 * for pdflush to find them
462 */ 460 */
463 btrfs_fdatawrite_range(inode->i_mapping, start, end, WB_SYNC_ALL); 461 filemap_fdatawrite_range(inode->i_mapping, start, end);
464 if (wait) { 462 if (wait) {
465 wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, 463 wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE,
466 &entry->flags)); 464 &entry->flags));
@@ -476,6 +474,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
476 u64 orig_end; 474 u64 orig_end;
477 u64 wait_end; 475 u64 wait_end;
478 struct btrfs_ordered_extent *ordered; 476 struct btrfs_ordered_extent *ordered;
477 int found;
479 478
480 if (start + len < start) { 479 if (start + len < start) {
481 orig_end = INT_LIMIT(loff_t); 480 orig_end = INT_LIMIT(loff_t);
@@ -489,19 +488,18 @@ again:
489 /* start IO across the range first to instantiate any delalloc 488 /* start IO across the range first to instantiate any delalloc
490 * extents 489 * extents
491 */ 490 */
492 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); 491 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
493 492
494 /* The compression code will leave pages locked but return from 493 /* The compression code will leave pages locked but return from
495 * writepage without setting the page writeback. Starting again 494 * writepage without setting the page writeback. Starting again
496 * with WB_SYNC_ALL will end up waiting for the IO to actually start. 495 * with WB_SYNC_ALL will end up waiting for the IO to actually start.
497 */ 496 */
498 btrfs_fdatawrite_range(inode->i_mapping, start, orig_end, WB_SYNC_ALL); 497 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
499 498
500 btrfs_wait_on_page_writeback_range(inode->i_mapping, 499 filemap_fdatawait_range(inode->i_mapping, start, orig_end);
501 start >> PAGE_CACHE_SHIFT,
502 orig_end >> PAGE_CACHE_SHIFT);
503 500
504 end = orig_end; 501 end = orig_end;
502 found = 0;
505 while (1) { 503 while (1) {
506 ordered = btrfs_lookup_first_ordered_extent(inode, end); 504 ordered = btrfs_lookup_first_ordered_extent(inode, end);
507 if (!ordered) 505 if (!ordered)
@@ -514,6 +512,7 @@ again:
514 btrfs_put_ordered_extent(ordered); 512 btrfs_put_ordered_extent(ordered);
515 break; 513 break;
516 } 514 }
515 found++;
517 btrfs_start_ordered_extent(inode, ordered, 1); 516 btrfs_start_ordered_extent(inode, ordered, 1);
518 end = ordered->file_offset; 517 end = ordered->file_offset;
519 btrfs_put_ordered_extent(ordered); 518 btrfs_put_ordered_extent(ordered);
@@ -521,8 +520,8 @@ again:
521 break; 520 break;
522 end--; 521 end--;
523 } 522 }
524 if (test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end, 523 if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
525 EXTENT_ORDERED | EXTENT_DELALLOC, 0)) { 524 EXTENT_DELALLOC, 0, NULL)) {
526 schedule_timeout(1); 525 schedule_timeout(1);
527 goto again; 526 goto again;
528 } 527 }
@@ -613,7 +612,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
613 */ 612 */
614 if (test_range_bit(io_tree, disk_i_size, 613 if (test_range_bit(io_tree, disk_i_size,
615 ordered->file_offset + ordered->len - 1, 614 ordered->file_offset + ordered->len - 1,
616 EXTENT_DELALLOC, 0)) { 615 EXTENT_DELALLOC, 0, NULL)) {
617 goto out; 616 goto out;
618 } 617 }
619 /* 618 /*
@@ -664,7 +663,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
664 */ 663 */
665 if (i_size_test > entry_end(ordered) && 664 if (i_size_test > entry_end(ordered) &&
666 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 665 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
667 EXTENT_DELALLOC, 0)) { 666 EXTENT_DELALLOC, 0, NULL)) {
668 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 667 new_i_size = min_t(u64, i_size_test, i_size_read(inode));
669 } 668 }
670 BTRFS_I(inode)->disk_i_size = new_i_size; 669 BTRFS_I(inode)->disk_i_size = new_i_size;
@@ -715,90 +714,6 @@ out:
715} 714}
716 715
717 716
718/**
719 * taken from mm/filemap.c because it isn't exported
720 *
721 * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
722 * @mapping: address space structure to write
723 * @start: offset in bytes where the range starts
724 * @end: offset in bytes where the range ends (inclusive)
725 * @sync_mode: enable synchronous operation
726 *
727 * Start writeback against all of a mapping's dirty pages that lie
728 * within the byte offsets <start, end> inclusive.
729 *
730 * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
731 * opposed to a regular memory cleansing writeback. The difference between
732 * these two operations is that if a dirty page/buffer is encountered, it must
733 * be waited upon, and not just skipped over.
734 */
735int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start,
736 loff_t end, int sync_mode)
737{
738 struct writeback_control wbc = {
739 .sync_mode = sync_mode,
740 .nr_to_write = mapping->nrpages * 2,
741 .range_start = start,
742 .range_end = end,
743 .for_writepages = 1,
744 };
745 return btrfs_writepages(mapping, &wbc);
746}
747
748/**
749 * taken from mm/filemap.c because it isn't exported
750 *
751 * wait_on_page_writeback_range - wait for writeback to complete
752 * @mapping: target address_space
753 * @start: beginning page index
754 * @end: ending page index
755 *
756 * Wait for writeback to complete against pages indexed by start->end
757 * inclusive
758 */
759int btrfs_wait_on_page_writeback_range(struct address_space *mapping,
760 pgoff_t start, pgoff_t end)
761{
762 struct pagevec pvec;
763 int nr_pages;
764 int ret = 0;
765 pgoff_t index;
766
767 if (end < start)
768 return 0;
769
770 pagevec_init(&pvec, 0);
771 index = start;
772 while ((index <= end) &&
773 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
774 PAGECACHE_TAG_WRITEBACK,
775 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1)) != 0) {
776 unsigned i;
777
778 for (i = 0; i < nr_pages; i++) {
779 struct page *page = pvec.pages[i];
780
781 /* until radix tree lookup accepts end_index */
782 if (page->index > end)
783 continue;
784
785 wait_on_page_writeback(page);
786 if (PageError(page))
787 ret = -EIO;
788 }
789 pagevec_release(&pvec);
790 cond_resched();
791 }
792
793 /* Check for outstanding write errors */
794 if (test_and_clear_bit(AS_ENOSPC, &mapping->flags))
795 ret = -ENOSPC;
796 if (test_and_clear_bit(AS_EIO, &mapping->flags))
797 ret = -EIO;
798
799 return ret;
800}
801
802/* 717/*
803 * add a given inode to the list of inodes that must be fully on 718 * add a given inode to the list of inodes that must be fully on
804 * disk before a transaction commit finishes. 719 * disk before a transaction commit finishes.