aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ordered-data.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--fs/btrfs/ordered-data.c165
1 files changed, 77 insertions, 88 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index bbf6d0d9aebe..9e138cdc36c5 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -196,7 +196,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
196 entry->len = len; 196 entry->len = len;
197 entry->disk_len = disk_len; 197 entry->disk_len = disk_len;
198 entry->bytes_left = len; 198 entry->bytes_left = len;
199 entry->inode = inode; 199 entry->inode = igrab(inode);
200 entry->compress_type = compress_type; 200 entry->compress_type = compress_type;
201 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) 201 if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
202 set_bit(type, &entry->flags); 202 set_bit(type, &entry->flags);
@@ -212,12 +212,12 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
212 212
213 trace_btrfs_ordered_extent_add(inode, entry); 213 trace_btrfs_ordered_extent_add(inode, entry);
214 214
215 spin_lock(&tree->lock); 215 spin_lock_irq(&tree->lock);
216 node = tree_insert(&tree->tree, file_offset, 216 node = tree_insert(&tree->tree, file_offset,
217 &entry->rb_node); 217 &entry->rb_node);
218 if (node) 218 if (node)
219 ordered_data_tree_panic(inode, -EEXIST, file_offset); 219 ordered_data_tree_panic(inode, -EEXIST, file_offset);
220 spin_unlock(&tree->lock); 220 spin_unlock_irq(&tree->lock);
221 221
222 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 222 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
223 list_add_tail(&entry->root_extent_list, 223 list_add_tail(&entry->root_extent_list,
@@ -264,9 +264,9 @@ void btrfs_add_ordered_sum(struct inode *inode,
264 struct btrfs_ordered_inode_tree *tree; 264 struct btrfs_ordered_inode_tree *tree;
265 265
266 tree = &BTRFS_I(inode)->ordered_tree; 266 tree = &BTRFS_I(inode)->ordered_tree;
267 spin_lock(&tree->lock); 267 spin_lock_irq(&tree->lock);
268 list_add_tail(&sum->list, &entry->list); 268 list_add_tail(&sum->list, &entry->list);
269 spin_unlock(&tree->lock); 269 spin_unlock_irq(&tree->lock);
270} 270}
271 271
272/* 272/*
@@ -283,18 +283,19 @@ void btrfs_add_ordered_sum(struct inode *inode,
283 */ 283 */
284int btrfs_dec_test_first_ordered_pending(struct inode *inode, 284int btrfs_dec_test_first_ordered_pending(struct inode *inode,
285 struct btrfs_ordered_extent **cached, 285 struct btrfs_ordered_extent **cached,
286 u64 *file_offset, u64 io_size) 286 u64 *file_offset, u64 io_size, int uptodate)
287{ 287{
288 struct btrfs_ordered_inode_tree *tree; 288 struct btrfs_ordered_inode_tree *tree;
289 struct rb_node *node; 289 struct rb_node *node;
290 struct btrfs_ordered_extent *entry = NULL; 290 struct btrfs_ordered_extent *entry = NULL;
291 int ret; 291 int ret;
292 unsigned long flags;
292 u64 dec_end; 293 u64 dec_end;
293 u64 dec_start; 294 u64 dec_start;
294 u64 to_dec; 295 u64 to_dec;
295 296
296 tree = &BTRFS_I(inode)->ordered_tree; 297 tree = &BTRFS_I(inode)->ordered_tree;
297 spin_lock(&tree->lock); 298 spin_lock_irqsave(&tree->lock, flags);
298 node = tree_search(tree, *file_offset); 299 node = tree_search(tree, *file_offset);
299 if (!node) { 300 if (!node) {
300 ret = 1; 301 ret = 1;
@@ -323,6 +324,9 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
323 (unsigned long long)to_dec); 324 (unsigned long long)to_dec);
324 } 325 }
325 entry->bytes_left -= to_dec; 326 entry->bytes_left -= to_dec;
327 if (!uptodate)
328 set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
329
326 if (entry->bytes_left == 0) 330 if (entry->bytes_left == 0)
327 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 331 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
328 else 332 else
@@ -332,7 +336,7 @@ out:
332 *cached = entry; 336 *cached = entry;
333 atomic_inc(&entry->refs); 337 atomic_inc(&entry->refs);
334 } 338 }
335 spin_unlock(&tree->lock); 339 spin_unlock_irqrestore(&tree->lock, flags);
336 return ret == 0; 340 return ret == 0;
337} 341}
338 342
@@ -347,15 +351,21 @@ out:
347 */ 351 */
348int btrfs_dec_test_ordered_pending(struct inode *inode, 352int btrfs_dec_test_ordered_pending(struct inode *inode,
349 struct btrfs_ordered_extent **cached, 353 struct btrfs_ordered_extent **cached,
350 u64 file_offset, u64 io_size) 354 u64 file_offset, u64 io_size, int uptodate)
351{ 355{
352 struct btrfs_ordered_inode_tree *tree; 356 struct btrfs_ordered_inode_tree *tree;
353 struct rb_node *node; 357 struct rb_node *node;
354 struct btrfs_ordered_extent *entry = NULL; 358 struct btrfs_ordered_extent *entry = NULL;
359 unsigned long flags;
355 int ret; 360 int ret;
356 361
357 tree = &BTRFS_I(inode)->ordered_tree; 362 tree = &BTRFS_I(inode)->ordered_tree;
358 spin_lock(&tree->lock); 363 spin_lock_irqsave(&tree->lock, flags);
364 if (cached && *cached) {
365 entry = *cached;
366 goto have_entry;
367 }
368
359 node = tree_search(tree, file_offset); 369 node = tree_search(tree, file_offset);
360 if (!node) { 370 if (!node) {
361 ret = 1; 371 ret = 1;
@@ -363,6 +373,7 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
363 } 373 }
364 374
365 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 375 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
376have_entry:
366 if (!offset_in_entry(entry, file_offset)) { 377 if (!offset_in_entry(entry, file_offset)) {
367 ret = 1; 378 ret = 1;
368 goto out; 379 goto out;
@@ -374,6 +385,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
374 (unsigned long long)io_size); 385 (unsigned long long)io_size);
375 } 386 }
376 entry->bytes_left -= io_size; 387 entry->bytes_left -= io_size;
388 if (!uptodate)
389 set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
390
377 if (entry->bytes_left == 0) 391 if (entry->bytes_left == 0)
378 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); 392 ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
379 else 393 else
@@ -383,7 +397,7 @@ out:
383 *cached = entry; 397 *cached = entry;
384 atomic_inc(&entry->refs); 398 atomic_inc(&entry->refs);
385 } 399 }
386 spin_unlock(&tree->lock); 400 spin_unlock_irqrestore(&tree->lock, flags);
387 return ret == 0; 401 return ret == 0;
388} 402}
389 403
@@ -399,6 +413,8 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
399 trace_btrfs_ordered_extent_put(entry->inode, entry); 413 trace_btrfs_ordered_extent_put(entry->inode, entry);
400 414
401 if (atomic_dec_and_test(&entry->refs)) { 415 if (atomic_dec_and_test(&entry->refs)) {
416 if (entry->inode)
417 btrfs_add_delayed_iput(entry->inode);
402 while (!list_empty(&entry->list)) { 418 while (!list_empty(&entry->list)) {
403 cur = entry->list.next; 419 cur = entry->list.next;
404 sum = list_entry(cur, struct btrfs_ordered_sum, list); 420 sum = list_entry(cur, struct btrfs_ordered_sum, list);
@@ -411,21 +427,22 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
411 427
412/* 428/*
413 * remove an ordered extent from the tree. No references are dropped 429 * remove an ordered extent from the tree. No references are dropped
414 * and you must wake_up entry->wait. You must hold the tree lock 430 * and waiters are woken up.
415 * while you call this function.
416 */ 431 */
417static void __btrfs_remove_ordered_extent(struct inode *inode, 432void btrfs_remove_ordered_extent(struct inode *inode,
418 struct btrfs_ordered_extent *entry) 433 struct btrfs_ordered_extent *entry)
419{ 434{
420 struct btrfs_ordered_inode_tree *tree; 435 struct btrfs_ordered_inode_tree *tree;
421 struct btrfs_root *root = BTRFS_I(inode)->root; 436 struct btrfs_root *root = BTRFS_I(inode)->root;
422 struct rb_node *node; 437 struct rb_node *node;
423 438
424 tree = &BTRFS_I(inode)->ordered_tree; 439 tree = &BTRFS_I(inode)->ordered_tree;
440 spin_lock_irq(&tree->lock);
425 node = &entry->rb_node; 441 node = &entry->rb_node;
426 rb_erase(node, &tree->tree); 442 rb_erase(node, &tree->tree);
427 tree->last = NULL; 443 tree->last = NULL;
428 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 444 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
445 spin_unlock_irq(&tree->lock);
429 446
430 spin_lock(&root->fs_info->ordered_extent_lock); 447 spin_lock(&root->fs_info->ordered_extent_lock);
431 list_del_init(&entry->root_extent_list); 448 list_del_init(&entry->root_extent_list);
@@ -442,21 +459,6 @@ static void __btrfs_remove_ordered_extent(struct inode *inode,
442 list_del_init(&BTRFS_I(inode)->ordered_operations); 459 list_del_init(&BTRFS_I(inode)->ordered_operations);
443 } 460 }
444 spin_unlock(&root->fs_info->ordered_extent_lock); 461 spin_unlock(&root->fs_info->ordered_extent_lock);
445}
446
447/*
448 * remove an ordered extent from the tree. No references are dropped
449 * but any waiters are woken.
450 */
451void btrfs_remove_ordered_extent(struct inode *inode,
452 struct btrfs_ordered_extent *entry)
453{
454 struct btrfs_ordered_inode_tree *tree;
455
456 tree = &BTRFS_I(inode)->ordered_tree;
457 spin_lock(&tree->lock);
458 __btrfs_remove_ordered_extent(inode, entry);
459 spin_unlock(&tree->lock);
460 wake_up(&entry->wait); 462 wake_up(&entry->wait);
461} 463}
462 464
@@ -621,19 +623,11 @@ void btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
621 if (orig_end > INT_LIMIT(loff_t)) 623 if (orig_end > INT_LIMIT(loff_t))
622 orig_end = INT_LIMIT(loff_t); 624 orig_end = INT_LIMIT(loff_t);
623 } 625 }
624again: 626
625 /* start IO across the range first to instantiate any delalloc 627 /* start IO across the range first to instantiate any delalloc
626 * extents 628 * extents
627 */ 629 */
628 filemap_fdatawrite_range(inode->i_mapping, start, orig_end); 630 filemap_write_and_wait_range(inode->i_mapping, start, orig_end);
629
630 /* The compression code will leave pages locked but return from
631 * writepage without setting the page writeback. Starting again
632 * with WB_SYNC_ALL will end up waiting for the IO to actually start.
633 */
634 filemap_fdatawrite_range(inode->i_mapping, start, orig_end);
635
636 filemap_fdatawait_range(inode->i_mapping, start, orig_end);
637 631
638 end = orig_end; 632 end = orig_end;
639 found = 0; 633 found = 0;
@@ -657,11 +651,6 @@ again:
657 break; 651 break;
658 end--; 652 end--;
659 } 653 }
660 if (found || test_range_bit(&BTRFS_I(inode)->io_tree, start, orig_end,
661 EXTENT_DELALLOC, 0, NULL)) {
662 schedule_timeout(1);
663 goto again;
664 }
665} 654}
666 655
667/* 656/*
@@ -676,7 +665,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
676 struct btrfs_ordered_extent *entry = NULL; 665 struct btrfs_ordered_extent *entry = NULL;
677 666
678 tree = &BTRFS_I(inode)->ordered_tree; 667 tree = &BTRFS_I(inode)->ordered_tree;
679 spin_lock(&tree->lock); 668 spin_lock_irq(&tree->lock);
680 node = tree_search(tree, file_offset); 669 node = tree_search(tree, file_offset);
681 if (!node) 670 if (!node)
682 goto out; 671 goto out;
@@ -687,7 +676,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
687 if (entry) 676 if (entry)
688 atomic_inc(&entry->refs); 677 atomic_inc(&entry->refs);
689out: 678out:
690 spin_unlock(&tree->lock); 679 spin_unlock_irq(&tree->lock);
691 return entry; 680 return entry;
692} 681}
693 682
@@ -703,7 +692,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
703 struct btrfs_ordered_extent *entry = NULL; 692 struct btrfs_ordered_extent *entry = NULL;
704 693
705 tree = &BTRFS_I(inode)->ordered_tree; 694 tree = &BTRFS_I(inode)->ordered_tree;
706 spin_lock(&tree->lock); 695 spin_lock_irq(&tree->lock);
707 node = tree_search(tree, file_offset); 696 node = tree_search(tree, file_offset);
708 if (!node) { 697 if (!node) {
709 node = tree_search(tree, file_offset + len); 698 node = tree_search(tree, file_offset + len);
@@ -728,7 +717,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
728out: 717out:
729 if (entry) 718 if (entry)
730 atomic_inc(&entry->refs); 719 atomic_inc(&entry->refs);
731 spin_unlock(&tree->lock); 720 spin_unlock_irq(&tree->lock);
732 return entry; 721 return entry;
733} 722}
734 723
@@ -744,7 +733,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
744 struct btrfs_ordered_extent *entry = NULL; 733 struct btrfs_ordered_extent *entry = NULL;
745 734
746 tree = &BTRFS_I(inode)->ordered_tree; 735 tree = &BTRFS_I(inode)->ordered_tree;
747 spin_lock(&tree->lock); 736 spin_lock_irq(&tree->lock);
748 node = tree_search(tree, file_offset); 737 node = tree_search(tree, file_offset);
749 if (!node) 738 if (!node)
750 goto out; 739 goto out;
@@ -752,7 +741,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
752 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 741 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
753 atomic_inc(&entry->refs); 742 atomic_inc(&entry->refs);
754out: 743out:
755 spin_unlock(&tree->lock); 744 spin_unlock_irq(&tree->lock);
756 return entry; 745 return entry;
757} 746}
758 747
@@ -764,7 +753,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
764 struct btrfs_ordered_extent *ordered) 753 struct btrfs_ordered_extent *ordered)
765{ 754{
766 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 755 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
767 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
768 u64 disk_i_size; 756 u64 disk_i_size;
769 u64 new_i_size; 757 u64 new_i_size;
770 u64 i_size_test; 758 u64 i_size_test;
@@ -779,7 +767,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
779 else 767 else
780 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); 768 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
781 769
782 spin_lock(&tree->lock); 770 spin_lock_irq(&tree->lock);
783 disk_i_size = BTRFS_I(inode)->disk_i_size; 771 disk_i_size = BTRFS_I(inode)->disk_i_size;
784 772
785 /* truncate file */ 773 /* truncate file */
@@ -798,14 +786,6 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
798 } 786 }
799 787
800 /* 788 /*
801 * we can't update the disk_isize if there are delalloc bytes
802 * between disk_i_size and this ordered extent
803 */
804 if (test_range_bit(io_tree, disk_i_size, offset - 1,
805 EXTENT_DELALLOC, 0, NULL)) {
806 goto out;
807 }
808 /*
809 * walk backward from this ordered extent to disk_i_size. 789 * walk backward from this ordered extent to disk_i_size.
810 * if we find an ordered extent then we can't update disk i_size 790 * if we find an ordered extent then we can't update disk i_size
811 * yet 791 * yet
@@ -825,15 +805,18 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
825 } 805 }
826 node = prev; 806 node = prev;
827 } 807 }
828 while (node) { 808 for (; node; node = rb_prev(node)) {
829 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 809 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
810
811 /* We treat this entry as if it doesn't exist */
812 if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
813 continue;
830 if (test->file_offset + test->len <= disk_i_size) 814 if (test->file_offset + test->len <= disk_i_size)
831 break; 815 break;
832 if (test->file_offset >= i_size) 816 if (test->file_offset >= i_size)
833 break; 817 break;
834 if (test->file_offset >= disk_i_size) 818 if (test->file_offset >= disk_i_size)
835 goto out; 819 goto out;
836 node = rb_prev(node);
837 } 820 }
838 new_i_size = min_t(u64, offset, i_size); 821 new_i_size = min_t(u64, offset, i_size);
839 822
@@ -851,43 +834,49 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
851 else 834 else
852 node = rb_first(&tree->tree); 835 node = rb_first(&tree->tree);
853 } 836 }
854 i_size_test = 0; 837
855 if (node) { 838 /*
856 /* 839 * We are looking for an area between our current extent and the next
857 * do we have an area where IO might have finished 840 * ordered extent to update the i_size to. There are 3 cases here
858 * between our ordered extent and the next one. 841 *
859 */ 842 * 1) We don't actually have anything and we can update to i_size.
843 * 2) We have stuff but they already did their i_size update so again we
844 * can just update to i_size.
845 * 3) We have an outstanding ordered extent so the most we can update
846 * our disk_i_size to is the start of the next offset.
847 */
848 i_size_test = i_size;
849 for (; node; node = rb_next(node)) {
860 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 850 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
861 if (test->file_offset > offset) 851
852 if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
853 continue;
854 if (test->file_offset > offset) {
862 i_size_test = test->file_offset; 855 i_size_test = test->file_offset;
863 } else { 856 break;
864 i_size_test = i_size; 857 }
865 } 858 }
866 859
867 /* 860 /*
868 * i_size_test is the end of a region after this ordered 861 * i_size_test is the end of a region after this ordered
869 * extent where there are no ordered extents. As long as there 862 * extent where there are no ordered extents, we can safely set
870 * are no delalloc bytes in this area, it is safe to update 863 * disk_i_size to this.
871 * disk_i_size to the end of the region.
872 */ 864 */
873 if (i_size_test > offset && 865 if (i_size_test > offset)
874 !test_range_bit(io_tree, offset, i_size_test - 1,
875 EXTENT_DELALLOC, 0, NULL)) {
876 new_i_size = min_t(u64, i_size_test, i_size); 866 new_i_size = min_t(u64, i_size_test, i_size);
877 }
878 BTRFS_I(inode)->disk_i_size = new_i_size; 867 BTRFS_I(inode)->disk_i_size = new_i_size;
879 ret = 0; 868 ret = 0;
880out: 869out:
881 /* 870 /*
882 * we need to remove the ordered extent with the tree lock held 871 * We need to do this because we can't remove ordered extents until
872 * so that other people calling this function don't find our fully 873 * after the disk_i_size has been updated and then the inode has been
884 * processed ordered entry and skip updating the i_size 873 * updated to reflect the change, so we need to tell anybody who finds
874 * this ordered extent that we've already done all the real work, we
875 * just haven't completed all the other work.
885 */ 876 */
886 if (ordered) 877 if (ordered)
887 __btrfs_remove_ordered_extent(inode, ordered); 878 set_bit(BTRFS_ORDERED_UPDATED_ISIZE, &ordered->flags);
888 spin_unlock(&tree->lock); 879 spin_unlock_irq(&tree->lock);
889 if (ordered)
890 wake_up(&ordered->wait);
891 return ret; 880 return ret;
892} 881}
893 882
@@ -912,7 +901,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
912 if (!ordered) 901 if (!ordered)
913 return 1; 902 return 1;
914 903
915 spin_lock(&tree->lock); 904 spin_lock_irq(&tree->lock);
916 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 905 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
917 if (disk_bytenr >= ordered_sum->bytenr) { 906 if (disk_bytenr >= ordered_sum->bytenr) {
918 num_sectors = ordered_sum->len / sectorsize; 907 num_sectors = ordered_sum->len / sectorsize;
@@ -927,7 +916,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
927 } 916 }
928 } 917 }
929out: 918out:
930 spin_unlock(&tree->lock); 919 spin_unlock_irq(&tree->lock);
931 btrfs_put_ordered_extent(ordered); 920 btrfs_put_ordered_extent(ordered);
932 return ret; 921 return ret;
933} 922}