about summary refs log tree commit diff stats
path: root/fs/btrfs/ordered-data.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r-- fs/btrfs/ordered-data.c 161
1 files changed, 115 insertions, 46 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5799bc46a309..a127c0ebb2dc 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -16,7 +16,6 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/gfp.h>
20#include <linux/slab.h> 19#include <linux/slab.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/writeback.h> 21#include <linux/writeback.h>
@@ -174,7 +173,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
174 if (!entry) 173 if (!entry)
175 return -ENOMEM; 174 return -ENOMEM;
176 175
177 mutex_lock(&tree->mutex);
178 entry->file_offset = file_offset; 176 entry->file_offset = file_offset;
179 entry->start = start; 177 entry->start = start;
180 entry->len = len; 178 entry->len = len;
@@ -190,16 +188,17 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
190 INIT_LIST_HEAD(&entry->list); 188 INIT_LIST_HEAD(&entry->list);
191 INIT_LIST_HEAD(&entry->root_extent_list); 189 INIT_LIST_HEAD(&entry->root_extent_list);
192 190
191 spin_lock(&tree->lock);
193 node = tree_insert(&tree->tree, file_offset, 192 node = tree_insert(&tree->tree, file_offset,
194 &entry->rb_node); 193 &entry->rb_node);
195 BUG_ON(node); 194 BUG_ON(node);
195 spin_unlock(&tree->lock);
196 196
197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
198 list_add_tail(&entry->root_extent_list, 198 list_add_tail(&entry->root_extent_list,
199 &BTRFS_I(inode)->root->fs_info->ordered_extents); 199 &BTRFS_I(inode)->root->fs_info->ordered_extents);
200 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 200 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
201 201
202 mutex_unlock(&tree->mutex);
203 BUG_ON(node); 202 BUG_ON(node);
204 return 0; 203 return 0;
205} 204}
@@ -216,9 +215,9 @@ int btrfs_add_ordered_sum(struct inode *inode,
216 struct btrfs_ordered_inode_tree *tree; 215 struct btrfs_ordered_inode_tree *tree;
217 216
218 tree = &BTRFS_I(inode)->ordered_tree; 217 tree = &BTRFS_I(inode)->ordered_tree;
219 mutex_lock(&tree->mutex); 218 spin_lock(&tree->lock);
220 list_add_tail(&sum->list, &entry->list); 219 list_add_tail(&sum->list, &entry->list);
221 mutex_unlock(&tree->mutex); 220 spin_unlock(&tree->lock);
222 return 0; 221 return 0;
223} 222}
224 223
@@ -232,15 +231,16 @@ int btrfs_add_ordered_sum(struct inode *inode,
232 * to make sure this function only returns 1 once for a given ordered extent. 231 * to make sure this function only returns 1 once for a given ordered extent.
233 */ 232 */
234int btrfs_dec_test_ordered_pending(struct inode *inode, 233int btrfs_dec_test_ordered_pending(struct inode *inode,
234 struct btrfs_ordered_extent **cached,
235 u64 file_offset, u64 io_size) 235 u64 file_offset, u64 io_size)
236{ 236{
237 struct btrfs_ordered_inode_tree *tree; 237 struct btrfs_ordered_inode_tree *tree;
238 struct rb_node *node; 238 struct rb_node *node;
239 struct btrfs_ordered_extent *entry; 239 struct btrfs_ordered_extent *entry = NULL;
240 int ret; 240 int ret;
241 241
242 tree = &BTRFS_I(inode)->ordered_tree; 242 tree = &BTRFS_I(inode)->ordered_tree;
243 mutex_lock(&tree->mutex); 243 spin_lock(&tree->lock);
244 node = tree_search(tree, file_offset); 244 node = tree_search(tree, file_offset);
245 if (!node) { 245 if (!node) {
246 ret = 1; 246 ret = 1;
@@ -264,7 +264,11 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
264 else 264 else
265 ret = 1; 265 ret = 1;
266out: 266out:
267 mutex_unlock(&tree->mutex); 267 if (!ret && cached && entry) {
268 *cached = entry;
269 atomic_inc(&entry->refs);
270 }
271 spin_unlock(&tree->lock);
268 return ret == 0; 272 return ret == 0;
269} 273}
270 274
@@ -291,28 +295,30 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
291 295
292/* 296/*
293 * remove an ordered extent from the tree. No references are dropped 297 * remove an ordered extent from the tree. No references are dropped
294 * but, anyone waiting on this extent is woken up. 298 * and you must wake_up entry->wait. You must hold the tree lock
299 * while you call this function.
295 */ 300 */
296int btrfs_remove_ordered_extent(struct inode *inode, 301static int __btrfs_remove_ordered_extent(struct inode *inode,
297 struct btrfs_ordered_extent *entry) 302 struct btrfs_ordered_extent *entry)
298{ 303{
299 struct btrfs_ordered_inode_tree *tree; 304 struct btrfs_ordered_inode_tree *tree;
305 struct btrfs_root *root = BTRFS_I(inode)->root;
300 struct rb_node *node; 306 struct rb_node *node;
301 307
302 tree = &BTRFS_I(inode)->ordered_tree; 308 tree = &BTRFS_I(inode)->ordered_tree;
303 mutex_lock(&tree->mutex);
304 node = &entry->rb_node; 309 node = &entry->rb_node;
305 rb_erase(node, &tree->tree); 310 rb_erase(node, &tree->tree);
306 tree->last = NULL; 311 tree->last = NULL;
307 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 312 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
308 313
309 spin_lock(&BTRFS_I(inode)->accounting_lock); 314 spin_lock(&BTRFS_I(inode)->accounting_lock);
315 WARN_ON(!BTRFS_I(inode)->outstanding_extents);
310 BTRFS_I(inode)->outstanding_extents--; 316 BTRFS_I(inode)->outstanding_extents--;
311 spin_unlock(&BTRFS_I(inode)->accounting_lock); 317 spin_unlock(&BTRFS_I(inode)->accounting_lock);
312 btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, 318 btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
313 inode, 1); 319 inode, 1);
314 320
315 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 321 spin_lock(&root->fs_info->ordered_extent_lock);
316 list_del_init(&entry->root_extent_list); 322 list_del_init(&entry->root_extent_list);
317 323
318 /* 324 /*
@@ -324,18 +330,36 @@ int btrfs_remove_ordered_extent(struct inode *inode,
324 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { 330 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
325 list_del_init(&BTRFS_I(inode)->ordered_operations); 331 list_del_init(&BTRFS_I(inode)->ordered_operations);
326 } 332 }
327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 333 spin_unlock(&root->fs_info->ordered_extent_lock);
328 334
329 mutex_unlock(&tree->mutex);
330 wake_up(&entry->wait);
331 return 0; 335 return 0;
332} 336}
333 337
334/* 338/*
339 * remove an ordered extent from the tree. No references are dropped
340 * but any waiters are woken.
341 */
342int btrfs_remove_ordered_extent(struct inode *inode,
343 struct btrfs_ordered_extent *entry)
344{
345 struct btrfs_ordered_inode_tree *tree;
346 int ret;
347
348 tree = &BTRFS_I(inode)->ordered_tree;
349 spin_lock(&tree->lock);
350 ret = __btrfs_remove_ordered_extent(inode, entry);
351 spin_unlock(&tree->lock);
352 wake_up(&entry->wait);
353
354 return ret;
355}
356
357/*
335 * wait for all the ordered extents in a root. This is done when balancing 358 * wait for all the ordered extents in a root. This is done when balancing
336 * space between drives. 359 * space between drives.
337 */ 360 */
338int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) 361int btrfs_wait_ordered_extents(struct btrfs_root *root,
362 int nocow_only, int delay_iput)
339{ 363{
340 struct list_head splice; 364 struct list_head splice;
341 struct list_head *cur; 365 struct list_head *cur;
@@ -372,7 +396,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
372 if (inode) { 396 if (inode) {
373 btrfs_start_ordered_extent(inode, ordered, 1); 397 btrfs_start_ordered_extent(inode, ordered, 1);
374 btrfs_put_ordered_extent(ordered); 398 btrfs_put_ordered_extent(ordered);
375 iput(inode); 399 if (delay_iput)
400 btrfs_add_delayed_iput(inode);
401 else
402 iput(inode);
376 } else { 403 } else {
377 btrfs_put_ordered_extent(ordered); 404 btrfs_put_ordered_extent(ordered);
378 } 405 }
@@ -430,7 +457,7 @@ again:
430 btrfs_wait_ordered_range(inode, 0, (u64)-1); 457 btrfs_wait_ordered_range(inode, 0, (u64)-1);
431 else 458 else
432 filemap_flush(inode->i_mapping); 459 filemap_flush(inode->i_mapping);
433 iput(inode); 460 btrfs_add_delayed_iput(inode);
434 } 461 }
435 462
436 cond_resched(); 463 cond_resched();
@@ -546,7 +573,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
546 struct btrfs_ordered_extent *entry = NULL; 573 struct btrfs_ordered_extent *entry = NULL;
547 574
548 tree = &BTRFS_I(inode)->ordered_tree; 575 tree = &BTRFS_I(inode)->ordered_tree;
549 mutex_lock(&tree->mutex); 576 spin_lock(&tree->lock);
550 node = tree_search(tree, file_offset); 577 node = tree_search(tree, file_offset);
551 if (!node) 578 if (!node)
552 goto out; 579 goto out;
@@ -557,7 +584,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
557 if (entry) 584 if (entry)
558 atomic_inc(&entry->refs); 585 atomic_inc(&entry->refs);
559out: 586out:
560 mutex_unlock(&tree->mutex); 587 spin_unlock(&tree->lock);
561 return entry; 588 return entry;
562} 589}
563 590
@@ -573,7 +600,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
573 struct btrfs_ordered_extent *entry = NULL; 600 struct btrfs_ordered_extent *entry = NULL;
574 601
575 tree = &BTRFS_I(inode)->ordered_tree; 602 tree = &BTRFS_I(inode)->ordered_tree;
576 mutex_lock(&tree->mutex); 603 spin_lock(&tree->lock);
577 node = tree_search(tree, file_offset); 604 node = tree_search(tree, file_offset);
578 if (!node) 605 if (!node)
579 goto out; 606 goto out;
@@ -581,7 +608,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
581 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 608 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
582 atomic_inc(&entry->refs); 609 atomic_inc(&entry->refs);
583out: 610out:
584 mutex_unlock(&tree->mutex); 611 spin_unlock(&tree->lock);
585 return entry; 612 return entry;
586} 613}
587 614
@@ -589,7 +616,7 @@ out:
589 * After an extent is done, call this to conditionally update the on disk 616 * After an extent is done, call this to conditionally update the on disk
590 * i_size. i_size is updated to cover any fully written part of the file. 617 * i_size. i_size is updated to cover any fully written part of the file.
591 */ 618 */
592int btrfs_ordered_update_i_size(struct inode *inode, 619int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
593 struct btrfs_ordered_extent *ordered) 620 struct btrfs_ordered_extent *ordered)
594{ 621{
595 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree; 622 struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
@@ -597,18 +624,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
597 u64 disk_i_size; 624 u64 disk_i_size;
598 u64 new_i_size; 625 u64 new_i_size;
599 u64 i_size_test; 626 u64 i_size_test;
627 u64 i_size = i_size_read(inode);
600 struct rb_node *node; 628 struct rb_node *node;
629 struct rb_node *prev = NULL;
601 struct btrfs_ordered_extent *test; 630 struct btrfs_ordered_extent *test;
631 int ret = 1;
602 632
603 mutex_lock(&tree->mutex); 633 if (ordered)
634 offset = entry_end(ordered);
635 else
636 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
637
638 spin_lock(&tree->lock);
604 disk_i_size = BTRFS_I(inode)->disk_i_size; 639 disk_i_size = BTRFS_I(inode)->disk_i_size;
605 640
641 /* truncate file */
642 if (disk_i_size > i_size) {
643 BTRFS_I(inode)->disk_i_size = i_size;
644 ret = 0;
645 goto out;
646 }
647
606 /* 648 /*
607 * if the disk i_size is already at the inode->i_size, or 649 * if the disk i_size is already at the inode->i_size, or
608 * this ordered extent is inside the disk i_size, we're done 650 * this ordered extent is inside the disk i_size, we're done
609 */ 651 */
610 if (disk_i_size >= inode->i_size || 652 if (disk_i_size == i_size || offset <= disk_i_size) {
611 ordered->file_offset + ordered->len <= disk_i_size) {
612 goto out; 653 goto out;
613 } 654 }
614 655
@@ -616,8 +657,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
616 * we can't update the disk_isize if there are delalloc bytes 657 * we can't update the disk_isize if there are delalloc bytes
617 * between disk_i_size and this ordered extent 658 * between disk_i_size and this ordered extent
618 */ 659 */
619 if (test_range_bit(io_tree, disk_i_size, 660 if (test_range_bit(io_tree, disk_i_size, offset - 1,
620 ordered->file_offset + ordered->len - 1,
621 EXTENT_DELALLOC, 0, NULL)) { 661 EXTENT_DELALLOC, 0, NULL)) {
622 goto out; 662 goto out;
623 } 663 }
@@ -626,20 +666,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
626 * if we find an ordered extent then we can't update disk i_size 666 * if we find an ordered extent then we can't update disk i_size
627 * yet 667 * yet
628 */ 668 */
629 node = &ordered->rb_node; 669 if (ordered) {
630 while (1) { 670 node = rb_prev(&ordered->rb_node);
631 node = rb_prev(node); 671 } else {
632 if (!node) 672 prev = tree_search(tree, offset);
633 break; 673 /*
674 * we insert file extents without involving ordered struct,
675 * so there should be no ordered struct cover this offset
676 */
677 if (prev) {
678 test = rb_entry(prev, struct btrfs_ordered_extent,
679 rb_node);
680 BUG_ON(offset_in_entry(test, offset));
681 }
682 node = prev;
683 }
684 while (node) {
634 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 685 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
635 if (test->file_offset + test->len <= disk_i_size) 686 if (test->file_offset + test->len <= disk_i_size)
636 break; 687 break;
637 if (test->file_offset >= inode->i_size) 688 if (test->file_offset >= i_size)
638 break; 689 break;
639 if (test->file_offset >= disk_i_size) 690 if (test->file_offset >= disk_i_size)
640 goto out; 691 goto out;
692 node = rb_prev(node);
641 } 693 }
642 new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode)); 694 new_i_size = min_t(u64, offset, i_size);
643 695
644 /* 696 /*
645 * at this point, we know we can safely update i_size to at least 697 * at this point, we know we can safely update i_size to at least
@@ -647,7 +699,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
647 * walk forward and see if ios from higher up in the file have 699 * walk forward and see if ios from higher up in the file have
648 * finished. 700 * finished.
649 */ 701 */
650 node = rb_next(&ordered->rb_node); 702 if (ordered) {
703 node = rb_next(&ordered->rb_node);
704 } else {
705 if (prev)
706 node = rb_next(prev);
707 else
708 node = rb_first(&tree->tree);
709 }
651 i_size_test = 0; 710 i_size_test = 0;
652 if (node) { 711 if (node) {
653 /* 712 /*
@@ -655,10 +714,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
655 * between our ordered extent and the next one. 714 * between our ordered extent and the next one.
656 */ 715 */
657 test = rb_entry(node, struct btrfs_ordered_extent, rb_node); 716 test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
658 if (test->file_offset > entry_end(ordered)) 717 if (test->file_offset > offset)
659 i_size_test = test->file_offset; 718 i_size_test = test->file_offset;
660 } else { 719 } else {
661 i_size_test = i_size_read(inode); 720 i_size_test = i_size;
662 } 721 }
663 722
664 /* 723 /*
@@ -667,15 +726,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
667 * are no delalloc bytes in this area, it is safe to update 726 * are no delalloc bytes in this area, it is safe to update
668 * disk_i_size to the end of the region. 727 * disk_i_size to the end of the region.
669 */ 728 */
670 if (i_size_test > entry_end(ordered) && 729 if (i_size_test > offset &&
671 !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1, 730 !test_range_bit(io_tree, offset, i_size_test - 1,
672 EXTENT_DELALLOC, 0, NULL)) { 731 EXTENT_DELALLOC, 0, NULL)) {
673 new_i_size = min_t(u64, i_size_test, i_size_read(inode)); 732 new_i_size = min_t(u64, i_size_test, i_size);
674 } 733 }
675 BTRFS_I(inode)->disk_i_size = new_i_size; 734 BTRFS_I(inode)->disk_i_size = new_i_size;
735 ret = 0;
676out: 736out:
677 mutex_unlock(&tree->mutex); 737 /*
678 return 0; 738 * we need to remove the ordered extent with the tree lock held
739 * so that other people calling this function don't find our fully
740 * processed ordered entry and skip updating the i_size
741 */
742 if (ordered)
743 __btrfs_remove_ordered_extent(inode, ordered);
744 spin_unlock(&tree->lock);
745 if (ordered)
746 wake_up(&ordered->wait);
747 return ret;
679} 748}
680 749
681/* 750/*
@@ -699,7 +768,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
699 if (!ordered) 768 if (!ordered)
700 return 1; 769 return 1;
701 770
702 mutex_lock(&tree->mutex); 771 spin_lock(&tree->lock);
703 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 772 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
704 if (disk_bytenr >= ordered_sum->bytenr) { 773 if (disk_bytenr >= ordered_sum->bytenr) {
705 num_sectors = ordered_sum->len / sectorsize; 774 num_sectors = ordered_sum->len / sectorsize;
@@ -714,7 +783,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
714 } 783 }
715 } 784 }
716out: 785out:
717 mutex_unlock(&tree->mutex); 786 spin_unlock(&tree->lock);
718 btrfs_put_ordered_extent(ordered); 787 btrfs_put_ordered_extent(ordered);
719 return ret; 788 return ret;
720} 789}