aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ordered-data.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 19:41:54 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-02 19:41:54 -0500
commitb695188dd39162a1a6bff11fdbcc4c0b65b933ab (patch)
treea3df7c052d38b5bfaf335fbf3130abcc5c6ca577 /fs/btrfs/ordered-data.c
parent48476df99894492a0f7239f2f3c9a2dde4ff38e2 (diff)
parent180e001cd5fc2950dc6a7997dde5b65c954d0e79 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs update from Chris Mason: "The biggest feature in the pull is the new (and still experimental) raid56 code that David Woodhouse started long ago. I'm still working on the parity logging setup that will avoid inconsistent parity after a crash, so this is only for testing right now. But, I'd really like to get it out to a broader audience to hammer out any performance issues or other problems. scrub does not yet correct errors on raid5/6 either. Josef has another pass at fsync performance. The big change here is to combine waiting for metadata with waiting for data, which is a big latency win. It is also step one toward using atomics from the hardware during a commit. Mark Fasheh has a new way to use btrfs send/receive to send only the metadata changes. SUSE is using this to make snapper more efficient at finding changes between snapshosts. Snapshot-aware defrag is also included. Otherwise we have a large number of fixes and cleanups. Eric Sandeen wins the award for removing the most lines, and I'm hoping we steal this idea from XFS over and over again." * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (118 commits) btrfs: fixup/remove module.h usage as required Btrfs: delete inline extents when we find them during logging btrfs: try harder to allocate raid56 stripe cache Btrfs: cleanup to make the function btrfs_delalloc_reserve_metadata more logic Btrfs: don't call btrfs_qgroup_free if just btrfs_qgroup_reserve fails Btrfs: remove reduplicate check about root in the function btrfs_clean_quota_tree Btrfs: return ENOMEM rather than use BUG_ON when btrfs_alloc_path fails Btrfs: fix missing deleted items in btrfs_clean_quota_tree btrfs: use only inline_pages from extent buffer Btrfs: fix wrong reserved space when deleting a snapshot/subvolume Btrfs: fix wrong reserved space in qgroup during snap/subv creation Btrfs: remove unnecessary dget_parent/dput when creating the pending snapshot btrfs: remove a printk from scan_one_device Btrfs: fix NULL pointer after aborting a transaction Btrfs: fix memory leak of log roots Btrfs: copy everything if we've created an inline extent btrfs: cleanup for open-coded alignment Btrfs: do not change inode flags in rename Btrfs: use reserved space for creating a snapshot clear chunk_alloc flag on retryable failure ...
Diffstat (limited to 'fs/btrfs/ordered-data.c')
-rw-r--r--fs/btrfs/ordered-data.c98
1 files changed, 80 insertions, 18 deletions
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e5ed56729607..dc08d77b717e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -196,6 +196,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
196 entry->file_offset = file_offset; 196 entry->file_offset = file_offset;
197 entry->start = start; 197 entry->start = start;
198 entry->len = len; 198 entry->len = len;
199 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
200 !(type == BTRFS_ORDERED_NOCOW))
201 entry->csum_bytes_left = disk_len;
199 entry->disk_len = disk_len; 202 entry->disk_len = disk_len;
200 entry->bytes_left = len; 203 entry->bytes_left = len;
201 entry->inode = igrab(inode); 204 entry->inode = igrab(inode);
@@ -213,6 +216,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
213 INIT_LIST_HEAD(&entry->root_extent_list); 216 INIT_LIST_HEAD(&entry->root_extent_list);
214 INIT_LIST_HEAD(&entry->work_list); 217 INIT_LIST_HEAD(&entry->work_list);
215 init_completion(&entry->completion); 218 init_completion(&entry->completion);
219 INIT_LIST_HEAD(&entry->log_list);
216 220
217 trace_btrfs_ordered_extent_add(inode, entry); 221 trace_btrfs_ordered_extent_add(inode, entry);
218 222
@@ -270,6 +274,10 @@ void btrfs_add_ordered_sum(struct inode *inode,
270 tree = &BTRFS_I(inode)->ordered_tree; 274 tree = &BTRFS_I(inode)->ordered_tree;
271 spin_lock_irq(&tree->lock); 275 spin_lock_irq(&tree->lock);
272 list_add_tail(&sum->list, &entry->list); 276 list_add_tail(&sum->list, &entry->list);
277 WARN_ON(entry->csum_bytes_left < sum->len);
278 entry->csum_bytes_left -= sum->len;
279 if (entry->csum_bytes_left == 0)
280 wake_up(&entry->wait);
273 spin_unlock_irq(&tree->lock); 281 spin_unlock_irq(&tree->lock);
274} 282}
275 283
@@ -405,6 +413,66 @@ out:
405 return ret == 0; 413 return ret == 0;
406} 414}
407 415
416/* Needs to either be called under a log transaction or the log_mutex */
417void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode)
418{
419 struct btrfs_ordered_inode_tree *tree;
420 struct btrfs_ordered_extent *ordered;
421 struct rb_node *n;
422 int index = log->log_transid % 2;
423
424 tree = &BTRFS_I(inode)->ordered_tree;
425 spin_lock_irq(&tree->lock);
426 for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
427 ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
428 spin_lock(&log->log_extents_lock[index]);
429 if (list_empty(&ordered->log_list)) {
430 list_add_tail(&ordered->log_list, &log->logged_list[index]);
431 atomic_inc(&ordered->refs);
432 }
433 spin_unlock(&log->log_extents_lock[index]);
434 }
435 spin_unlock_irq(&tree->lock);
436}
437
438void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
439{
440 struct btrfs_ordered_extent *ordered;
441 int index = transid % 2;
442
443 spin_lock_irq(&log->log_extents_lock[index]);
444 while (!list_empty(&log->logged_list[index])) {
445 ordered = list_first_entry(&log->logged_list[index],
446 struct btrfs_ordered_extent,
447 log_list);
448 list_del_init(&ordered->log_list);
449 spin_unlock_irq(&log->log_extents_lock[index]);
450 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
451 &ordered->flags));
452 btrfs_put_ordered_extent(ordered);
453 spin_lock_irq(&log->log_extents_lock[index]);
454 }
455 spin_unlock_irq(&log->log_extents_lock[index]);
456}
457
458void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
459{
460 struct btrfs_ordered_extent *ordered;
461 int index = transid % 2;
462
463 spin_lock_irq(&log->log_extents_lock[index]);
464 while (!list_empty(&log->logged_list[index])) {
465 ordered = list_first_entry(&log->logged_list[index],
466 struct btrfs_ordered_extent,
467 log_list);
468 list_del_init(&ordered->log_list);
469 spin_unlock_irq(&log->log_extents_lock[index]);
470 btrfs_put_ordered_extent(ordered);
471 spin_lock_irq(&log->log_extents_lock[index]);
472 }
473 spin_unlock_irq(&log->log_extents_lock[index]);
474}
475
408/* 476/*
409 * used to drop a reference on an ordered extent. This will free 477 * used to drop a reference on an ordered extent. This will free
410 * the extent if the last reference is dropped 478 * the extent if the last reference is dropped
@@ -544,10 +612,12 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
544 * extra check to make sure the ordered operation list really is empty 612 * extra check to make sure the ordered operation list really is empty
545 * before we return 613 * before we return
546 */ 614 */
547int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) 615int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
616 struct btrfs_root *root, int wait)
548{ 617{
549 struct btrfs_inode *btrfs_inode; 618 struct btrfs_inode *btrfs_inode;
550 struct inode *inode; 619 struct inode *inode;
620 struct btrfs_transaction *cur_trans = trans->transaction;
551 struct list_head splice; 621 struct list_head splice;
552 struct list_head works; 622 struct list_head works;
553 struct btrfs_delalloc_work *work, *next; 623 struct btrfs_delalloc_work *work, *next;
@@ -558,14 +628,10 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
558 628
559 mutex_lock(&root->fs_info->ordered_operations_mutex); 629 mutex_lock(&root->fs_info->ordered_operations_mutex);
560 spin_lock(&root->fs_info->ordered_extent_lock); 630 spin_lock(&root->fs_info->ordered_extent_lock);
561again: 631 list_splice_init(&cur_trans->ordered_operations, &splice);
562 list_splice_init(&root->fs_info->ordered_operations, &splice);
563
564 while (!list_empty(&splice)) { 632 while (!list_empty(&splice)) {
565
566 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 633 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
567 ordered_operations); 634 ordered_operations);
568
569 inode = &btrfs_inode->vfs_inode; 635 inode = &btrfs_inode->vfs_inode;
570 636
571 list_del_init(&btrfs_inode->ordered_operations); 637 list_del_init(&btrfs_inode->ordered_operations);
@@ -574,24 +640,22 @@ again:
574 * the inode may be getting freed (in sys_unlink path). 640 * the inode may be getting freed (in sys_unlink path).
575 */ 641 */
576 inode = igrab(inode); 642 inode = igrab(inode);
577
578 if (!wait && inode) {
579 list_add_tail(&BTRFS_I(inode)->ordered_operations,
580 &root->fs_info->ordered_operations);
581 }
582
583 if (!inode) 643 if (!inode)
584 continue; 644 continue;
645
646 if (!wait)
647 list_add_tail(&BTRFS_I(inode)->ordered_operations,
648 &cur_trans->ordered_operations);
585 spin_unlock(&root->fs_info->ordered_extent_lock); 649 spin_unlock(&root->fs_info->ordered_extent_lock);
586 650
587 work = btrfs_alloc_delalloc_work(inode, wait, 1); 651 work = btrfs_alloc_delalloc_work(inode, wait, 1);
588 if (!work) { 652 if (!work) {
653 spin_lock(&root->fs_info->ordered_extent_lock);
589 if (list_empty(&BTRFS_I(inode)->ordered_operations)) 654 if (list_empty(&BTRFS_I(inode)->ordered_operations))
590 list_add_tail(&btrfs_inode->ordered_operations, 655 list_add_tail(&btrfs_inode->ordered_operations,
591 &splice); 656 &splice);
592 spin_lock(&root->fs_info->ordered_extent_lock);
593 list_splice_tail(&splice, 657 list_splice_tail(&splice,
594 &root->fs_info->ordered_operations); 658 &cur_trans->ordered_operations);
595 spin_unlock(&root->fs_info->ordered_extent_lock); 659 spin_unlock(&root->fs_info->ordered_extent_lock);
596 ret = -ENOMEM; 660 ret = -ENOMEM;
597 goto out; 661 goto out;
@@ -603,9 +667,6 @@ again:
603 cond_resched(); 667 cond_resched();
604 spin_lock(&root->fs_info->ordered_extent_lock); 668 spin_lock(&root->fs_info->ordered_extent_lock);
605 } 669 }
606 if (wait && !list_empty(&root->fs_info->ordered_operations))
607 goto again;
608
609 spin_unlock(&root->fs_info->ordered_extent_lock); 670 spin_unlock(&root->fs_info->ordered_extent_lock);
610out: 671out:
611 list_for_each_entry_safe(work, next, &works, list) { 672 list_for_each_entry_safe(work, next, &works, list) {
@@ -974,6 +1035,7 @@ out:
974void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 1035void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
975 struct btrfs_root *root, struct inode *inode) 1036 struct btrfs_root *root, struct inode *inode)
976{ 1037{
1038 struct btrfs_transaction *cur_trans = trans->transaction;
977 u64 last_mod; 1039 u64 last_mod;
978 1040
979 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); 1041 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
@@ -988,7 +1050,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
988 spin_lock(&root->fs_info->ordered_extent_lock); 1050 spin_lock(&root->fs_info->ordered_extent_lock);
989 if (list_empty(&BTRFS_I(inode)->ordered_operations)) { 1051 if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
990 list_add_tail(&BTRFS_I(inode)->ordered_operations, 1052 list_add_tail(&BTRFS_I(inode)->ordered_operations,
991 &root->fs_info->ordered_operations); 1053 &cur_trans->ordered_operations);
992 } 1054 }
993 spin_unlock(&root->fs_info->ordered_extent_lock); 1055 spin_unlock(&root->fs_info->ordered_extent_lock);
994} 1056}