aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Josef Bacik <jbacik@fb.com> 2014-11-21 14:52:38 -0500
committer Chris Mason <clm@fb.com> 2014-11-21 14:58:32 -0500
commit 50d9aa99bd35c77200e0e3dd7a72274f8304701f (patch)
tree 346a80fdf2343a5c5e010b57b0213c989eed4ac8
parent 9dba8cf128ef98257ca719722280c9634e7e9dc7 (diff)
Btrfs: make sure logged extents complete in the current transaction V3
Liu Bo pointed out that my previous fix would lose the generation update in the scenario I described. It is actually much worse than that: we could lose the entire extent if we lose power right after the transaction commits. Consider the following sequence:

  write extent 0-4k
  log extent in log tree
  commit transaction
      < power fail happens here
  ordered extent completes

We would lose the 0-4k extent because it hasn't updated the actual fs tree, and the transaction commit will reset the log so it isn't replayed. If we lose power before the transaction commit we are safe; otherwise we are not.

Fix this by keeping track of all extents we logged in this transaction. Then, when we go to commit the transaction, make sure we wait for all of those ordered extents to complete before proceeding. This will make sure that if we lose power after the transaction commit we still have our data. This also fixes the problem of the improperly updated extent generation. Thanks,

cc: stable@vger.kernel.org
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Chris Mason <clm@fb.com>
-rw-r--r-- fs/btrfs/disk-io.c 20
-rw-r--r-- fs/btrfs/ordered-data.c 9
-rw-r--r-- fs/btrfs/ordered-data.h 8
-rw-r--r-- fs/btrfs/transaction.c 33
-rw-r--r-- fs/btrfs/transaction.h 2
-rw-r--r-- fs/btrfs/tree-log.c 6
6 files changed, 72 insertions, 6 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 7af9a1978a2f..6efaee8d7739 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4129,6 +4129,25 @@ again:
4129 return 0; 4129 return 0;
4130} 4130}
4131 4131
/*
 * Empty cur_trans->pending_ordered, dropping one reference on each ordered
 * extent found there. Unlike btrfs_wait_pending_ordered(), nothing is waited
 * for; this is the teardown path invoked from
 * btrfs_cleanup_one_transaction().
 *
 * @cur_trans: transaction whose pending_ordered list is drained
 * @fs_info:   provides trans_lock, the spinlock held while the list is
 *             inspected and modified
 */
4132static void btrfs_free_pending_ordered(struct btrfs_transaction *cur_trans,
4133 struct btrfs_fs_info *fs_info)
4134{
4135 struct btrfs_ordered_extent *ordered;
4136
4137 spin_lock(&fs_info->trans_lock);
4138 while (!list_empty(&cur_trans->pending_ordered)) {
4139 ordered = list_first_entry(&cur_trans->pending_ordered,
4140 struct btrfs_ordered_extent,
4141 trans_list);
	/* Unlink while still holding trans_lock, then release it for the put. */
4142 list_del_init(&ordered->trans_list);
4143 spin_unlock(&fs_info->trans_lock);
4144
	/*
	 * NOTE(review): presumably this is the reference that
	 * btrfs_wait_logged_extents() kept when it queued the extent on
	 * trans->ordered instead of putting it -- confirm.
	 */
4145 btrfs_put_ordered_extent(ordered);
	/* Re-take the lock before re-checking the list in the loop condition. */
4146 spin_lock(&fs_info->trans_lock);
4147 }
4148 spin_unlock(&fs_info->trans_lock);
4149}
4150
4132void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, 4151void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
4133 struct btrfs_root *root) 4152 struct btrfs_root *root)
4134{ 4153{
@@ -4140,6 +4159,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
4140 cur_trans->state = TRANS_STATE_UNBLOCKED; 4159 cur_trans->state = TRANS_STATE_UNBLOCKED;
4141 wake_up(&root->fs_info->transaction_wait); 4160 wake_up(&root->fs_info->transaction_wait);
4142 4161
4162 btrfs_free_pending_ordered(cur_trans, root->fs_info);
4143 btrfs_destroy_delayed_inodes(root); 4163 btrfs_destroy_delayed_inodes(root);
4144 btrfs_assert_delayed_root_empty(root); 4164 btrfs_assert_delayed_root_empty(root);
4145 4165
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 1401b1af4f06..9c28eb4da4dd 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -220,6 +220,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
220 INIT_LIST_HEAD(&entry->work_list); 220 INIT_LIST_HEAD(&entry->work_list);
221 init_completion(&entry->completion); 221 init_completion(&entry->completion);
222 INIT_LIST_HEAD(&entry->log_list); 222 INIT_LIST_HEAD(&entry->log_list);
223 INIT_LIST_HEAD(&entry->trans_list);
223 224
224 trace_btrfs_ordered_extent_add(inode, entry); 225 trace_btrfs_ordered_extent_add(inode, entry);
225 226
@@ -443,6 +444,8 @@ void btrfs_get_logged_extents(struct inode *inode,
443 ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); 444 ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
444 if (!list_empty(&ordered->log_list)) 445 if (!list_empty(&ordered->log_list))
445 continue; 446 continue;
447 if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
448 continue;
446 list_add_tail(&ordered->log_list, logged_list); 449 list_add_tail(&ordered->log_list, logged_list);
447 atomic_inc(&ordered->refs); 450 atomic_inc(&ordered->refs);
448 } 451 }
@@ -472,7 +475,8 @@ void btrfs_submit_logged_extents(struct list_head *logged_list,
472 spin_unlock_irq(&log->log_extents_lock[index]); 475 spin_unlock_irq(&log->log_extents_lock[index]);
473} 476}
474 477
475void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) 478void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
479 struct btrfs_root *log, u64 transid)
476{ 480{
477 struct btrfs_ordered_extent *ordered; 481 struct btrfs_ordered_extent *ordered;
478 int index = transid % 2; 482 int index = transid % 2;
@@ -497,7 +501,8 @@ void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
497 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, 501 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
498 &ordered->flags)); 502 &ordered->flags));
499 503
500 btrfs_put_ordered_extent(ordered); 504 if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
505 list_add_tail(&ordered->trans_list, &trans->ordered);
501 spin_lock_irq(&log->log_extents_lock[index]); 506 spin_lock_irq(&log->log_extents_lock[index]);
502 } 507 }
503 spin_unlock_irq(&log->log_extents_lock[index]); 508 spin_unlock_irq(&log->log_extents_lock[index]);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index d81a274d621e..0124bffc775f 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -71,6 +71,8 @@ struct btrfs_ordered_sum {
71 ordered extent */ 71 ordered extent */
72#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */ 72#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
73 73
74#define BTRFS_ORDERED_LOGGED 10 /* Set when we've waited on this ordered extent
75 * in the logging code. */
74struct btrfs_ordered_extent { 76struct btrfs_ordered_extent {
75 /* logical offset in the file */ 77 /* logical offset in the file */
76 u64 file_offset; 78 u64 file_offset;
@@ -121,6 +123,9 @@ struct btrfs_ordered_extent {
121 /* If we need to wait on this to be done */ 123 /* If we need to wait on this to be done */
122 struct list_head log_list; 124 struct list_head log_list;
123 125
126 /* If the transaction needs to wait on this ordered extent */
127 struct list_head trans_list;
128
124 /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ 129 /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
125 wait_queue_head_t wait; 130 wait_queue_head_t wait;
126 131
@@ -197,7 +202,8 @@ void btrfs_get_logged_extents(struct inode *inode,
197void btrfs_put_logged_extents(struct list_head *logged_list); 202void btrfs_put_logged_extents(struct list_head *logged_list);
198void btrfs_submit_logged_extents(struct list_head *logged_list, 203void btrfs_submit_logged_extents(struct list_head *logged_list,
199 struct btrfs_root *log); 204 struct btrfs_root *log);
200void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); 205void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
206 struct btrfs_root *log, u64 transid);
201void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); 207void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
202int __init ordered_data_init(void); 208int __init ordered_data_init(void);
203void ordered_data_exit(void); 209void ordered_data_exit(void);
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 16c704b68704..295a135c9c24 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -247,6 +247,7 @@ loop:
247 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 247 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
248 INIT_LIST_HEAD(&cur_trans->pending_chunks); 248 INIT_LIST_HEAD(&cur_trans->pending_chunks);
249 INIT_LIST_HEAD(&cur_trans->switch_commits); 249 INIT_LIST_HEAD(&cur_trans->switch_commits);
250 INIT_LIST_HEAD(&cur_trans->pending_ordered);
250 list_add_tail(&cur_trans->list, &fs_info->trans_list); 251 list_add_tail(&cur_trans->list, &fs_info->trans_list);
251 extent_io_tree_init(&cur_trans->dirty_pages, 252 extent_io_tree_init(&cur_trans->dirty_pages,
252 fs_info->btree_inode->i_mapping); 253 fs_info->btree_inode->i_mapping);
@@ -515,6 +516,7 @@ again:
515 h->sync = false; 516 h->sync = false;
516 INIT_LIST_HEAD(&h->qgroup_ref_list); 517 INIT_LIST_HEAD(&h->qgroup_ref_list);
517 INIT_LIST_HEAD(&h->new_bgs); 518 INIT_LIST_HEAD(&h->new_bgs);
519 INIT_LIST_HEAD(&h->ordered);
518 520
519 smp_mb(); 521 smp_mb();
520 if (cur_trans->state >= TRANS_STATE_BLOCKED && 522 if (cur_trans->state >= TRANS_STATE_BLOCKED &&
@@ -746,6 +748,12 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
746 if (!list_empty(&trans->new_bgs)) 748 if (!list_empty(&trans->new_bgs))
747 btrfs_create_pending_block_groups(trans, root); 749 btrfs_create_pending_block_groups(trans, root);
748 750
751 if (!list_empty(&trans->ordered)) {
752 spin_lock(&info->trans_lock);
753 list_splice(&trans->ordered, &cur_trans->pending_ordered);
754 spin_unlock(&info->trans_lock);
755 }
756
749 trans->delayed_ref_updates = 0; 757 trans->delayed_ref_updates = 0;
750 if (!trans->sync) { 758 if (!trans->sync) {
751 must_run_delayed_refs = 759 must_run_delayed_refs =
@@ -1715,6 +1723,28 @@ static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
1715 btrfs_wait_ordered_roots(fs_info, -1); 1723 btrfs_wait_ordered_roots(fs_info, -1);
1716} 1724}
1717 1725
/*
 * Wait for every ordered extent queued on cur_trans->pending_ordered to have
 * BTRFS_ORDERED_COMPLETE set, then drop one reference on it. The list is left
 * empty on return.
 *
 * Called from btrfs_commit_transaction() right after
 * btrfs_wait_delalloc_flush(), so that extents which were logged during this
 * transaction have completed before the commit proceeds.
 *
 * @cur_trans: transaction whose pending_ordered list is drained
 * @fs_info:   provides trans_lock, the spinlock held while the list is
 *             inspected and modified
 */
1726static inline void
1727btrfs_wait_pending_ordered(struct btrfs_transaction *cur_trans,
1728 struct btrfs_fs_info *fs_info)
1729{
1730 struct btrfs_ordered_extent *ordered;
1731
1732 spin_lock(&fs_info->trans_lock);
1733 while (!list_empty(&cur_trans->pending_ordered)) {
1734 ordered = list_first_entry(&cur_trans->pending_ordered,
1735 struct btrfs_ordered_extent,
1736 trans_list);
	/*
	 * Unlink under trans_lock, then drop the spinlock: wait_event()
	 * below can sleep.
	 */
1737 list_del_init(&ordered->trans_list);
1738 spin_unlock(&fs_info->trans_lock);
1739
	/* Block until the ordered extent has been marked complete. */
1740 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_COMPLETE,
1741 &ordered->flags));
	/*
	 * NOTE(review): presumably this is the reference that
	 * btrfs_wait_logged_extents() kept when it queued the extent on
	 * trans->ordered instead of putting it -- confirm.
	 */
1742 btrfs_put_ordered_extent(ordered);
	/* Re-take the lock before re-checking the list in the loop condition. */
1743 spin_lock(&fs_info->trans_lock);
1744 }
1745 spin_unlock(&fs_info->trans_lock);
1746}
1747
1718int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 1748int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1719 struct btrfs_root *root) 1749 struct btrfs_root *root)
1720{ 1750{
@@ -1765,6 +1795,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1765 } 1795 }
1766 1796
1767 spin_lock(&root->fs_info->trans_lock); 1797 spin_lock(&root->fs_info->trans_lock);
1798 list_splice(&trans->ordered, &cur_trans->pending_ordered);
1768 if (cur_trans->state >= TRANS_STATE_COMMIT_START) { 1799 if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
1769 spin_unlock(&root->fs_info->trans_lock); 1800 spin_unlock(&root->fs_info->trans_lock);
1770 atomic_inc(&cur_trans->use_count); 1801 atomic_inc(&cur_trans->use_count);
@@ -1817,6 +1848,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1817 1848
1818 btrfs_wait_delalloc_flush(root->fs_info); 1849 btrfs_wait_delalloc_flush(root->fs_info);
1819 1850
1851 btrfs_wait_pending_ordered(cur_trans, root->fs_info);
1852
1820 btrfs_scrub_pause(root); 1853 btrfs_scrub_pause(root);
1821 /* 1854 /*
1822 * Ok now we need to make sure to block out any other joins while we 1855 * Ok now we need to make sure to block out any other joins while we
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index b3f5b40aab22..fd400a3668a8 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -56,6 +56,7 @@ struct btrfs_transaction {
56 wait_queue_head_t commit_wait; 56 wait_queue_head_t commit_wait;
57 struct list_head pending_snapshots; 57 struct list_head pending_snapshots;
58 struct list_head pending_chunks; 58 struct list_head pending_chunks;
59 struct list_head pending_ordered;
59 struct list_head switch_commits; 60 struct list_head switch_commits;
60 struct btrfs_delayed_ref_root delayed_refs; 61 struct btrfs_delayed_ref_root delayed_refs;
61 int aborted; 62 int aborted;
@@ -105,6 +106,7 @@ struct btrfs_trans_handle {
105 */ 106 */
106 struct btrfs_root *root; 107 struct btrfs_root *root;
107 struct seq_list delayed_ref_elem; 108 struct seq_list delayed_ref_elem;
109 struct list_head ordered;
108 struct list_head qgroup_ref_list; 110 struct list_head qgroup_ref_list;
109 struct list_head new_bgs; 111 struct list_head new_bgs;
110}; 112};
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fc715ff31d26..7d96cc961663 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -2600,7 +2600,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2600 if (atomic_read(&log_root_tree->log_commit[index2])) { 2600 if (atomic_read(&log_root_tree->log_commit[index2])) {
2601 blk_finish_plug(&plug); 2601 blk_finish_plug(&plug);
2602 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2602 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2603 btrfs_wait_logged_extents(log, log_transid); 2603 btrfs_wait_logged_extents(trans, log, log_transid);
2604 wait_log_commit(trans, log_root_tree, 2604 wait_log_commit(trans, log_root_tree,
2605 root_log_ctx.log_transid); 2605 root_log_ctx.log_transid);
2606 mutex_unlock(&log_root_tree->log_mutex); 2606 mutex_unlock(&log_root_tree->log_mutex);
@@ -2645,7 +2645,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2645 btrfs_wait_marked_extents(log_root_tree, 2645 btrfs_wait_marked_extents(log_root_tree,
2646 &log_root_tree->dirty_log_pages, 2646 &log_root_tree->dirty_log_pages,
2647 EXTENT_NEW | EXTENT_DIRTY); 2647 EXTENT_NEW | EXTENT_DIRTY);
2648 btrfs_wait_logged_extents(log, log_transid); 2648 btrfs_wait_logged_extents(trans, log, log_transid);
2649 2649
2650 btrfs_set_super_log_root(root->fs_info->super_for_commit, 2650 btrfs_set_super_log_root(root->fs_info->super_for_commit,
2651 log_root_tree->node->start); 2651 log_root_tree->node->start);
@@ -3766,7 +3766,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3766 fi = btrfs_item_ptr(leaf, path->slots[0], 3766 fi = btrfs_item_ptr(leaf, path->slots[0],
3767 struct btrfs_file_extent_item); 3767 struct btrfs_file_extent_item);
3768 3768
3769 btrfs_set_token_file_extent_generation(leaf, fi, em->generation, 3769 btrfs_set_token_file_extent_generation(leaf, fi, trans->transid,
3770 &token); 3770 &token);
3771 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 3771 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
3772 btrfs_set_token_file_extent_type(leaf, fi, 3772 btrfs_set_token_file_extent_type(leaf, fi,