diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 102 |
1 files changed, 90 insertions, 12 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 978c3a810893..24d03c751149 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include "print-tree.h" | 26 | #include "print-tree.h" |
27 | #include "backref.h" | 27 | #include "backref.h" |
28 | #include "hash.h" | 28 | #include "hash.h" |
29 | #include "compression.h" | ||
29 | 30 | ||
30 | /* magic values for the inode_only field in btrfs_log_inode: | 31 | /* magic values for the inode_only field in btrfs_log_inode: |
31 | * | 32 | * |
@@ -1045,7 +1046,7 @@ again: | |||
1045 | 1046 | ||
1046 | /* | 1047 | /* |
1047 | * NOTE: we have searched root tree and checked the | 1048 | * NOTE: we have searched root tree and checked the |
1048 | * coresponding ref, it does not need to check again. | 1049 | * corresponding ref, it does not need to check again. |
1049 | */ | 1050 | */ |
1050 | *search_done = 1; | 1051 | *search_done = 1; |
1051 | } | 1052 | } |
@@ -4500,7 +4501,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
4500 | 4501 | ||
4501 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 4502 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
4502 | 4503 | ||
4503 | btrfs_get_logged_extents(inode, &logged_list, start, end); | 4504 | /* |
4505 | * Collect ordered extents only if we are logging data. This is to | ||
4506 | * ensure a subsequent request to log this inode in LOG_INODE_ALL mode | ||
4507 | * will process the ordered extents if they still exist at the time, | ||
4508 | * because when we collect them we test and set for the flag | ||
4509 | * BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the | ||
4510 | * same ordered extents. The consequence for the LOG_INODE_ALL log mode | ||
4511 | * not processing the ordered extents is that we end up logging the | ||
4512 | * corresponding file extent items, based on the extent maps in the | ||
4513 | * inode's extent_map_tree's modified_list, without logging the | ||
4514 | * respective checksums (since they may still be only attached to the | ||
4515 | * ordered extents and have not been inserted in the csum tree by | ||
4516 | * btrfs_finish_ordered_io() yet). | ||
4517 | */ | ||
4518 | if (inode_only == LOG_INODE_ALL) | ||
4519 | btrfs_get_logged_extents(inode, &logged_list, start, end); | ||
4504 | 4520 | ||
4505 | /* | 4521 | /* |
4506 | * a brute force approach to making sure we get the most uptodate | 4522 | * a brute force approach to making sure we get the most uptodate |
@@ -4772,6 +4788,42 @@ out_unlock: | |||
4772 | } | 4788 | } |
4773 | 4789 | ||
4774 | /* | 4790 | /* |
4791 | * Check if we must fallback to a transaction commit when logging an inode. | ||
4792 | * This must be called after logging the inode and is used only in the context | ||
4793 | * when fsyncing an inode requires the need to log some other inode - in which | ||
4794 | * case we can't lock the i_mutex of each other inode we need to log as that | ||
4795 | * can lead to deadlocks with concurrent fsync against other inodes (as we can | ||
4796 | * log inodes up or down in the hierarchy) or rename operations for example. So | ||
4797 | * we take the log_mutex of the inode after we have logged it and then check for | ||
4798 | * its last_unlink_trans value - this is safe because any task setting | ||
4799 | * last_unlink_trans must take the log_mutex and it must do this before it does | ||
4800 | * the actual unlink operation, so if we do this check before a concurrent task | ||
4801 | * sets last_unlink_trans it means we've logged a consistent version/state of | ||
4802 | * all the inode items, otherwise we are not sure and must do a transaction | ||
4803 | * commit (the concurrent task might have only updated last_unlink_trans before | ||
4804 | * we logged the inode or it might have also done the unlink). | ||
4805 | */ | ||
4806 | static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans, | ||
4807 | struct inode *inode) | ||
4808 | { | ||
4809 | struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; | ||
4810 | bool ret = false; | ||
4811 | |||
4812 | mutex_lock(&BTRFS_I(inode)->log_mutex); | ||
4813 | if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) { | ||
4814 | /* | ||
4815 | * Make sure any commits to the log are forced to be full | ||
4816 | * commits. | ||
4817 | */ | ||
4818 | btrfs_set_log_full_commit(fs_info, trans); | ||
4819 | ret = true; | ||
4820 | } | ||
4821 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | ||
4822 | |||
4823 | return ret; | ||
4824 | } | ||
4825 | |||
4826 | /* | ||
4775 | * follow the dentry parent pointers up the chain and see if any | 4827 | * follow the dentry parent pointers up the chain and see if any |
4776 | * of the directories in it require a full commit before they can | 4828 | * of the directories in it require a full commit before they can |
4777 | * be logged. Returns zero if nothing special needs to be done or 1 if | 4829 | * be logged. Returns zero if nothing special needs to be done or 1 if |
@@ -4784,7 +4836,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
4784 | u64 last_committed) | 4836 | u64 last_committed) |
4785 | { | 4837 | { |
4786 | int ret = 0; | 4838 | int ret = 0; |
4787 | struct btrfs_root *root; | ||
4788 | struct dentry *old_parent = NULL; | 4839 | struct dentry *old_parent = NULL; |
4789 | struct inode *orig_inode = inode; | 4840 | struct inode *orig_inode = inode; |
4790 | 4841 | ||
@@ -4816,14 +4867,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | |||
4816 | BTRFS_I(inode)->logged_trans = trans->transid; | 4867 | BTRFS_I(inode)->logged_trans = trans->transid; |
4817 | smp_mb(); | 4868 | smp_mb(); |
4818 | 4869 | ||
4819 | if (BTRFS_I(inode)->last_unlink_trans > last_committed) { | 4870 | if (btrfs_must_commit_transaction(trans, inode)) { |
4820 | root = BTRFS_I(inode)->root; | ||
4821 | |||
4822 | /* | ||
4823 | * make sure any commits to the log are forced | ||
4824 | * to be full commits | ||
4825 | */ | ||
4826 | btrfs_set_log_full_commit(root->fs_info, trans); | ||
4827 | ret = 1; | 4871 | ret = 1; |
4828 | break; | 4872 | break; |
4829 | } | 4873 | } |
@@ -4982,6 +5026,9 @@ process_leaf: | |||
4982 | btrfs_release_path(path); | 5026 | btrfs_release_path(path); |
4983 | ret = btrfs_log_inode(trans, root, di_inode, | 5027 | ret = btrfs_log_inode(trans, root, di_inode, |
4984 | log_mode, 0, LLONG_MAX, ctx); | 5028 | log_mode, 0, LLONG_MAX, ctx); |
5029 | if (!ret && | ||
5030 | btrfs_must_commit_transaction(trans, di_inode)) | ||
5031 | ret = 1; | ||
4985 | iput(di_inode); | 5032 | iput(di_inode); |
4986 | if (ret) | 5033 | if (ret) |
4987 | goto next_dir_inode; | 5034 | goto next_dir_inode; |
@@ -5096,6 +5143,9 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans, | |||
5096 | 5143 | ||
5097 | ret = btrfs_log_inode(trans, root, dir_inode, | 5144 | ret = btrfs_log_inode(trans, root, dir_inode, |
5098 | LOG_INODE_ALL, 0, LLONG_MAX, ctx); | 5145 | LOG_INODE_ALL, 0, LLONG_MAX, ctx); |
5146 | if (!ret && | ||
5147 | btrfs_must_commit_transaction(trans, dir_inode)) | ||
5148 | ret = 1; | ||
5099 | iput(dir_inode); | 5149 | iput(dir_inode); |
5100 | if (ret) | 5150 | if (ret) |
5101 | goto out; | 5151 | goto out; |
@@ -5447,6 +5497,9 @@ error: | |||
5447 | * They revolve around files that were unlinked from the directory, and | 5497 | * They revolve around files that were unlinked from the directory, and |
5448 | * this function updates the parent directory so that a full commit is | 5498 | * this function updates the parent directory so that a full commit is |
5449 | * properly done if it is fsync'd later after the unlinks are done. | 5499 | * properly done if it is fsync'd later after the unlinks are done. |
5500 | * | ||
5501 | * Must be called before the unlink operations (updates to the subvolume tree, | ||
5502 | * inodes, etc) are done. | ||
5450 | */ | 5503 | */ |
5451 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | 5504 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, |
5452 | struct inode *dir, struct inode *inode, | 5505 | struct inode *dir, struct inode *inode, |
@@ -5462,8 +5515,11 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | |||
5462 | * into the file. When the file is logged we check it and | 5515 | * into the file. When the file is logged we check it and |
5463 | * don't log the parents if the file is fully on disk. | 5516 | * don't log the parents if the file is fully on disk. |
5464 | */ | 5517 | */ |
5465 | if (S_ISREG(inode->i_mode)) | 5518 | if (S_ISREG(inode->i_mode)) { |
5519 | mutex_lock(&BTRFS_I(inode)->log_mutex); | ||
5466 | BTRFS_I(inode)->last_unlink_trans = trans->transid; | 5520 | BTRFS_I(inode)->last_unlink_trans = trans->transid; |
5521 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | ||
5522 | } | ||
5467 | 5523 | ||
5468 | /* | 5524 | /* |
5469 | * if this directory was already logged any new | 5525 | * if this directory was already logged any new |
@@ -5494,7 +5550,29 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | |||
5494 | return; | 5550 | return; |
5495 | 5551 | ||
5496 | record: | 5552 | record: |
5553 | mutex_lock(&BTRFS_I(dir)->log_mutex); | ||
5554 | BTRFS_I(dir)->last_unlink_trans = trans->transid; | ||
5555 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | ||
5556 | } | ||
5557 | |||
5558 | /* | ||
5559 | * Make sure that if someone attempts to fsync the parent directory of a deleted | ||
5560 | * snapshot, it ends up triggering a transaction commit. This is to guarantee | ||
5561 | * that after replaying the log tree of the parent directory's root we will not | ||
5562 | * see the snapshot anymore and at log replay time we will not see any log tree | ||
5563 | * corresponding to the deleted snapshot's root, which could lead to replaying | ||
5564 | * it after replaying the log tree of the parent directory (which would replay | ||
5565 | * the snapshot delete operation). | ||
5566 | * | ||
5567 | * Must be called before the actual snapshot destroy operation (updates to the | ||
5568 | * parent root and tree of tree roots trees, etc) are done. | ||
5569 | */ | ||
5570 | void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans, | ||
5571 | struct inode *dir) | ||
5572 | { | ||
5573 | mutex_lock(&BTRFS_I(dir)->log_mutex); | ||
5497 | BTRFS_I(dir)->last_unlink_trans = trans->transid; | 5574 | BTRFS_I(dir)->last_unlink_trans = trans->transid; |
5575 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | ||
5498 | } | 5576 | } |
5499 | 5577 | ||
5500 | /* | 5578 | /* |