aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2009-03-24 10:24:20 -0400
committer Chris Mason <chris.mason@oracle.com> 2009-03-24 16:14:52 -0400
commit 12fcfd22fe5bf4fe74710232098bc101af497995 (patch)
tree b01ba82147ea76c89149e54d475ed97121387261 /fs/btrfs/inode.c
parent a74ac3220774d33db967088906dc3351829e2d3a (diff)
Btrfs: tree logging unlink/rename fixes
The tree logging code allows individual files or directories to be logged without including operations on other files and directories in the FS. It tries to commit the minimal set of changes to disk in order to fsync the single file or directory that was sent to fsync or O_SYNC.

The tree logging code was allowing files and directories to be unlinked if they were part of a rename operation where only one directory in the rename was in the fsync log. This patch adds a few new rules to the tree logging.

1) On rename or unlink, if the inode being unlinked isn't in the fsync log, we must force a full commit before doing an fsync of the directory where the unlink was done. The commit isn't done during the unlink, but it is forced the next time we try to log the parent directory.

Solution: record the transid of the last unlink/rename per directory when the directory wasn't already logged. For renames this is only done when renaming to a different directory.

    mkdir foo/some_dir
    normal commit
    rename foo/some_dir foo2/some_dir
    mkdir foo/some_dir
    fsync foo/some_dir/some_file

The fsync above will unlink the original some_dir without recording it in its new location (foo2). After a crash, some_dir will be gone unless the fsync of some_file forces a full commit.

2) We must log any new names for any file or dir that is in the fsync log. This way we make sure not to lose files that are unlinked during the same transaction.

2a) We must log any new names for any file or dir during rename when the directory they are being removed from was logged. 2a is actually the more important variant. Without the extra logging, a crash might unlink the old name without recreating the new one.

3) After a crash, we must go through any directories with a link count of zero and redo the rm -rf.

    mkdir f1/foo
    normal commit
    rm -rf f1/foo
    fsync(f1)

The directory f1/foo was fully removed from the FS, but fsync was never called on f1/foo, only on its parent dir (f1). After a crash the rm -rf must be replayed.
This must be able to recurse down the entire directory tree. The inode link count fixup code takes care of the ugly details. Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c28
1 files changed, 25 insertions, 3 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 9b4faac50c18..bffd79faffb5 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2246,8 +2246,6 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2246 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, 2246 ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len,
2247 inode, dir->i_ino); 2247 inode, dir->i_ino);
2248 BUG_ON(ret != 0 && ret != -ENOENT); 2248 BUG_ON(ret != 0 && ret != -ENOENT);
2249 if (ret != -ENOENT)
2250 BTRFS_I(dir)->log_dirty_trans = trans->transid;
2251 2249
2252 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2250 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
2253 dir, index); 2251 dir, index);
@@ -2280,6 +2278,9 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
2280 trans = btrfs_start_transaction(root, 1); 2278 trans = btrfs_start_transaction(root, 1);
2281 2279
2282 btrfs_set_trans_block_group(trans, dir); 2280 btrfs_set_trans_block_group(trans, dir);
2281
2282 btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0);
2283
2283 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, 2284 ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,
2284 dentry->d_name.name, dentry->d_name.len); 2285 dentry->d_name.name, dentry->d_name.len);
2285 2286
@@ -3042,7 +3043,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3042 bi->disk_i_size = 0; 3043 bi->disk_i_size = 0;
3043 bi->flags = 0; 3044 bi->flags = 0;
3044 bi->index_cnt = (u64)-1; 3045 bi->index_cnt = (u64)-1;
3045 bi->log_dirty_trans = 0; 3046 bi->last_unlink_trans = 0;
3046 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); 3047 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3047 extent_io_tree_init(&BTRFS_I(inode)->io_tree, 3048 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3048 inode->i_mapping, GFP_NOFS); 3049 inode->i_mapping, GFP_NOFS);
@@ -3786,6 +3787,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
3786 drop_inode = 1; 3787 drop_inode = 1;
3787 3788
3788 nr = trans->blocks_used; 3789 nr = trans->blocks_used;
3790
3791 btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
3789 btrfs_end_transaction_throttle(trans, root); 3792 btrfs_end_transaction_throttle(trans, root);
3790fail: 3793fail:
3791 if (drop_inode) { 3794 if (drop_inode) {
@@ -4666,6 +4669,15 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4666 4669
4667 trans = btrfs_start_transaction(root, 1); 4670 trans = btrfs_start_transaction(root, 1);
4668 4671
4672 /*
4673 * this is an ugly little race, but the rename is required to make
4674 * sure that if we crash, the inode is either at the old name
4675 * or the new one. pinning the log transaction lets us make sure
4676 * we don't allow a log commit to come in after we unlink the
4677 * name but before we add the new name back in.
4678 */
4679 btrfs_pin_log_trans(root);
4680
4669 btrfs_set_trans_block_group(trans, new_dir); 4681 btrfs_set_trans_block_group(trans, new_dir);
4670 4682
4671 btrfs_inc_nlink(old_dentry->d_inode); 4683 btrfs_inc_nlink(old_dentry->d_inode);
@@ -4673,6 +4685,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4673 new_dir->i_ctime = new_dir->i_mtime = ctime; 4685 new_dir->i_ctime = new_dir->i_mtime = ctime;
4674 old_inode->i_ctime = ctime; 4686 old_inode->i_ctime = ctime;
4675 4687
4688 if (old_dentry->d_parent != new_dentry->d_parent)
4689 btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
4690
4676 ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, 4691 ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode,
4677 old_dentry->d_name.name, 4692 old_dentry->d_name.name,
4678 old_dentry->d_name.len); 4693 old_dentry->d_name.len);
@@ -4704,7 +4719,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
4704 if (ret) 4719 if (ret)
4705 goto out_fail; 4720 goto out_fail;
4706 4721
4722 btrfs_log_new_name(trans, old_inode, old_dir,
4723 new_dentry->d_parent);
4707out_fail: 4724out_fail:
4725
4726 /* this btrfs_end_log_trans just allows the current
4727 * log-sub transaction to complete
4728 */
4729 btrfs_end_log_trans(root);
4708 btrfs_end_transaction_throttle(trans, root); 4730 btrfs_end_transaction_throttle(trans, root);
4709out_unlock: 4731out_unlock:
4710 return ret; 4732 return ret;