about summary refs log tree commit diff stats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 628
1 files changed, 584 insertions, 44 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c5b8ba37f88e..9c45431e69ab 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -492,11 +492,19 @@ insert:
492 492
493 if (btrfs_inode_generation(eb, src_item) == 0) { 493 if (btrfs_inode_generation(eb, src_item) == 0) {
494 struct extent_buffer *dst_eb = path->nodes[0]; 494 struct extent_buffer *dst_eb = path->nodes[0];
495 const u64 ino_size = btrfs_inode_size(eb, src_item);
495 496
497 /*
498 * For regular files an ino_size == 0 is used only when
499 * logging that an inode exists, as part of a directory
500 * fsync, and the inode wasn't fsynced before. In this
501 * case don't set the size of the inode in the fs/subvol
502 * tree, otherwise we would be throwing valid data away.
503 */
496 if (S_ISREG(btrfs_inode_mode(eb, src_item)) && 504 if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
497 S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) { 505 S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) &&
506 ino_size != 0) {
498 struct btrfs_map_token token; 507 struct btrfs_map_token token;
499 u64 ino_size = btrfs_inode_size(eb, src_item);
500 508
501 btrfs_init_map_token(&token); 509 btrfs_init_map_token(&token);
502 btrfs_set_token_inode_size(dst_eb, dst_item, 510 btrfs_set_token_inode_size(dst_eb, dst_item,
@@ -1951,6 +1959,104 @@ out:
1951 return ret; 1959 return ret;
1952} 1960}
1953 1961
1962static int replay_xattr_deletes(struct btrfs_trans_handle *trans,
1963 struct btrfs_root *root,
1964 struct btrfs_root *log,
1965 struct btrfs_path *path,
1966 const u64 ino)
1967{
1968 struct btrfs_key search_key;
1969 struct btrfs_path *log_path;
1970 int i;
1971 int nritems;
1972 int ret;
1973
1974 log_path = btrfs_alloc_path();
1975 if (!log_path)
1976 return -ENOMEM;
1977
1978 search_key.objectid = ino;
1979 search_key.type = BTRFS_XATTR_ITEM_KEY;
1980 search_key.offset = 0;
1981again:
1982 ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
1983 if (ret < 0)
1984 goto out;
1985process_leaf:
1986 nritems = btrfs_header_nritems(path->nodes[0]);
1987 for (i = path->slots[0]; i < nritems; i++) {
1988 struct btrfs_key key;
1989 struct btrfs_dir_item *di;
1990 struct btrfs_dir_item *log_di;
1991 u32 total_size;
1992 u32 cur;
1993
1994 btrfs_item_key_to_cpu(path->nodes[0], &key, i);
1995 if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) {
1996 ret = 0;
1997 goto out;
1998 }
1999
2000 di = btrfs_item_ptr(path->nodes[0], i, struct btrfs_dir_item);
2001 total_size = btrfs_item_size_nr(path->nodes[0], i);
2002 cur = 0;
2003 while (cur < total_size) {
2004 u16 name_len = btrfs_dir_name_len(path->nodes[0], di);
2005 u16 data_len = btrfs_dir_data_len(path->nodes[0], di);
2006 u32 this_len = sizeof(*di) + name_len + data_len;
2007 char *name;
2008
2009 name = kmalloc(name_len, GFP_NOFS);
2010 if (!name) {
2011 ret = -ENOMEM;
2012 goto out;
2013 }
2014 read_extent_buffer(path->nodes[0], name,
2015 (unsigned long)(di + 1), name_len);
2016
2017 log_di = btrfs_lookup_xattr(NULL, log, log_path, ino,
2018 name, name_len, 0);
2019 btrfs_release_path(log_path);
2020 if (!log_di) {
2021 /* Doesn't exist in log tree, so delete it. */
2022 btrfs_release_path(path);
2023 di = btrfs_lookup_xattr(trans, root, path, ino,
2024 name, name_len, -1);
2025 kfree(name);
2026 if (IS_ERR(di)) {
2027 ret = PTR_ERR(di);
2028 goto out;
2029 }
2030 ASSERT(di);
2031 ret = btrfs_delete_one_dir_name(trans, root,
2032 path, di);
2033 if (ret)
2034 goto out;
2035 btrfs_release_path(path);
2036 search_key = key;
2037 goto again;
2038 }
2039 kfree(name);
2040 if (IS_ERR(log_di)) {
2041 ret = PTR_ERR(log_di);
2042 goto out;
2043 }
2044 cur += this_len;
2045 di = (struct btrfs_dir_item *)((char *)di + this_len);
2046 }
2047 }
2048 ret = btrfs_next_leaf(root, path);
2049 if (ret > 0)
2050 ret = 0;
2051 else if (ret == 0)
2052 goto process_leaf;
2053out:
2054 btrfs_free_path(log_path);
2055 btrfs_release_path(path);
2056 return ret;
2057}
2058
2059
1954/* 2060/*
1955 * deletion replay happens before we copy any new directory items 2061 * deletion replay happens before we copy any new directory items
1956 * out of the log or out of backreferences from inodes. It 2062 * out of the log or out of backreferences from inodes. It
@@ -2104,6 +2210,10 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2104 2210
2105 inode_item = btrfs_item_ptr(eb, i, 2211 inode_item = btrfs_item_ptr(eb, i,
2106 struct btrfs_inode_item); 2212 struct btrfs_inode_item);
2213 ret = replay_xattr_deletes(wc->trans, root, log,
2214 path, key.objectid);
2215 if (ret)
2216 break;
2107 mode = btrfs_inode_mode(eb, inode_item); 2217 mode = btrfs_inode_mode(eb, inode_item);
2108 if (S_ISDIR(mode)) { 2218 if (S_ISDIR(mode)) {
2109 ret = replay_dir_deletes(wc->trans, 2219 ret = replay_dir_deletes(wc->trans,
@@ -2230,7 +2340,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2230 if (trans) { 2340 if (trans) {
2231 btrfs_tree_lock(next); 2341 btrfs_tree_lock(next);
2232 btrfs_set_lock_blocking(next); 2342 btrfs_set_lock_blocking(next);
2233 clean_tree_block(trans, root, next); 2343 clean_tree_block(trans, root->fs_info,
2344 next);
2234 btrfs_wait_tree_block_writeback(next); 2345 btrfs_wait_tree_block_writeback(next);
2235 btrfs_tree_unlock(next); 2346 btrfs_tree_unlock(next);
2236 } 2347 }
@@ -2308,7 +2419,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
2308 if (trans) { 2419 if (trans) {
2309 btrfs_tree_lock(next); 2420 btrfs_tree_lock(next);
2310 btrfs_set_lock_blocking(next); 2421 btrfs_set_lock_blocking(next);
2311 clean_tree_block(trans, root, next); 2422 clean_tree_block(trans, root->fs_info,
2423 next);
2312 btrfs_wait_tree_block_writeback(next); 2424 btrfs_wait_tree_block_writeback(next);
2313 btrfs_tree_unlock(next); 2425 btrfs_tree_unlock(next);
2314 } 2426 }
@@ -2384,7 +2496,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
2384 if (trans) { 2496 if (trans) {
2385 btrfs_tree_lock(next); 2497 btrfs_tree_lock(next);
2386 btrfs_set_lock_blocking(next); 2498 btrfs_set_lock_blocking(next);
2387 clean_tree_block(trans, log, next); 2499 clean_tree_block(trans, log->fs_info, next);
2388 btrfs_wait_tree_block_writeback(next); 2500 btrfs_wait_tree_block_writeback(next);
2389 btrfs_tree_unlock(next); 2501 btrfs_tree_unlock(next);
2390 } 2502 }
@@ -3020,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3020 struct btrfs_root *root, struct inode *inode, 3132 struct btrfs_root *root, struct inode *inode,
3021 struct btrfs_path *path, 3133 struct btrfs_path *path,
3022 struct btrfs_path *dst_path, int key_type, 3134 struct btrfs_path *dst_path, int key_type,
3135 struct btrfs_log_ctx *ctx,
3023 u64 min_offset, u64 *last_offset_ret) 3136 u64 min_offset, u64 *last_offset_ret)
3024{ 3137{
3025 struct btrfs_key min_key; 3138 struct btrfs_key min_key;
@@ -3104,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3104 src = path->nodes[0]; 3217 src = path->nodes[0];
3105 nritems = btrfs_header_nritems(src); 3218 nritems = btrfs_header_nritems(src);
3106 for (i = path->slots[0]; i < nritems; i++) { 3219 for (i = path->slots[0]; i < nritems; i++) {
3220 struct btrfs_dir_item *di;
3221
3107 btrfs_item_key_to_cpu(src, &min_key, i); 3222 btrfs_item_key_to_cpu(src, &min_key, i);
3108 3223
3109 if (min_key.objectid != ino || min_key.type != key_type) 3224 if (min_key.objectid != ino || min_key.type != key_type)
@@ -3114,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
3114 err = ret; 3229 err = ret;
3115 goto done; 3230 goto done;
3116 } 3231 }
3232
3233 /*
3234 * We must make sure that when we log a directory entry,
3235 * the corresponding inode, after log replay, has a
3236 * matching link count. For example:
3237 *
3238 * touch foo
3239 * mkdir mydir
3240 * sync
3241 * ln foo mydir/bar
3242 * xfs_io -c "fsync" mydir
3243 * <crash>
3244 * <mount fs and log replay>
3245 *
3246 * Would result in a fsync log that when replayed, our
3247 * file inode would have a link count of 1, but we get
3248 * two directory entries pointing to the same inode.
3249 * After removing one of the names, it would not be
3250 * possible to remove the other name, which always
3251 * resulted in stale file handle errors, and it would
3252 * not be possible to rmdir the parent directory, since
3253 * its i_size could never decrement to the value
3254 * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors.
3255 */
3256 di = btrfs_item_ptr(src, i, struct btrfs_dir_item);
3257 btrfs_dir_item_key_to_cpu(src, di, &tmp);
3258 if (ctx &&
3259 (btrfs_dir_transid(src, di) == trans->transid ||
3260 btrfs_dir_type(src, di) == BTRFS_FT_DIR) &&
3261 tmp.type != BTRFS_ROOT_ITEM_KEY)
3262 ctx->log_new_dentries = true;
3117 } 3263 }
3118 path->slots[0] = nritems; 3264 path->slots[0] = nritems;
3119 3265
@@ -3175,7 +3321,8 @@ done:
3175static noinline int log_directory_changes(struct btrfs_trans_handle *trans, 3321static noinline int log_directory_changes(struct btrfs_trans_handle *trans,
3176 struct btrfs_root *root, struct inode *inode, 3322 struct btrfs_root *root, struct inode *inode,
3177 struct btrfs_path *path, 3323 struct btrfs_path *path,
3178 struct btrfs_path *dst_path) 3324 struct btrfs_path *dst_path,
3325 struct btrfs_log_ctx *ctx)
3179{ 3326{
3180 u64 min_key; 3327 u64 min_key;
3181 u64 max_key; 3328 u64 max_key;
@@ -3187,7 +3334,7 @@ again:
3187 max_key = 0; 3334 max_key = 0;
3188 while (1) { 3335 while (1) {
3189 ret = log_dir_items(trans, root, inode, path, 3336 ret = log_dir_items(trans, root, inode, path,
3190 dst_path, key_type, min_key, 3337 dst_path, key_type, ctx, min_key,
3191 &max_key); 3338 &max_key);
3192 if (ret) 3339 if (ret)
3193 return ret; 3340 return ret;
@@ -3734,12 +3881,6 @@ static int wait_ordered_extents(struct btrfs_trans_handle *trans,
3734 &ordered->flags)) 3881 &ordered->flags))
3735 continue; 3882 continue;
3736 3883
3737 if (ordered->csum_bytes_left) {
3738 btrfs_start_ordered_extent(inode, ordered, 0);
3739 wait_event(ordered->wait,
3740 ordered->csum_bytes_left == 0);
3741 }
3742
3743 list_for_each_entry(sum, &ordered->list, list) { 3884 list_for_each_entry(sum, &ordered->list, list) {
3744 ret = btrfs_csum_file_blocks(trans, log, sum); 3885 ret = btrfs_csum_file_blocks(trans, log, sum);
3745 if (ret) 3886 if (ret)
@@ -3963,7 +4104,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
3963 if (ret < 0) { 4104 if (ret < 0) {
3964 return ret; 4105 return ret;
3965 } else if (ret > 0) { 4106 } else if (ret > 0) {
3966 *size_ret = i_size_read(inode); 4107 *size_ret = 0;
3967 } else { 4108 } else {
3968 struct btrfs_inode_item *item; 4109 struct btrfs_inode_item *item;
3969 4110
@@ -3976,6 +4117,187 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
3976 return 0; 4117 return 0;
3977} 4118}
3978 4119
4120/*
4121 * At the moment we always log all xattrs. This is to figure out at log replay
4122 * time which xattrs must have their deletion replayed. If a xattr is missing
4123 * in the log tree and exists in the fs/subvol tree, we delete it. This is
4124 * because if a xattr is deleted, the inode is fsynced and a power failure
4125 * happens, causing the log to be replayed the next time the fs is mounted,
4126 * we want the xattr to not exist anymore (same behaviour as other filesystems
4127 * with a journal, ext3/4, xfs, f2fs, etc).
4128 */
4129static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans,
4130 struct btrfs_root *root,
4131 struct inode *inode,
4132 struct btrfs_path *path,
4133 struct btrfs_path *dst_path)
4134{
4135 int ret;
4136 struct btrfs_key key;
4137 const u64 ino = btrfs_ino(inode);
4138 int ins_nr = 0;
4139 int start_slot = 0;
4140
4141 key.objectid = ino;
4142 key.type = BTRFS_XATTR_ITEM_KEY;
4143 key.offset = 0;
4144
4145 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4146 if (ret < 0)
4147 return ret;
4148
4149 while (true) {
4150 int slot = path->slots[0];
4151 struct extent_buffer *leaf = path->nodes[0];
4152 int nritems = btrfs_header_nritems(leaf);
4153
4154 if (slot >= nritems) {
4155 if (ins_nr > 0) {
4156 u64 last_extent = 0;
4157
4158 ret = copy_items(trans, inode, dst_path, path,
4159 &last_extent, start_slot,
4160 ins_nr, 1, 0);
4161 /* can't be 1, extent items aren't processed */
4162 ASSERT(ret <= 0);
4163 if (ret < 0)
4164 return ret;
4165 ins_nr = 0;
4166 }
4167 ret = btrfs_next_leaf(root, path);
4168 if (ret < 0)
4169 return ret;
4170 else if (ret > 0)
4171 break;
4172 continue;
4173 }
4174
4175 btrfs_item_key_to_cpu(leaf, &key, slot);
4176 if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY)
4177 break;
4178
4179 if (ins_nr == 0)
4180 start_slot = slot;
4181 ins_nr++;
4182 path->slots[0]++;
4183 cond_resched();
4184 }
4185 if (ins_nr > 0) {
4186 u64 last_extent = 0;
4187
4188 ret = copy_items(trans, inode, dst_path, path,
4189 &last_extent, start_slot,
4190 ins_nr, 1, 0);
4191 /* can't be 1, extent items aren't processed */
4192 ASSERT(ret <= 0);
4193 if (ret < 0)
4194 return ret;
4195 }
4196
4197 return 0;
4198}
4199
4200/*
4201 * If the no holes feature is enabled we need to make sure any hole between the
4202 * last extent and the i_size of our inode is explicitly marked in the log. This
4203 * is to make sure that doing something like:
4204 *
4205 * 1) create file with 128Kb of data
4206 * 2) truncate file to 64Kb
4207 * 3) truncate file to 256Kb
4208 * 4) fsync file
4209 * 5) <crash/power failure>
4210 * 6) mount fs and trigger log replay
4211 *
4212 * Will give us a file with a size of 256Kb, the first 64Kb of data match what
4213 * the file had in its first 64Kb of data at step 1 and the last 192Kb of the
4214 * file correspond to a hole. The presence of explicit holes in a log tree is
4215 * what guarantees that log replay will remove/adjust file extent items in the
4216 * fs/subvol tree.
4217 *
4218 * Here we do not need to care about holes between extents, that is already done
4219 * by copy_items(). We also only need to do this in the full sync path, where we
4220 * look up extents from the fs/subvol tree only. In the fast path case, we
4221 * look up the list of modified extent maps and if any represents a hole, we
4222 * insert a corresponding extent representing a hole in the log tree.
4223 */
4224static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
4225 struct btrfs_root *root,
4226 struct inode *inode,
4227 struct btrfs_path *path)
4228{
4229 int ret;
4230 struct btrfs_key key;
4231 u64 hole_start;
4232 u64 hole_size;
4233 struct extent_buffer *leaf;
4234 struct btrfs_root *log = root->log_root;
4235 const u64 ino = btrfs_ino(inode);
4236 const u64 i_size = i_size_read(inode);
4237
4238 if (!btrfs_fs_incompat(root->fs_info, NO_HOLES))
4239 return 0;
4240
4241 key.objectid = ino;
4242 key.type = BTRFS_EXTENT_DATA_KEY;
4243 key.offset = (u64)-1;
4244
4245 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
4246 ASSERT(ret != 0);
4247 if (ret < 0)
4248 return ret;
4249
4250 ASSERT(path->slots[0] > 0);
4251 path->slots[0]--;
4252 leaf = path->nodes[0];
4253 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
4254
4255 if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
4256 /* inode does not have any extents */
4257 hole_start = 0;
4258 hole_size = i_size;
4259 } else {
4260 struct btrfs_file_extent_item *extent;
4261 u64 len;
4262
4263 /*
4264 * If there's an extent beyond i_size, an explicit hole was
4265 * already inserted by copy_items().
4266 */
4267 if (key.offset >= i_size)
4268 return 0;
4269
4270 extent = btrfs_item_ptr(leaf, path->slots[0],
4271 struct btrfs_file_extent_item);
4272
4273 if (btrfs_file_extent_type(leaf, extent) ==
4274 BTRFS_FILE_EXTENT_INLINE) {
4275 len = btrfs_file_extent_inline_len(leaf,
4276 path->slots[0],
4277 extent);
4278 ASSERT(len == i_size);
4279 return 0;
4280 }
4281
4282 len = btrfs_file_extent_num_bytes(leaf, extent);
4283 /* Last extent goes beyond i_size, no need to log a hole. */
4284 if (key.offset + len > i_size)
4285 return 0;
4286 hole_start = key.offset + len;
4287 hole_size = i_size - hole_start;
4288 }
4289 btrfs_release_path(path);
4290
4291 /* Last extent ends at i_size. */
4292 if (hole_size == 0)
4293 return 0;
4294
4295 hole_size = ALIGN(hole_size, root->sectorsize);
4296 ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0,
4297 hole_size, 0, hole_size, 0, 0, 0);
4298 return ret;
4299}
4300
3979/* log a single inode in the tree log. 4301/* log a single inode in the tree log.
3980 * At least one parent directory for this inode must exist in the tree 4302 * At least one parent directory for this inode must exist in the tree
3981 * or be logged already. 4303 * or be logged already.
@@ -4014,6 +4336,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4014 u64 ino = btrfs_ino(inode); 4336 u64 ino = btrfs_ino(inode);
4015 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 4337 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
4016 u64 logged_isize = 0; 4338 u64 logged_isize = 0;
4339 bool need_log_inode_item = true;
4017 4340
4018 path = btrfs_alloc_path(); 4341 path = btrfs_alloc_path();
4019 if (!path) 4342 if (!path)
@@ -4070,10 +4393,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4070 if (S_ISDIR(inode->i_mode)) { 4393 if (S_ISDIR(inode->i_mode)) {
4071 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; 4394 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
4072 4395
4073 if (inode_only == LOG_INODE_EXISTS) { 4396 if (inode_only == LOG_INODE_EXISTS)
4074 max_key_type = BTRFS_INODE_EXTREF_KEY; 4397 max_key_type = BTRFS_XATTR_ITEM_KEY;
4075 max_key.type = max_key_type;
4076 }
4077 ret = drop_objectid_items(trans, log, path, ino, max_key_type); 4398 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
4078 } else { 4399 } else {
4079 if (inode_only == LOG_INODE_EXISTS) { 4400 if (inode_only == LOG_INODE_EXISTS) {
@@ -4098,7 +4419,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4098 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 4419 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4099 &BTRFS_I(inode)->runtime_flags)) { 4420 &BTRFS_I(inode)->runtime_flags)) {
4100 if (inode_only == LOG_INODE_EXISTS) { 4421 if (inode_only == LOG_INODE_EXISTS) {
4101 max_key.type = BTRFS_INODE_EXTREF_KEY; 4422 max_key.type = BTRFS_XATTR_ITEM_KEY;
4102 ret = drop_objectid_items(trans, log, path, ino, 4423 ret = drop_objectid_items(trans, log, path, ino,
4103 max_key.type); 4424 max_key.type);
4104 } else { 4425 } else {
@@ -4106,30 +4427,24 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
4106 &BTRFS_I(inode)->runtime_flags); 4427 &BTRFS_I(inode)->runtime_flags);
4107 clear_bit(BTRFS_INODE_COPY_EVERYTHING, 4428 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4108 &BTRFS_I(inode)->runtime_flags); 4429 &BTRFS_I(inode)->runtime_flags);
4109 ret = btrfs_truncate_inode_items(trans, log, 4430 while(1) {
4110 inode, 0, 0); 4431 ret = btrfs_truncate_inode_items(trans,
4432 log, inode, 0, 0);
4433 if (ret != -EAGAIN)
4434 break;
4435 }
4111 } 4436 }
4112 } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING, 4437 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4113 &BTRFS_I(inode)->runtime_flags) || 4438 &BTRFS_I(inode)->runtime_flags) ||
4114 inode_only == LOG_INODE_EXISTS) { 4439 inode_only == LOG_INODE_EXISTS) {
4115 if (inode_only == LOG_INODE_ALL) { 4440 if (inode_only == LOG_INODE_ALL)
4116 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4117 &BTRFS_I(inode)->runtime_flags);
4118 fast_search = true; 4441 fast_search = true;
4119 max_key.type = BTRFS_XATTR_ITEM_KEY; 4442 max_key.type = BTRFS_XATTR_ITEM_KEY;
4120 } else {
4121 max_key.type = BTRFS_INODE_EXTREF_KEY;
4122 }
4123 ret = drop_objectid_items(trans, log, path, ino, 4443 ret = drop_objectid_items(trans, log, path, ino,
4124 max_key.type); 4444 max_key.type);
4125 } else { 4445 } else {
4126 if (inode_only == LOG_INODE_ALL) 4446 if (inode_only == LOG_INODE_ALL)
4127 fast_search = true; 4447 fast_search = true;
4128 ret = log_inode_item(trans, log, dst_path, inode);
4129 if (ret) {
4130 err = ret;
4131 goto out_unlock;
4132 }
4133 goto log_extents; 4448 goto log_extents;
4134 } 4449 }
4135 4450
@@ -4152,6 +4467,28 @@ again:
4152 if (min_key.type > max_key.type) 4467 if (min_key.type > max_key.type)
4153 break; 4468 break;
4154 4469
4470 if (min_key.type == BTRFS_INODE_ITEM_KEY)
4471 need_log_inode_item = false;
4472
4473 /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
4474 if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
4475 if (ins_nr == 0)
4476 goto next_slot;
4477 ret = copy_items(trans, inode, dst_path, path,
4478 &last_extent, ins_start_slot,
4479 ins_nr, inode_only, logged_isize);
4480 if (ret < 0) {
4481 err = ret;
4482 goto out_unlock;
4483 }
4484 ins_nr = 0;
4485 if (ret) {
4486 btrfs_release_path(path);
4487 continue;
4488 }
4489 goto next_slot;
4490 }
4491
4155 src = path->nodes[0]; 4492 src = path->nodes[0];
4156 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { 4493 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
4157 ins_nr++; 4494 ins_nr++;
@@ -4219,9 +4556,26 @@ next_slot:
4219 ins_nr = 0; 4556 ins_nr = 0;
4220 } 4557 }
4221 4558
4559 btrfs_release_path(path);
4560 btrfs_release_path(dst_path);
4561 err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path);
4562 if (err)
4563 goto out_unlock;
4564 if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) {
4565 btrfs_release_path(path);
4566 btrfs_release_path(dst_path);
4567 err = btrfs_log_trailing_hole(trans, root, inode, path);
4568 if (err)
4569 goto out_unlock;
4570 }
4222log_extents: 4571log_extents:
4223 btrfs_release_path(path); 4572 btrfs_release_path(path);
4224 btrfs_release_path(dst_path); 4573 btrfs_release_path(dst_path);
4574 if (need_log_inode_item) {
4575 err = log_inode_item(trans, log, dst_path, inode);
4576 if (err)
4577 goto out_unlock;
4578 }
4225 if (fast_search) { 4579 if (fast_search) {
4226 /* 4580 /*
4227 * Some ordered extents started by fsync might have completed 4581 * Some ordered extents started by fsync might have completed
@@ -4277,15 +4631,18 @@ log_extents:
4277 } 4631 }
4278 4632
4279 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { 4633 if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
4280 ret = log_directory_changes(trans, root, inode, path, dst_path); 4634 ret = log_directory_changes(trans, root, inode, path, dst_path,
4635 ctx);
4281 if (ret) { 4636 if (ret) {
4282 err = ret; 4637 err = ret;
4283 goto out_unlock; 4638 goto out_unlock;
4284 } 4639 }
4285 } 4640 }
4286 4641
4642 spin_lock(&BTRFS_I(inode)->lock);
4287 BTRFS_I(inode)->logged_trans = trans->transid; 4643 BTRFS_I(inode)->logged_trans = trans->transid;
4288 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 4644 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
4645 spin_unlock(&BTRFS_I(inode)->lock);
4289out_unlock: 4646out_unlock:
4290 if (unlikely(err)) 4647 if (unlikely(err))
4291 btrfs_put_logged_extents(&logged_list); 4648 btrfs_put_logged_extents(&logged_list);
@@ -4327,9 +4684,9 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
4327 goto out; 4684 goto out;
4328 4685
4329 if (!S_ISDIR(inode->i_mode)) { 4686 if (!S_ISDIR(inode->i_mode)) {
4330 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 4687 if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
4331 goto out; 4688 goto out;
4332 inode = parent->d_inode; 4689 inode = d_inode(parent);
4333 } 4690 }
4334 4691
4335 while (1) { 4692 while (1) {
@@ -4355,7 +4712,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
4355 break; 4712 break;
4356 } 4713 }
4357 4714
4358 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 4715 if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
4359 break; 4716 break;
4360 4717
4361 if (IS_ROOT(parent)) 4718 if (IS_ROOT(parent))
@@ -4364,7 +4721,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
4364 parent = dget_parent(parent); 4721 parent = dget_parent(parent);
4365 dput(old_parent); 4722 dput(old_parent);
4366 old_parent = parent; 4723 old_parent = parent;
4367 inode = parent->d_inode; 4724 inode = d_inode(parent);
4368 4725
4369 } 4726 }
4370 dput(old_parent); 4727 dput(old_parent);
@@ -4372,6 +4729,181 @@ out:
4372 return ret; 4729 return ret;
4373} 4730}
4374 4731
4732struct btrfs_dir_list {
4733 u64 ino;
4734 struct list_head list;
4735};
4736
4737/*
4738 * Log the inodes of the new dentries of a directory. See log_dir_items() for
4739 * details about why it is needed.
4740 * This is a recursive operation - if an existing dentry corresponds to a
4741 * directory, that directory's new entries are logged too (same behaviour as
4742 * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes
4743 * the dentries point to we do not lock their i_mutex, otherwise lockdep
4744 * complains about the following circular lock dependency / possible deadlock:
4745 *
4746 * CPU0 CPU1
4747 * ---- ----
4748 * lock(&type->i_mutex_dir_key#3/2);
4749 * lock(sb_internal#2);
4750 * lock(&type->i_mutex_dir_key#3/2);
4751 * lock(&sb->s_type->i_mutex_key#14);
4752 *
4753 * Where sb_internal is the lock (a counter that works as a lock) acquired by
4754 * sb_start_intwrite() in btrfs_start_transaction().
4755 * Not locking i_mutex of the inodes is still safe because:
4756 *
4757 * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible
4758 * that while logging the inode new references (names) are added or removed
4759 * from the inode, leaving the logged inode item with a link count that does
4760 * not match the number of logged inode reference items. This is fine because
4761 * at log replay time we compute the real number of links and correct the
4762 * link count in the inode item (see replay_one_buffer() and
4763 * link_to_fixup_dir());
4764 *
4765 * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that
4766 * while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and
4767 * BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item
4768 * has a size that doesn't match the sum of the lengths of all the logged
4769 * names. This does not result in a problem because if a dir_item key is
4770 * logged but its matching dir_index key is not logged, at log replay time we
4771 * don't use it to replay the respective name (see replay_one_name()). On the
4772 * other hand if only the dir_index key ends up being logged, the respective
4773 * name is added to the fs/subvol tree with both the dir_item and dir_index
4774 * keys created (see replay_one_name()).
4775 * The directory's inode item with a wrong i_size is not a problem as well,
4776 * since we don't use it at log replay time to set the i_size in the inode
4777 * item of the fs/subvol tree (see overwrite_item()).
4778 */
4779static int log_new_dir_dentries(struct btrfs_trans_handle *trans,
4780 struct btrfs_root *root,
4781 struct inode *start_inode,
4782 struct btrfs_log_ctx *ctx)
4783{
4784 struct btrfs_root *log = root->log_root;
4785 struct btrfs_path *path;
4786 LIST_HEAD(dir_list);
4787 struct btrfs_dir_list *dir_elem;
4788 int ret = 0;
4789
4790 path = btrfs_alloc_path();
4791 if (!path)
4792 return -ENOMEM;
4793
4794 dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS);
4795 if (!dir_elem) {
4796 btrfs_free_path(path);
4797 return -ENOMEM;
4798 }
4799 dir_elem->ino = btrfs_ino(start_inode);
4800 list_add_tail(&dir_elem->list, &dir_list);
4801
4802 while (!list_empty(&dir_list)) {
4803 struct extent_buffer *leaf;
4804 struct btrfs_key min_key;
4805 int nritems;
4806 int i;
4807
4808 dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list,
4809 list);
4810 if (ret)
4811 goto next_dir_inode;
4812
4813 min_key.objectid = dir_elem->ino;
4814 min_key.type = BTRFS_DIR_ITEM_KEY;
4815 min_key.offset = 0;
4816again:
4817 btrfs_release_path(path);
4818 ret = btrfs_search_forward(log, &min_key, path, trans->transid);
4819 if (ret < 0) {
4820 goto next_dir_inode;
4821 } else if (ret > 0) {
4822 ret = 0;
4823 goto next_dir_inode;
4824 }
4825
4826process_leaf:
4827 leaf = path->nodes[0];
4828 nritems = btrfs_header_nritems(leaf);
4829 for (i = path->slots[0]; i < nritems; i++) {
4830 struct btrfs_dir_item *di;
4831 struct btrfs_key di_key;
4832 struct inode *di_inode;
4833 struct btrfs_dir_list *new_dir_elem;
4834 int log_mode = LOG_INODE_EXISTS;
4835 int type;
4836
4837 btrfs_item_key_to_cpu(leaf, &min_key, i);
4838 if (min_key.objectid != dir_elem->ino ||
4839 min_key.type != BTRFS_DIR_ITEM_KEY)
4840 goto next_dir_inode;
4841
4842 di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item);
4843 type = btrfs_dir_type(leaf, di);
4844 if (btrfs_dir_transid(leaf, di) < trans->transid &&
4845 type != BTRFS_FT_DIR)
4846 continue;
4847 btrfs_dir_item_key_to_cpu(leaf, di, &di_key);
4848 if (di_key.type == BTRFS_ROOT_ITEM_KEY)
4849 continue;
4850
4851 di_inode = btrfs_iget(root->fs_info->sb, &di_key,
4852 root, NULL);
4853 if (IS_ERR(di_inode)) {
4854 ret = PTR_ERR(di_inode);
4855 goto next_dir_inode;
4856 }
4857
4858 if (btrfs_inode_in_log(di_inode, trans->transid)) {
4859 iput(di_inode);
4860 continue;
4861 }
4862
4863 ctx->log_new_dentries = false;
4864 if (type == BTRFS_FT_DIR)
4865 log_mode = LOG_INODE_ALL;
4866 btrfs_release_path(path);
4867 ret = btrfs_log_inode(trans, root, di_inode,
4868 log_mode, 0, LLONG_MAX, ctx);
4869 iput(di_inode);
4870 if (ret)
4871 goto next_dir_inode;
4872 if (ctx->log_new_dentries) {
4873 new_dir_elem = kmalloc(sizeof(*new_dir_elem),
4874 GFP_NOFS);
4875 if (!new_dir_elem) {
4876 ret = -ENOMEM;
4877 goto next_dir_inode;
4878 }
4879 new_dir_elem->ino = di_key.objectid;
4880 list_add_tail(&new_dir_elem->list, &dir_list);
4881 }
4882 break;
4883 }
4884 if (i == nritems) {
4885 ret = btrfs_next_leaf(log, path);
4886 if (ret < 0) {
4887 goto next_dir_inode;
4888 } else if (ret > 0) {
4889 ret = 0;
4890 goto next_dir_inode;
4891 }
4892 goto process_leaf;
4893 }
4894 if (min_key.offset < (u64)-1) {
4895 min_key.offset++;
4896 goto again;
4897 }
4898next_dir_inode:
4899 list_del(&dir_elem->list);
4900 kfree(dir_elem);
4901 }
4902
4903 btrfs_free_path(path);
4904 return ret;
4905}
4906
4375/* 4907/*
4376 * helper function around btrfs_log_inode to make sure newly created 4908 * helper function around btrfs_log_inode to make sure newly created
4377 * parent directories also end up in the log. A minimal inode and backref 4909 * parent directories also end up in the log. A minimal inode and backref
@@ -4394,6 +4926,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4394 const struct dentry * const first_parent = parent; 4926 const struct dentry * const first_parent = parent;
4395 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans > 4927 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
4396 last_committed); 4928 last_committed);
4929 bool log_dentries = false;
4930 struct inode *orig_inode = inode;
4397 4931
4398 sb = inode->i_sb; 4932 sb = inode->i_sb;
4399 4933
@@ -4449,11 +4983,14 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4449 goto end_trans; 4983 goto end_trans;
4450 } 4984 }
4451 4985
4986 if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries)
4987 log_dentries = true;
4988
4452 while (1) { 4989 while (1) {
4453 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 4990 if (!parent || d_really_is_negative(parent) || sb != d_inode(parent)->i_sb)
4454 break; 4991 break;
4455 4992
4456 inode = parent->d_inode; 4993 inode = d_inode(parent);
4457 if (root != BTRFS_I(inode)->root) 4994 if (root != BTRFS_I(inode)->root)
4458 break; 4995 break;
4459 4996
@@ -4485,7 +5022,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4485 dput(old_parent); 5022 dput(old_parent);
4486 old_parent = parent; 5023 old_parent = parent;
4487 } 5024 }
4488 ret = 0; 5025 if (log_dentries)
5026 ret = log_new_dir_dentries(trans, root, orig_inode, ctx);
5027 else
5028 ret = 0;
4489end_trans: 5029end_trans:
4490 dput(old_parent); 5030 dput(old_parent);
4491 if (ret < 0) { 5031 if (ret < 0) {
@@ -4515,7 +5055,7 @@ int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
4515 struct dentry *parent = dget_parent(dentry); 5055 struct dentry *parent = dget_parent(dentry);
4516 int ret; 5056 int ret;
4517 5057
4518 ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 5058 ret = btrfs_log_inode_parent(trans, root, d_inode(dentry), parent,
4519 start, end, 0, ctx); 5059 start, end, 0, ctx);
4520 dput(parent); 5060 dput(parent);
4521 5061