diff options
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 243 |
1 files changed, 236 insertions, 7 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 6c95159302dd..016c90fc85db 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -492,11 +492,19 @@ insert: | |||
492 | 492 | ||
493 | if (btrfs_inode_generation(eb, src_item) == 0) { | 493 | if (btrfs_inode_generation(eb, src_item) == 0) { |
494 | struct extent_buffer *dst_eb = path->nodes[0]; | 494 | struct extent_buffer *dst_eb = path->nodes[0]; |
495 | const u64 ino_size = btrfs_inode_size(eb, src_item); | ||
495 | 496 | ||
497 | /* | ||
498 | * For regular files an ino_size == 0 is used only when | ||
499 | * logging that an inode exists, as part of a directory | ||
500 | * fsync, and the inode wasn't fsynced before. In this | ||
501 | * case don't set the size of the inode in the fs/subvol | ||
502 | * tree, otherwise we would be throwing valid data away. | ||
503 | */ | ||
496 | if (S_ISREG(btrfs_inode_mode(eb, src_item)) && | 504 | if (S_ISREG(btrfs_inode_mode(eb, src_item)) && |
497 | S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) { | 505 | S_ISREG(btrfs_inode_mode(dst_eb, dst_item)) && |
506 | ino_size != 0) { | ||
498 | struct btrfs_map_token token; | 507 | struct btrfs_map_token token; |
499 | u64 ino_size = btrfs_inode_size(eb, src_item); | ||
500 | 508 | ||
501 | btrfs_init_map_token(&token); | 509 | btrfs_init_map_token(&token); |
502 | btrfs_set_token_inode_size(dst_eb, dst_item, | 510 | btrfs_set_token_inode_size(dst_eb, dst_item, |
@@ -3124,6 +3132,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
3124 | struct btrfs_root *root, struct inode *inode, | 3132 | struct btrfs_root *root, struct inode *inode, |
3125 | struct btrfs_path *path, | 3133 | struct btrfs_path *path, |
3126 | struct btrfs_path *dst_path, int key_type, | 3134 | struct btrfs_path *dst_path, int key_type, |
3135 | struct btrfs_log_ctx *ctx, | ||
3127 | u64 min_offset, u64 *last_offset_ret) | 3136 | u64 min_offset, u64 *last_offset_ret) |
3128 | { | 3137 | { |
3129 | struct btrfs_key min_key; | 3138 | struct btrfs_key min_key; |
@@ -3208,6 +3217,8 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
3208 | src = path->nodes[0]; | 3217 | src = path->nodes[0]; |
3209 | nritems = btrfs_header_nritems(src); | 3218 | nritems = btrfs_header_nritems(src); |
3210 | for (i = path->slots[0]; i < nritems; i++) { | 3219 | for (i = path->slots[0]; i < nritems; i++) { |
3220 | struct btrfs_dir_item *di; | ||
3221 | |||
3211 | btrfs_item_key_to_cpu(src, &min_key, i); | 3222 | btrfs_item_key_to_cpu(src, &min_key, i); |
3212 | 3223 | ||
3213 | if (min_key.objectid != ino || min_key.type != key_type) | 3224 | if (min_key.objectid != ino || min_key.type != key_type) |
@@ -3218,6 +3229,37 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
3218 | err = ret; | 3229 | err = ret; |
3219 | goto done; | 3230 | goto done; |
3220 | } | 3231 | } |
3232 | |||
3233 | /* | ||
3234 | * We must make sure that when we log a directory entry, | ||
3235 | * the corresponding inode, after log replay, has a | ||
3236 | * matching link count. For example: | ||
3237 | * | ||
3238 | * touch foo | ||
3239 | * mkdir mydir | ||
3240 | * sync | ||
3241 | * ln foo mydir/bar | ||
3242 | * xfs_io -c "fsync" mydir | ||
3243 | * <crash> | ||
3244 | * <mount fs and log replay> | ||
3245 | * | ||
3246 | * Would result in a fsync log that, when replayed, leaves | ||
3247 | * our file inode with a link count of 1 while there are | ||
3248 | * two directory entries pointing to the same inode. | ||
3249 | * After removing one of the names, it would not be | ||
3250 | * possible to remove the other name, which always | ||
3251 | * resulted in stale file handle errors, and it would not | ||
3252 | * be possible to rmdir the parent directory, since | ||
3253 | * its i_size could never decrement to the value | ||
3254 | * BTRFS_EMPTY_DIR_SIZE, resulting in -ENOTEMPTY errors. | ||
3255 | */ | ||
3256 | di = btrfs_item_ptr(src, i, struct btrfs_dir_item); | ||
3257 | btrfs_dir_item_key_to_cpu(src, di, &tmp); | ||
3258 | if (ctx && | ||
3259 | (btrfs_dir_transid(src, di) == trans->transid || | ||
3260 | btrfs_dir_type(src, di) == BTRFS_FT_DIR) && | ||
3261 | tmp.type != BTRFS_ROOT_ITEM_KEY) | ||
3262 | ctx->log_new_dentries = true; | ||
3221 | } | 3263 | } |
3222 | path->slots[0] = nritems; | 3264 | path->slots[0] = nritems; |
3223 | 3265 | ||
@@ -3279,7 +3321,8 @@ done: | |||
3279 | static noinline int log_directory_changes(struct btrfs_trans_handle *trans, | 3321 | static noinline int log_directory_changes(struct btrfs_trans_handle *trans, |
3280 | struct btrfs_root *root, struct inode *inode, | 3322 | struct btrfs_root *root, struct inode *inode, |
3281 | struct btrfs_path *path, | 3323 | struct btrfs_path *path, |
3282 | struct btrfs_path *dst_path) | 3324 | struct btrfs_path *dst_path, |
3325 | struct btrfs_log_ctx *ctx) | ||
3283 | { | 3326 | { |
3284 | u64 min_key; | 3327 | u64 min_key; |
3285 | u64 max_key; | 3328 | u64 max_key; |
@@ -3291,7 +3334,7 @@ again: | |||
3291 | max_key = 0; | 3334 | max_key = 0; |
3292 | while (1) { | 3335 | while (1) { |
3293 | ret = log_dir_items(trans, root, inode, path, | 3336 | ret = log_dir_items(trans, root, inode, path, |
3294 | dst_path, key_type, min_key, | 3337 | dst_path, key_type, ctx, min_key, |
3295 | &max_key); | 3338 | &max_key); |
3296 | if (ret) | 3339 | if (ret) |
3297 | return ret; | 3340 | return ret; |
@@ -4067,7 +4110,7 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode, | |||
4067 | if (ret < 0) { | 4110 | if (ret < 0) { |
4068 | return ret; | 4111 | return ret; |
4069 | } else if (ret > 0) { | 4112 | } else if (ret > 0) { |
4070 | *size_ret = i_size_read(inode); | 4113 | *size_ret = 0; |
4071 | } else { | 4114 | } else { |
4072 | struct btrfs_inode_item *item; | 4115 | struct btrfs_inode_item *item; |
4073 | 4116 | ||
@@ -4374,15 +4417,18 @@ log_extents: | |||
4374 | } | 4417 | } |
4375 | 4418 | ||
4376 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 4419 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
4377 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 4420 | ret = log_directory_changes(trans, root, inode, path, dst_path, |
4421 | ctx); | ||
4378 | if (ret) { | 4422 | if (ret) { |
4379 | err = ret; | 4423 | err = ret; |
4380 | goto out_unlock; | 4424 | goto out_unlock; |
4381 | } | 4425 | } |
4382 | } | 4426 | } |
4383 | 4427 | ||
4428 | spin_lock(&BTRFS_I(inode)->lock); | ||
4384 | BTRFS_I(inode)->logged_trans = trans->transid; | 4429 | BTRFS_I(inode)->logged_trans = trans->transid; |
4385 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | 4430 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; |
4431 | spin_unlock(&BTRFS_I(inode)->lock); | ||
4386 | out_unlock: | 4432 | out_unlock: |
4387 | if (unlikely(err)) | 4433 | if (unlikely(err)) |
4388 | btrfs_put_logged_extents(&logged_list); | 4434 | btrfs_put_logged_extents(&logged_list); |
@@ -4469,6 +4515,181 @@ out: | |||
4469 | return ret; | 4515 | return ret; |
4470 | } | 4516 | } |
4471 | 4517 | ||
4518 | struct btrfs_dir_list { /* work-list node: one directory inode number still to be scanned by log_new_dir_dentries() */ | ||
4519 | u64 ino; /* inode number of the directory; filled from btrfs_ino() or from a dir item's location key objectid */ | ||
4520 | struct list_head list; /* links the node into the on-stack dir_list of log_new_dir_dentries() */ | ||
4521 | }; | ||
4522 | |||
4523 | /* | ||
4524 | * Log the inodes of the new dentries of a directory. See log_dir_items() for | ||
4525 | * details about why it is needed. | ||
4526 | * This is a recursive operation - if an existing dentry corresponds to a | ||
4527 | * directory, that directory's new entries are logged too (same behaviour as | ||
4528 | * ext3/4, xfs, f2fs, reiserfs, nilfs2). Note that when logging the inodes | ||
4529 | * the dentries point to we do not lock their i_mutex, otherwise lockdep | ||
4530 | * complains about the following circular lock dependency / possible deadlock: | ||
4531 | * | ||
4532 | * CPU0 CPU1 | ||
4533 | * ---- ---- | ||
4534 | * lock(&type->i_mutex_dir_key#3/2); | ||
4535 | * lock(sb_internal#2); | ||
4536 | * lock(&type->i_mutex_dir_key#3/2); | ||
4537 | * lock(&sb->s_type->i_mutex_key#14); | ||
4538 | * | ||
4539 | * Where sb_internal is the lock (a counter that works as a lock) acquired by | ||
4540 | * sb_start_intwrite() in btrfs_start_transaction(). | ||
4541 | * Not locking i_mutex of the inodes is still safe because: | ||
4542 | * | ||
4543 | * 1) For regular files we log with a mode of LOG_INODE_EXISTS. It's possible | ||
4544 | * that while logging the inode new references (names) are added or removed | ||
4545 | * from the inode, leaving the logged inode item with a link count that does | ||
4546 | * not match the number of logged inode reference items. This is fine because | ||
4547 | * at log replay time we compute the real number of links and correct the | ||
4548 | * link count in the inode item (see replay_one_buffer() and | ||
4549 | * link_to_fixup_dir()); | ||
4550 | * | ||
4551 | * 2) For directories we log with a mode of LOG_INODE_ALL. It's possible that | ||
4552 | * while logging the inode's items new items with keys BTRFS_DIR_ITEM_KEY and | ||
4553 | * BTRFS_DIR_INDEX_KEY are added to fs/subvol tree and the logged inode item | ||
4554 | * has a size that doesn't match the sum of the lengths of all the logged | ||
4555 | * names. This does not result in a problem because if a dir_item key is | ||
4556 | * logged but its matching dir_index key is not logged, at log replay time we | ||
4557 | * don't use it to replay the respective name (see replay_one_name()). On the | ||
4558 | * other hand if only the dir_index key ends up being logged, the respective | ||
4559 | * name is added to the fs/subvol tree with both the dir_item and dir_index | ||
4560 | * keys created (see replay_one_name()). | ||
4561 | * The directory's inode item with a wrong i_size is not a problem as well, | ||
4562 | * since we don't use it at log replay time to set the i_size in the inode | ||
4563 | * item of the fs/subvol tree (see overwrite_item()). | ||
     | * | ||
     | * The recursion is implemented iteratively: directory inode numbers are kept | ||
     | * in a local list that is seeded with start_inode and grows whenever logging | ||
     | * a child inode leaves ctx->log_new_dentries set. | ||
     | * | ||
     | * @trans: transaction handle; trans->transid is passed to | ||
     | * btrfs_search_forward() and compared against each dir item's transid. | ||
     | * @root: root whose log tree (root->log_root) is scanned; also used to | ||
     | * look up and log the inodes the dir items point to. | ||
     | * @start_inode: directory whose inode number seeds the work list. | ||
     | * @ctx: log context; must not be NULL because ctx->log_new_dentries is | ||
     | * written and read here without a NULL check. | ||
     | * | ||
     | * Returns 0 on success, -ENOMEM if a path or list element cannot be | ||
     | * allocated, or the error returned by btrfs_search_forward(), btrfs_iget(), | ||
     | * btrfs_log_inode() or btrfs_next_leaf(). After the first error the remaining | ||
     | * list elements are freed without being processed. | ||
4564 | */ | ||
4565 | static int log_new_dir_dentries(struct btrfs_trans_handle *trans, | ||
4566 | struct btrfs_root *root, | ||
4567 | struct inode *start_inode, | ||
4568 | struct btrfs_log_ctx *ctx) | ||
4569 | { | ||
4570 | struct btrfs_root *log = root->log_root; /* dir items are read from the log tree, not the fs/subvol tree */ | ||
4571 | struct btrfs_path *path; | ||
4572 | LIST_HEAD(dir_list); /* FIFO of directories still to be scanned */ | ||
4573 | struct btrfs_dir_list *dir_elem; | ||
4574 | int ret = 0; | ||
4575 | | ||
4576 | path = btrfs_alloc_path(); | ||
4577 | if (!path) | ||
4578 | return -ENOMEM; | ||
4579 | | ||
4580 | dir_elem = kmalloc(sizeof(*dir_elem), GFP_NOFS); | ||
4581 | if (!dir_elem) { | ||
4582 | btrfs_free_path(path); | ||
4583 | return -ENOMEM; | ||
4584 | } | ||
4585 | dir_elem->ino = btrfs_ino(start_inode); | ||
4586 | list_add_tail(&dir_elem->list, &dir_list); /* seed the work list with the starting directory */ | ||
4587 | | ||
4588 | while (!list_empty(&dir_list)) { | ||
4589 | struct extent_buffer *leaf; | ||
4590 | struct btrfs_key min_key; | ||
4591 | int nritems; | ||
4592 | int i; | ||
4593 | | ||
4594 | dir_elem = list_first_entry(&dir_list, struct btrfs_dir_list, | ||
4595 | list); | ||
4596 | if (ret) /* an earlier step failed: only unlink and free the remaining elements */ | ||
4597 | goto next_dir_inode; | ||
4598 | | ||
4599 | min_key.objectid = dir_elem->ino; | ||
4600 | min_key.type = BTRFS_DIR_ITEM_KEY; | ||
4601 | min_key.offset = 0; | ||
4602 | again: | ||
4603 | btrfs_release_path(path); | ||
4604 | ret = btrfs_search_forward(log, &min_key, path, trans->transid); | ||
4605 | if (ret < 0) { | ||
4606 | goto next_dir_inode; | ||
4607 | } else if (ret > 0) { /* a positive return is treated as "nothing more to process", not as an error */ | ||
4608 | ret = 0; | ||
4609 | goto next_dir_inode; | ||
4610 | } | ||
4611 | | ||
4612 | process_leaf: | ||
4613 | leaf = path->nodes[0]; | ||
4614 | nritems = btrfs_header_nritems(leaf); | ||
4615 | for (i = path->slots[0]; i < nritems; i++) { | ||
4616 | struct btrfs_dir_item *di; | ||
4617 | struct btrfs_key di_key; | ||
4618 | struct inode *di_inode; | ||
4619 | struct btrfs_dir_list *new_dir_elem; | ||
4620 | int log_mode = LOG_INODE_EXISTS; | ||
4621 | int type; | ||
4622 | | ||
4623 | btrfs_item_key_to_cpu(leaf, &min_key, i); /* min_key now tracks the last key examined */ | ||
4624 | if (min_key.objectid != dir_elem->ino || | ||
4625 | min_key.type != BTRFS_DIR_ITEM_KEY) | ||
4626 | goto next_dir_inode; /* ran past this directory's dir items */ | ||
4627 | | ||
4628 | di = btrfs_item_ptr(leaf, i, struct btrfs_dir_item); | ||
4629 | type = btrfs_dir_type(leaf, di); | ||
4630 | if (btrfs_dir_transid(leaf, di) < trans->transid && | ||
4631 | type != BTRFS_FT_DIR) | ||
4632 | continue; /* older non-directory entries are skipped; directories are always visited */ | ||
4633 | btrfs_dir_item_key_to_cpu(leaf, di, &di_key); | ||
4634 | if (di_key.type == BTRFS_ROOT_ITEM_KEY) | ||
4635 | continue; /* entries whose location key is a root item are not logged here */ | ||
4636 | | ||
4637 | di_inode = btrfs_iget(root->fs_info->sb, &di_key, | ||
4638 | root, NULL); | ||
4639 | if (IS_ERR(di_inode)) { | ||
4640 | ret = PTR_ERR(di_inode); | ||
4641 | goto next_dir_inode; | ||
4642 | } | ||
4643 | | ||
4644 | if (btrfs_inode_in_log(di_inode, trans->transid)) { /* nothing to do for inodes reported as already in the log */ | ||
4645 | iput(di_inode); | ||
4646 | continue; | ||
4647 | } | ||
4648 | | ||
4649 | ctx->log_new_dentries = false; /* reset so the flag reflects only the btrfs_log_inode() call below */ | ||
4650 | if (type == BTRFS_FT_DIR) | ||
4651 | log_mode = LOG_INODE_ALL; | ||
4652 | btrfs_release_path(path); /* the leaf is not touched again; the search restarts from min_key */ | ||
4653 | ret = btrfs_log_inode(trans, root, di_inode, | ||
4654 | log_mode, 0, LLONG_MAX, ctx); | ||
4655 | iput(di_inode); | ||
4656 | if (ret) | ||
4657 | goto next_dir_inode; | ||
4658 | if (ctx->log_new_dentries) { /* flag was set while logging: queue this inode for its own scan */ | ||
4659 | new_dir_elem = kmalloc(sizeof(*new_dir_elem), | ||
4660 | GFP_NOFS); | ||
4661 | if (!new_dir_elem) { | ||
4662 | ret = -ENOMEM; | ||
4663 | goto next_dir_inode; | ||
4664 | } | ||
4665 | new_dir_elem->ino = di_key.objectid; | ||
4666 | list_add_tail(&new_dir_elem->list, &dir_list); | ||
4667 | } | ||
4668 | break; /* path was released above, so leave the leaf loop and re-search */ | ||
4669 | } | ||
4670 | if (i == nritems) { /* leaf exhausted without logging anything: continue with the next leaf */ | ||
4671 | ret = btrfs_next_leaf(log, path); | ||
4672 | if (ret < 0) { | ||
4673 | goto next_dir_inode; | ||
4674 | } else if (ret > 0) { | ||
4675 | ret = 0; | ||
4676 | goto next_dir_inode; | ||
4677 | } | ||
4678 | goto process_leaf; | ||
4679 | } | ||
4680 | if (min_key.offset < (u64)-1) { /* resume just past the last key examined, guarding against u64 wrap */ | ||
4681 | min_key.offset++; | ||
4682 | goto again; | ||
4683 | } | ||
4684 | next_dir_inode: | ||
4685 | list_del(&dir_elem->list); /* done with this directory, or draining the list after an error */ | ||
4686 | kfree(dir_elem); | ||
4687 | } | ||
4688 | | ||
4689 | btrfs_free_path(path); | ||
4690 | return ret; | ||
4691 | } | ||
4692 | |||
4472 | /* | 4693 | /* |
4473 | * helper function around btrfs_log_inode to make sure newly created | 4694 | * helper function around btrfs_log_inode to make sure newly created |
4474 | * parent directories also end up in the log. A minimal inode and backref | 4695 | * parent directories also end up in the log. A minimal inode and backref |
@@ -4491,6 +4712,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4491 | const struct dentry * const first_parent = parent; | 4712 | const struct dentry * const first_parent = parent; |
4492 | const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans > | 4713 | const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans > |
4493 | last_committed); | 4714 | last_committed); |
4715 | bool log_dentries = false; | ||
4716 | struct inode *orig_inode = inode; | ||
4494 | 4717 | ||
4495 | sb = inode->i_sb; | 4718 | sb = inode->i_sb; |
4496 | 4719 | ||
@@ -4546,6 +4769,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4546 | goto end_trans; | 4769 | goto end_trans; |
4547 | } | 4770 | } |
4548 | 4771 | ||
4772 | if (S_ISDIR(inode->i_mode) && ctx && ctx->log_new_dentries) | ||
4773 | log_dentries = true; | ||
4774 | |||
4549 | while (1) { | 4775 | while (1) { |
4550 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | 4776 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) |
4551 | break; | 4777 | break; |
@@ -4582,7 +4808,10 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4582 | dput(old_parent); | 4808 | dput(old_parent); |
4583 | old_parent = parent; | 4809 | old_parent = parent; |
4584 | } | 4810 | } |
4585 | ret = 0; | 4811 | if (log_dentries) |
4812 | ret = log_new_dir_dentries(trans, root, orig_inode, ctx); | ||
4813 | else | ||
4814 | ret = 0; | ||
4586 | end_trans: | 4815 | end_trans: |
4587 | dput(old_parent); | 4816 | dput(old_parent); |
4588 | if (ret < 0) { | 4817 | if (ret < 0) { |