about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/tree-log.c
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2015-02-19 17:36:00 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-02-19 17:36:00 -0500
commit 2b9fb532d4168e8974fe49709e2c4c8d5352a64c (patch)
tree 610cbe2d1bb32e28db135a767f158ade31452e2e /fs/btrfs/tree-log.c
parent 4533f6e27a366ecc3da4876074ebfe0cc0ea4f0f (diff)
parent a742994aa2e271eb8cd8e043d276515ec858ed73 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
 "This pull is mostly cleanups and fixes:

   - The raid5/6 cleanups from Zhao Lei fixup some long standing warts
     in the code and add improvements on top of the scrubbing support
     from 3.19.

   - Josef has round one of our ENOSPC fixes coming from large btrfs
     clusters here at FB.

   - Dave Sterba continues a long series of cleanups (thanks Dave), and
     Filipe continues hammering on corner cases in fsync and others

  This all was held up a little trying to track down a use-after-free in
  btrfs raid5/6. It's not clear yet if this is just made easier to
  trigger with this pull or if its a new bug from the raid5/6 cleanups.
  Dave Sterba is the only one to trigger it so far, but he has a
  consistent way to reproduce, so we'll get it nailed shortly"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (68 commits)
  Btrfs: don't remove extents and xattrs when logging new names
  Btrfs: fix fsync data loss after adding hard link to inode
  Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group
  Btrfs: account for large extents with enospc
  Btrfs: don't set and clear delalloc for O_DIRECT writes
  Btrfs: only adjust outstanding_extents when we do a short write
  btrfs: Fix out-of-space bug
  Btrfs: scrub, fix sleep in atomic context
  Btrfs: fix scheduler warning when syncing log
  Btrfs: Remove unnecessary placeholder in btrfs_err_code
  btrfs: cleanup init for list in free-space-cache
  btrfs: delete chunk allocation attemp when setting block group ro
  btrfs: clear bio reference after submit_one_bio()
  Btrfs: fix scrub race leading to use-after-free
  Btrfs: add missing cleanup on sysfs init failure
  Btrfs: fix race between transaction commit and empty block group removal
  btrfs: add more checks to btrfs_read_sys_array
  btrfs: cleanup, rename a few variables in btrfs_read_sys_array
  btrfs: add checks for sys_chunk_array sizes
  btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
  ...
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- fs/btrfs/tree-log.c 234
1 files changed, 184 insertions, 50 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 1a9585d4380a..9a37f8b39bae 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -453,11 +453,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
453insert: 453insert:
454 btrfs_release_path(path); 454 btrfs_release_path(path);
455 /* try to insert the key into the destination tree */ 455 /* try to insert the key into the destination tree */
456 path->skip_release_on_error = 1;
456 ret = btrfs_insert_empty_item(trans, root, path, 457 ret = btrfs_insert_empty_item(trans, root, path,
457 key, item_size); 458 key, item_size);
459 path->skip_release_on_error = 0;
458 460
459 /* make sure any existing item is the correct size */ 461 /* make sure any existing item is the correct size */
460 if (ret == -EEXIST) { 462 if (ret == -EEXIST || ret == -EOVERFLOW) {
461 u32 found_size; 463 u32 found_size;
462 found_size = btrfs_item_size_nr(path->nodes[0], 464 found_size = btrfs_item_size_nr(path->nodes[0],
463 path->slots[0]); 465 path->slots[0]);
@@ -488,8 +490,20 @@ insert:
488 src_item = (struct btrfs_inode_item *)src_ptr; 490 src_item = (struct btrfs_inode_item *)src_ptr;
489 dst_item = (struct btrfs_inode_item *)dst_ptr; 491 dst_item = (struct btrfs_inode_item *)dst_ptr;
490 492
491 if (btrfs_inode_generation(eb, src_item) == 0) 493 if (btrfs_inode_generation(eb, src_item) == 0) {
494 struct extent_buffer *dst_eb = path->nodes[0];
495
496 if (S_ISREG(btrfs_inode_mode(eb, src_item)) &&
497 S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) {
498 struct btrfs_map_token token;
499 u64 ino_size = btrfs_inode_size(eb, src_item);
500
501 btrfs_init_map_token(&token);
502 btrfs_set_token_inode_size(dst_eb, dst_item,
503 ino_size, &token);
504 }
492 goto no_copy; 505 goto no_copy;
506 }
493 507
494 if (overwrite_root && 508 if (overwrite_root &&
495 S_ISDIR(btrfs_inode_mode(eb, src_item)) && 509 S_ISDIR(btrfs_inode_mode(eb, src_item)) &&
@@ -844,7 +858,7 @@ out:
844static noinline int backref_in_log(struct btrfs_root *log, 858static noinline int backref_in_log(struct btrfs_root *log,
845 struct btrfs_key *key, 859 struct btrfs_key *key,
846 u64 ref_objectid, 860 u64 ref_objectid,
847 char *name, int namelen) 861 const char *name, int namelen)
848{ 862{
849 struct btrfs_path *path; 863 struct btrfs_path *path;
850 struct btrfs_inode_ref *ref; 864 struct btrfs_inode_ref *ref;
@@ -1254,13 +1268,14 @@ out:
1254} 1268}
1255 1269
1256static int insert_orphan_item(struct btrfs_trans_handle *trans, 1270static int insert_orphan_item(struct btrfs_trans_handle *trans,
1257 struct btrfs_root *root, u64 offset) 1271 struct btrfs_root *root, u64 ino)
1258{ 1272{
1259 int ret; 1273 int ret;
1260 ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID, 1274
1261 offset, BTRFS_ORPHAN_ITEM_KEY, NULL); 1275 ret = btrfs_insert_orphan_item(trans, root, ino);
1262 if (ret > 0) 1276 if (ret == -EEXIST)
1263 ret = btrfs_insert_orphan_item(trans, root, offset); 1277 ret = 0;
1278
1264 return ret; 1279 return ret;
1265} 1280}
1266 1281
@@ -1287,6 +1302,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
1287 leaf = path->nodes[0]; 1302 leaf = path->nodes[0];
1288 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1303 item_size = btrfs_item_size_nr(leaf, path->slots[0]);
1289 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 1304 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
1305 cur_offset = 0;
1290 1306
1291 while (cur_offset < item_size) { 1307 while (cur_offset < item_size) {
1292 extref = (struct btrfs_inode_extref *) (ptr + cur_offset); 1308 extref = (struct btrfs_inode_extref *) (ptr + cur_offset);
@@ -1302,7 +1318,7 @@ static int count_inode_extrefs(struct btrfs_root *root,
1302 } 1318 }
1303 btrfs_release_path(path); 1319 btrfs_release_path(path);
1304 1320
1305 if (ret < 0) 1321 if (ret < 0 && ret != -ENOENT)
1306 return ret; 1322 return ret;
1307 return nlink; 1323 return nlink;
1308} 1324}
@@ -1394,9 +1410,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
1394 nlink = ret; 1410 nlink = ret;
1395 1411
1396 ret = count_inode_extrefs(root, inode, path); 1412 ret = count_inode_extrefs(root, inode, path);
1397 if (ret == -ENOENT)
1398 ret = 0;
1399
1400 if (ret < 0) 1413 if (ret < 0)
1401 goto out; 1414 goto out;
1402 1415
@@ -1557,6 +1570,30 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
1557} 1570}
1558 1571
1559/* 1572/*
1573 * Return true if an inode reference exists in the log for the given name,
1574 * inode and parent inode.
1575 */
1576static bool name_in_log_ref(struct btrfs_root *log_root,
1577 const char *name, const int name_len,
1578 const u64 dirid, const u64 ino)
1579{
1580 struct btrfs_key search_key;
1581
1582 search_key.objectid = ino;
1583 search_key.type = BTRFS_INODE_REF_KEY;
1584 search_key.offset = dirid;
1585 if (backref_in_log(log_root, &search_key, dirid, name, name_len))
1586 return true;
1587
1588 search_key.type = BTRFS_INODE_EXTREF_KEY;
1589 search_key.offset = btrfs_extref_hash(dirid, name, name_len);
1590 if (backref_in_log(log_root, &search_key, dirid, name, name_len))
1591 return true;
1592
1593 return false;
1594}
1595
1596/*
1560 * take a single entry in a log directory item and replay it into 1597 * take a single entry in a log directory item and replay it into
1561 * the subvolume. 1598 * the subvolume.
1562 * 1599 *
@@ -1666,10 +1703,17 @@ out:
1666 return ret; 1703 return ret;
1667 1704
1668insert: 1705insert:
1706 if (name_in_log_ref(root->log_root, name, name_len,
1707 key->objectid, log_key.objectid)) {
1708 /* The dentry will be added later. */
1709 ret = 0;
1710 update_size = false;
1711 goto out;
1712 }
1669 btrfs_release_path(path); 1713 btrfs_release_path(path);
1670 ret = insert_one_name(trans, root, path, key->objectid, key->offset, 1714 ret = insert_one_name(trans, root, path, key->objectid, key->offset,
1671 name, name_len, log_type, &log_key); 1715 name, name_len, log_type, &log_key);
1672 if (ret && ret != -ENOENT) 1716 if (ret && ret != -ENOENT && ret != -EEXIST)
1673 goto out; 1717 goto out;
1674 update_size = false; 1718 update_size = false;
1675 ret = 0; 1719 ret = 0;
@@ -2164,7 +2208,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
2164 parent = path->nodes[*level]; 2208 parent = path->nodes[*level];
2165 root_owner = btrfs_header_owner(parent); 2209 root_owner = btrfs_header_owner(parent);
2166 2210
2167 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 2211 next = btrfs_find_create_tree_block(root, bytenr);
2168 if (!next) 2212 if (!next)
2169 return -ENOMEM; 2213 return -ENOMEM;
2170 2214
@@ -2416,8 +2460,8 @@ static void wait_for_writer(struct btrfs_trans_handle *trans,
2416 mutex_unlock(&root->log_mutex); 2460 mutex_unlock(&root->log_mutex);
2417 if (atomic_read(&root->log_writers)) 2461 if (atomic_read(&root->log_writers))
2418 schedule(); 2462 schedule();
2419 mutex_lock(&root->log_mutex);
2420 finish_wait(&root->log_writer_wait, &wait); 2463 finish_wait(&root->log_writer_wait, &wait);
2464 mutex_lock(&root->log_mutex);
2421 } 2465 }
2422} 2466}
2423 2467
@@ -3219,7 +3263,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans,
3219static void fill_inode_item(struct btrfs_trans_handle *trans, 3263static void fill_inode_item(struct btrfs_trans_handle *trans,
3220 struct extent_buffer *leaf, 3264 struct extent_buffer *leaf,
3221 struct btrfs_inode_item *item, 3265 struct btrfs_inode_item *item,
3222 struct inode *inode, int log_inode_only) 3266 struct inode *inode, int log_inode_only,
3267 u64 logged_isize)
3223{ 3268{
3224 struct btrfs_map_token token; 3269 struct btrfs_map_token token;
3225 3270
@@ -3232,7 +3277,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
3232 * to say 'update this inode with these values' 3277 * to say 'update this inode with these values'
3233 */ 3278 */
3234 btrfs_set_token_inode_generation(leaf, item, 0, &token); 3279 btrfs_set_token_inode_generation(leaf, item, 0, &token);
3235 btrfs_set_token_inode_size(leaf, item, 0, &token); 3280 btrfs_set_token_inode_size(leaf, item, logged_isize, &token);
3236 } else { 3281 } else {
3237 btrfs_set_token_inode_generation(leaf, item, 3282 btrfs_set_token_inode_generation(leaf, item,
3238 BTRFS_I(inode)->generation, 3283 BTRFS_I(inode)->generation,
@@ -3245,19 +3290,19 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
3245 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); 3290 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
3246 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); 3291 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
3247 3292
3248 btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), 3293 btrfs_set_token_timespec_sec(leaf, &item->atime,
3249 inode->i_atime.tv_sec, &token); 3294 inode->i_atime.tv_sec, &token);
3250 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), 3295 btrfs_set_token_timespec_nsec(leaf, &item->atime,
3251 inode->i_atime.tv_nsec, &token); 3296 inode->i_atime.tv_nsec, &token);
3252 3297
3253 btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), 3298 btrfs_set_token_timespec_sec(leaf, &item->mtime,
3254 inode->i_mtime.tv_sec, &token); 3299 inode->i_mtime.tv_sec, &token);
3255 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), 3300 btrfs_set_token_timespec_nsec(leaf, &item->mtime,
3256 inode->i_mtime.tv_nsec, &token); 3301 inode->i_mtime.tv_nsec, &token);
3257 3302
3258 btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), 3303 btrfs_set_token_timespec_sec(leaf, &item->ctime,
3259 inode->i_ctime.tv_sec, &token); 3304 inode->i_ctime.tv_sec, &token);
3260 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), 3305 btrfs_set_token_timespec_nsec(leaf, &item->ctime,
3261 inode->i_ctime.tv_nsec, &token); 3306 inode->i_ctime.tv_nsec, &token);
3262 3307
3263 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), 3308 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
@@ -3284,7 +3329,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
3284 return ret; 3329 return ret;
3285 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3330 inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3286 struct btrfs_inode_item); 3331 struct btrfs_inode_item);
3287 fill_inode_item(trans, path->nodes[0], inode_item, inode, 0); 3332 fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0);
3288 btrfs_release_path(path); 3333 btrfs_release_path(path);
3289 return 0; 3334 return 0;
3290} 3335}
@@ -3293,7 +3338,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3293 struct inode *inode, 3338 struct inode *inode,
3294 struct btrfs_path *dst_path, 3339 struct btrfs_path *dst_path,
3295 struct btrfs_path *src_path, u64 *last_extent, 3340 struct btrfs_path *src_path, u64 *last_extent,
3296 int start_slot, int nr, int inode_only) 3341 int start_slot, int nr, int inode_only,
3342 u64 logged_isize)
3297{ 3343{
3298 unsigned long src_offset; 3344 unsigned long src_offset;
3299 unsigned long dst_offset; 3345 unsigned long dst_offset;
@@ -3350,7 +3396,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans,
3350 dst_path->slots[0], 3396 dst_path->slots[0],
3351 struct btrfs_inode_item); 3397 struct btrfs_inode_item);
3352 fill_inode_item(trans, dst_path->nodes[0], inode_item, 3398 fill_inode_item(trans, dst_path->nodes[0], inode_item,
3353 inode, inode_only == LOG_INODE_EXISTS); 3399 inode, inode_only == LOG_INODE_EXISTS,
3400 logged_isize);
3354 } else { 3401 } else {
3355 copy_extent_buffer(dst_path->nodes[0], src, dst_offset, 3402 copy_extent_buffer(dst_path->nodes[0], src, dst_offset,
3356 src_offset, ins_sizes[i]); 3403 src_offset, ins_sizes[i]);
@@ -3902,6 +3949,33 @@ process:
3902 return ret; 3949 return ret;
3903} 3950}
3904 3951
3952static int logged_inode_size(struct btrfs_root *log, struct inode *inode,
3953 struct btrfs_path *path, u64 *size_ret)
3954{
3955 struct btrfs_key key;
3956 int ret;
3957
3958 key.objectid = btrfs_ino(inode);
3959 key.type = BTRFS_INODE_ITEM_KEY;
3960 key.offset = 0;
3961
3962 ret = btrfs_search_slot(NULL, log, &key, path, 0, 0);
3963 if (ret < 0) {
3964 return ret;
3965 } else if (ret > 0) {
3966 *size_ret = i_size_read(inode);
3967 } else {
3968 struct btrfs_inode_item *item;
3969
3970 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
3971 struct btrfs_inode_item);
3972 *size_ret = btrfs_inode_size(path->nodes[0], item);
3973 }
3974
3975 btrfs_release_path(path);
3976 return 0;
3977}
3978
3905/* log a single inode in the tree log. 3979/* log a single inode in the tree log.
3906 * At least one parent directory for this inode must exist in the tree 3980 * At least one parent directory for this inode must exist in the tree
3907 * or be logged already. 3981 * or be logged already.
@@ -3939,6 +4013,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3939 bool fast_search = false; 4013 bool fast_search = false;
3940 u64 ino = btrfs_ino(inode); 4014 u64 ino = btrfs_ino(inode);
3941 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 4015 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
4016 u64 logged_isize = 0;
3942 4017
3943 path = btrfs_alloc_path(); 4018 path = btrfs_alloc_path();
3944 if (!path) 4019 if (!path)
@@ -3966,15 +4041,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3966 max_key.type = (u8)-1; 4041 max_key.type = (u8)-1;
3967 max_key.offset = (u64)-1; 4042 max_key.offset = (u64)-1;
3968 4043
3969 /* Only run delayed items if we are a dir or a new file */ 4044 /*
4045 * Only run delayed items if we are a dir or a new file.
4046 * Otherwise commit the delayed inode only, which is needed in
4047 * order for the log replay code to mark inodes for link count
4048 * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items).
4049 */
3970 if (S_ISDIR(inode->i_mode) || 4050 if (S_ISDIR(inode->i_mode) ||
3971 BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { 4051 BTRFS_I(inode)->generation > root->fs_info->last_trans_committed)
3972 ret = btrfs_commit_inode_delayed_items(trans, inode); 4052 ret = btrfs_commit_inode_delayed_items(trans, inode);
3973 if (ret) { 4053 else
3974 btrfs_free_path(path); 4054 ret = btrfs_commit_inode_delayed_inode(inode);
3975 btrfs_free_path(dst_path); 4055
3976 return ret; 4056 if (ret) {
3977 } 4057 btrfs_free_path(path);
4058 btrfs_free_path(dst_path);
4059 return ret;
3978 } 4060 }
3979 4061
3980 mutex_lock(&BTRFS_I(inode)->log_mutex); 4062 mutex_lock(&BTRFS_I(inode)->log_mutex);
@@ -3988,22 +4070,56 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3988 if (S_ISDIR(inode->i_mode)) { 4070 if (S_ISDIR(inode->i_mode)) {
3989 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; 4071 int max_key_type = BTRFS_DIR_LOG_INDEX_KEY;
3990 4072
3991 if (inode_only == LOG_INODE_EXISTS) 4073 if (inode_only == LOG_INODE_EXISTS) {
3992 max_key_type = BTRFS_XATTR_ITEM_KEY; 4074 max_key_type = BTRFS_INODE_EXTREF_KEY;
4075 max_key.type = max_key_type;
4076 }
3993 ret = drop_objectid_items(trans, log, path, ino, max_key_type); 4077 ret = drop_objectid_items(trans, log, path, ino, max_key_type);
3994 } else { 4078 } else {
3995 if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 4079 if (inode_only == LOG_INODE_EXISTS) {
3996 &BTRFS_I(inode)->runtime_flags)) { 4080 /*
3997 clear_bit(BTRFS_INODE_COPY_EVERYTHING, 4081 * Make sure the new inode item we write to the log has
3998 &BTRFS_I(inode)->runtime_flags); 4082 * the same isize as the current one (if it exists).
3999 ret = btrfs_truncate_inode_items(trans, log, 4083 * This is necessary to prevent data loss after log
4000 inode, 0, 0); 4084 * replay, and also to prevent doing a wrong expanding
4001 } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, 4085 * truncate - for e.g. create file, write 4K into offset
4002 &BTRFS_I(inode)->runtime_flags) || 4086 * 0, fsync, write 4K into offset 4096, add hard link,
4087 * fsync some other file (to sync log), power fail - if
4088 * we use the inode's current i_size, after log replay
4089 * we get a 8Kb file, with the last 4Kb extent as a hole
4090 * (zeroes), as if an expanding truncate happened,
4091 * instead of getting a file of 4Kb only.
4092 */
4093 err = logged_inode_size(log, inode, path,
4094 &logged_isize);
4095 if (err)
4096 goto out_unlock;
4097 }
4098 if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4099 &BTRFS_I(inode)->runtime_flags)) {
4100 if (inode_only == LOG_INODE_EXISTS) {
4101 max_key.type = BTRFS_INODE_EXTREF_KEY;
4102 ret = drop_objectid_items(trans, log, path, ino,
4103 max_key.type);
4104 } else {
4105 clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
4106 &BTRFS_I(inode)->runtime_flags);
4107 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4108 &BTRFS_I(inode)->runtime_flags);
4109 ret = btrfs_truncate_inode_items(trans, log,
4110 inode, 0, 0);
4111 }
4112 } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING,
4113 &BTRFS_I(inode)->runtime_flags) ||
4003 inode_only == LOG_INODE_EXISTS) { 4114 inode_only == LOG_INODE_EXISTS) {
4004 if (inode_only == LOG_INODE_ALL) 4115 if (inode_only == LOG_INODE_ALL) {
4116 clear_bit(BTRFS_INODE_COPY_EVERYTHING,
4117 &BTRFS_I(inode)->runtime_flags);
4005 fast_search = true; 4118 fast_search = true;
4006 max_key.type = BTRFS_XATTR_ITEM_KEY; 4119 max_key.type = BTRFS_XATTR_ITEM_KEY;
4120 } else {
4121 max_key.type = BTRFS_INODE_EXTREF_KEY;
4122 }
4007 ret = drop_objectid_items(trans, log, path, ino, 4123 ret = drop_objectid_items(trans, log, path, ino,
4008 max_key.type); 4124 max_key.type);
4009 } else { 4125 } else {
@@ -4047,7 +4163,8 @@ again:
4047 } 4163 }
4048 4164
4049 ret = copy_items(trans, inode, dst_path, path, &last_extent, 4165 ret = copy_items(trans, inode, dst_path, path, &last_extent,
4050 ins_start_slot, ins_nr, inode_only); 4166 ins_start_slot, ins_nr, inode_only,
4167 logged_isize);
4051 if (ret < 0) { 4168 if (ret < 0) {
4052 err = ret; 4169 err = ret;
4053 goto out_unlock; 4170 goto out_unlock;
@@ -4071,7 +4188,7 @@ next_slot:
4071 if (ins_nr) { 4188 if (ins_nr) {
4072 ret = copy_items(trans, inode, dst_path, path, 4189 ret = copy_items(trans, inode, dst_path, path,
4073 &last_extent, ins_start_slot, 4190 &last_extent, ins_start_slot,
4074 ins_nr, inode_only); 4191 ins_nr, inode_only, logged_isize);
4075 if (ret < 0) { 4192 if (ret < 0) {
4076 err = ret; 4193 err = ret;
4077 goto out_unlock; 4194 goto out_unlock;
@@ -4092,7 +4209,8 @@ next_slot:
4092 } 4209 }
4093 if (ins_nr) { 4210 if (ins_nr) {
4094 ret = copy_items(trans, inode, dst_path, path, &last_extent, 4211 ret = copy_items(trans, inode, dst_path, path, &last_extent,
4095 ins_start_slot, ins_nr, inode_only); 4212 ins_start_slot, ins_nr, inode_only,
4213 logged_isize);
4096 if (ret < 0) { 4214 if (ret < 0) {
4097 err = ret; 4215 err = ret;
4098 goto out_unlock; 4216 goto out_unlock;
@@ -4273,6 +4391,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4273 struct dentry *old_parent = NULL; 4391 struct dentry *old_parent = NULL;
4274 int ret = 0; 4392 int ret = 0;
4275 u64 last_committed = root->fs_info->last_trans_committed; 4393 u64 last_committed = root->fs_info->last_trans_committed;
4394 const struct dentry * const first_parent = parent;
4395 const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans >
4396 last_committed);
4276 4397
4277 sb = inode->i_sb; 4398 sb = inode->i_sb;
4278 4399
@@ -4328,7 +4449,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4328 goto end_trans; 4449 goto end_trans;
4329 } 4450 }
4330 4451
4331 inode_only = LOG_INODE_EXISTS;
4332 while (1) { 4452 while (1) {
4333 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) 4453 if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb)
4334 break; 4454 break;
@@ -4337,8 +4457,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
4337 if (root != BTRFS_I(inode)->root) 4457 if (root != BTRFS_I(inode)->root)
4338 break; 4458 break;
4339 4459
4460 /*
4461 * On unlink we must make sure our immediate parent directory
4462 * inode is fully logged. This is to prevent leaving dangling
4463 * directory index entries and a wrong directory inode's i_size.
4464 * Not doing so can result in a directory being impossible to
4465 * delete after log replay (rmdir will always fail with error
4466 * -ENOTEMPTY).
4467 */
4468 if (did_unlink && parent == first_parent)
4469 inode_only = LOG_INODE_ALL;
4470 else
4471 inode_only = LOG_INODE_EXISTS;
4472
4340 if (BTRFS_I(inode)->generation > 4473 if (BTRFS_I(inode)->generation >
4341 root->fs_info->last_trans_committed) { 4474 root->fs_info->last_trans_committed ||
4475 inode_only == LOG_INODE_ALL) {
4342 ret = btrfs_log_inode(trans, root, inode, inode_only, 4476 ret = btrfs_log_inode(trans, root, inode, inode_only,
4343 0, LLONG_MAX, ctx); 4477 0, LLONG_MAX, ctx);
4344 if (ret) 4478 if (ret)