diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-19 17:36:00 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-19 17:36:00 -0500 |
commit | 2b9fb532d4168e8974fe49709e2c4c8d5352a64c (patch) | |
tree | 610cbe2d1bb32e28db135a767f158ade31452e2e /fs/btrfs/tree-log.c | |
parent | 4533f6e27a366ecc3da4876074ebfe0cc0ea4f0f (diff) | |
parent | a742994aa2e271eb8cd8e043d276515ec858ed73 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs updates from Chris Mason:
"This pull is mostly cleanups and fixes:
- The raid5/6 cleanups from Zhao Lei fix up some long-standing warts
in the code and add improvements on top of the scrubbing support
from 3.19.
- Josef has round one of our ENOSPC fixes coming from large btrfs
clusters here at FB.
- Dave Sterba continues a long series of cleanups (thanks Dave), and
Filipe continues hammering on corner cases in fsync and others.
This all was held up a little trying to track down a use-after-free in
btrfs raid5/6. It's not clear yet if this is just made easier to
trigger with this pull or if it's a new bug from the raid5/6 cleanups.
Dave Sterba is the only one to trigger it so far, but he has a
consistent way to reproduce, so we'll get it nailed shortly"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (68 commits)
Btrfs: don't remove extents and xattrs when logging new names
Btrfs: fix fsync data loss after adding hard link to inode
Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group
Btrfs: account for large extents with enospc
Btrfs: don't set and clear delalloc for O_DIRECT writes
Btrfs: only adjust outstanding_extents when we do a short write
btrfs: Fix out-of-space bug
Btrfs: scrub, fix sleep in atomic context
Btrfs: fix scheduler warning when syncing log
Btrfs: Remove unnecessary placeholder in btrfs_err_code
btrfs: cleanup init for list in free-space-cache
btrfs: delete chunk allocation attemp when setting block group ro
btrfs: clear bio reference after submit_one_bio()
Btrfs: fix scrub race leading to use-after-free
Btrfs: add missing cleanup on sysfs init failure
Btrfs: fix race between transaction commit and empty block group removal
btrfs: add more checks to btrfs_read_sys_array
btrfs: cleanup, rename a few variables in btrfs_read_sys_array
btrfs: add checks for sys_chunk_array sizes
btrfs: more superblock checks, lower bounds on devices and sectorsize/nodesize
...
Diffstat (limited to 'fs/btrfs/tree-log.c')
-rw-r--r-- | fs/btrfs/tree-log.c | 234 |
1 files changed, 184 insertions, 50 deletions
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1a9585d4380a..9a37f8b39bae 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -453,11 +453,13 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans, | |||
453 | insert: | 453 | insert: |
454 | btrfs_release_path(path); | 454 | btrfs_release_path(path); |
455 | /* try to insert the key into the destination tree */ | 455 | /* try to insert the key into the destination tree */ |
456 | path->skip_release_on_error = 1; | ||
456 | ret = btrfs_insert_empty_item(trans, root, path, | 457 | ret = btrfs_insert_empty_item(trans, root, path, |
457 | key, item_size); | 458 | key, item_size); |
459 | path->skip_release_on_error = 0; | ||
458 | 460 | ||
459 | /* make sure any existing item is the correct size */ | 461 | /* make sure any existing item is the correct size */ |
460 | if (ret == -EEXIST) { | 462 | if (ret == -EEXIST || ret == -EOVERFLOW) { |
461 | u32 found_size; | 463 | u32 found_size; |
462 | found_size = btrfs_item_size_nr(path->nodes[0], | 464 | found_size = btrfs_item_size_nr(path->nodes[0], |
463 | path->slots[0]); | 465 | path->slots[0]); |
@@ -488,8 +490,20 @@ insert: | |||
488 | src_item = (struct btrfs_inode_item *)src_ptr; | 490 | src_item = (struct btrfs_inode_item *)src_ptr; |
489 | dst_item = (struct btrfs_inode_item *)dst_ptr; | 491 | dst_item = (struct btrfs_inode_item *)dst_ptr; |
490 | 492 | ||
491 | if (btrfs_inode_generation(eb, src_item) == 0) | 493 | if (btrfs_inode_generation(eb, src_item) == 0) { |
494 | struct extent_buffer *dst_eb = path->nodes[0]; | ||
495 | |||
496 | if (S_ISREG(btrfs_inode_mode(eb, src_item)) && | ||
497 | S_ISREG(btrfs_inode_mode(dst_eb, dst_item))) { | ||
498 | struct btrfs_map_token token; | ||
499 | u64 ino_size = btrfs_inode_size(eb, src_item); | ||
500 | |||
501 | btrfs_init_map_token(&token); | ||
502 | btrfs_set_token_inode_size(dst_eb, dst_item, | ||
503 | ino_size, &token); | ||
504 | } | ||
492 | goto no_copy; | 505 | goto no_copy; |
506 | } | ||
493 | 507 | ||
494 | if (overwrite_root && | 508 | if (overwrite_root && |
495 | S_ISDIR(btrfs_inode_mode(eb, src_item)) && | 509 | S_ISDIR(btrfs_inode_mode(eb, src_item)) && |
@@ -844,7 +858,7 @@ out: | |||
844 | static noinline int backref_in_log(struct btrfs_root *log, | 858 | static noinline int backref_in_log(struct btrfs_root *log, |
845 | struct btrfs_key *key, | 859 | struct btrfs_key *key, |
846 | u64 ref_objectid, | 860 | u64 ref_objectid, |
847 | char *name, int namelen) | 861 | const char *name, int namelen) |
848 | { | 862 | { |
849 | struct btrfs_path *path; | 863 | struct btrfs_path *path; |
850 | struct btrfs_inode_ref *ref; | 864 | struct btrfs_inode_ref *ref; |
@@ -1254,13 +1268,14 @@ out: | |||
1254 | } | 1268 | } |
1255 | 1269 | ||
1256 | static int insert_orphan_item(struct btrfs_trans_handle *trans, | 1270 | static int insert_orphan_item(struct btrfs_trans_handle *trans, |
1257 | struct btrfs_root *root, u64 offset) | 1271 | struct btrfs_root *root, u64 ino) |
1258 | { | 1272 | { |
1259 | int ret; | 1273 | int ret; |
1260 | ret = btrfs_find_item(root, NULL, BTRFS_ORPHAN_OBJECTID, | 1274 | |
1261 | offset, BTRFS_ORPHAN_ITEM_KEY, NULL); | 1275 | ret = btrfs_insert_orphan_item(trans, root, ino); |
1262 | if (ret > 0) | 1276 | if (ret == -EEXIST) |
1263 | ret = btrfs_insert_orphan_item(trans, root, offset); | 1277 | ret = 0; |
1278 | |||
1264 | return ret; | 1279 | return ret; |
1265 | } | 1280 | } |
1266 | 1281 | ||
@@ -1287,6 +1302,7 @@ static int count_inode_extrefs(struct btrfs_root *root, | |||
1287 | leaf = path->nodes[0]; | 1302 | leaf = path->nodes[0]; |
1288 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | 1303 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); |
1289 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); | 1304 | ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); |
1305 | cur_offset = 0; | ||
1290 | 1306 | ||
1291 | while (cur_offset < item_size) { | 1307 | while (cur_offset < item_size) { |
1292 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); | 1308 | extref = (struct btrfs_inode_extref *) (ptr + cur_offset); |
@@ -1302,7 +1318,7 @@ static int count_inode_extrefs(struct btrfs_root *root, | |||
1302 | } | 1318 | } |
1303 | btrfs_release_path(path); | 1319 | btrfs_release_path(path); |
1304 | 1320 | ||
1305 | if (ret < 0) | 1321 | if (ret < 0 && ret != -ENOENT) |
1306 | return ret; | 1322 | return ret; |
1307 | return nlink; | 1323 | return nlink; |
1308 | } | 1324 | } |
@@ -1394,9 +1410,6 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
1394 | nlink = ret; | 1410 | nlink = ret; |
1395 | 1411 | ||
1396 | ret = count_inode_extrefs(root, inode, path); | 1412 | ret = count_inode_extrefs(root, inode, path); |
1397 | if (ret == -ENOENT) | ||
1398 | ret = 0; | ||
1399 | |||
1400 | if (ret < 0) | 1413 | if (ret < 0) |
1401 | goto out; | 1414 | goto out; |
1402 | 1415 | ||
@@ -1557,6 +1570,30 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans, | |||
1557 | } | 1570 | } |
1558 | 1571 | ||
1559 | /* | 1572 | /* |
1573 | * Return true if an inode reference exists in the log for the given name, | ||
1574 | * inode and parent inode. | ||
1575 | */ | ||
1576 | static bool name_in_log_ref(struct btrfs_root *log_root, | ||
1577 | const char *name, const int name_len, | ||
1578 | const u64 dirid, const u64 ino) | ||
1579 | { | ||
1580 | struct btrfs_key search_key; | ||
1581 | |||
1582 | search_key.objectid = ino; | ||
1583 | search_key.type = BTRFS_INODE_REF_KEY; | ||
1584 | search_key.offset = dirid; | ||
1585 | if (backref_in_log(log_root, &search_key, dirid, name, name_len)) | ||
1586 | return true; | ||
1587 | |||
1588 | search_key.type = BTRFS_INODE_EXTREF_KEY; | ||
1589 | search_key.offset = btrfs_extref_hash(dirid, name, name_len); | ||
1590 | if (backref_in_log(log_root, &search_key, dirid, name, name_len)) | ||
1591 | return true; | ||
1592 | |||
1593 | return false; | ||
1594 | } | ||
1595 | |||
1596 | /* | ||
1560 | * take a single entry in a log directory item and replay it into | 1597 | * take a single entry in a log directory item and replay it into |
1561 | * the subvolume. | 1598 | * the subvolume. |
1562 | * | 1599 | * |
@@ -1666,10 +1703,17 @@ out: | |||
1666 | return ret; | 1703 | return ret; |
1667 | 1704 | ||
1668 | insert: | 1705 | insert: |
1706 | if (name_in_log_ref(root->log_root, name, name_len, | ||
1707 | key->objectid, log_key.objectid)) { | ||
1708 | /* The dentry will be added later. */ | ||
1709 | ret = 0; | ||
1710 | update_size = false; | ||
1711 | goto out; | ||
1712 | } | ||
1669 | btrfs_release_path(path); | 1713 | btrfs_release_path(path); |
1670 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, | 1714 | ret = insert_one_name(trans, root, path, key->objectid, key->offset, |
1671 | name, name_len, log_type, &log_key); | 1715 | name, name_len, log_type, &log_key); |
1672 | if (ret && ret != -ENOENT) | 1716 | if (ret && ret != -ENOENT && ret != -EEXIST) |
1673 | goto out; | 1717 | goto out; |
1674 | update_size = false; | 1718 | update_size = false; |
1675 | ret = 0; | 1719 | ret = 0; |
@@ -2164,7 +2208,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
2164 | parent = path->nodes[*level]; | 2208 | parent = path->nodes[*level]; |
2165 | root_owner = btrfs_header_owner(parent); | 2209 | root_owner = btrfs_header_owner(parent); |
2166 | 2210 | ||
2167 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 2211 | next = btrfs_find_create_tree_block(root, bytenr); |
2168 | if (!next) | 2212 | if (!next) |
2169 | return -ENOMEM; | 2213 | return -ENOMEM; |
2170 | 2214 | ||
@@ -2416,8 +2460,8 @@ static void wait_for_writer(struct btrfs_trans_handle *trans, | |||
2416 | mutex_unlock(&root->log_mutex); | 2460 | mutex_unlock(&root->log_mutex); |
2417 | if (atomic_read(&root->log_writers)) | 2461 | if (atomic_read(&root->log_writers)) |
2418 | schedule(); | 2462 | schedule(); |
2419 | mutex_lock(&root->log_mutex); | ||
2420 | finish_wait(&root->log_writer_wait, &wait); | 2463 | finish_wait(&root->log_writer_wait, &wait); |
2464 | mutex_lock(&root->log_mutex); | ||
2421 | } | 2465 | } |
2422 | } | 2466 | } |
2423 | 2467 | ||
@@ -3219,7 +3263,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
3219 | static void fill_inode_item(struct btrfs_trans_handle *trans, | 3263 | static void fill_inode_item(struct btrfs_trans_handle *trans, |
3220 | struct extent_buffer *leaf, | 3264 | struct extent_buffer *leaf, |
3221 | struct btrfs_inode_item *item, | 3265 | struct btrfs_inode_item *item, |
3222 | struct inode *inode, int log_inode_only) | 3266 | struct inode *inode, int log_inode_only, |
3267 | u64 logged_isize) | ||
3223 | { | 3268 | { |
3224 | struct btrfs_map_token token; | 3269 | struct btrfs_map_token token; |
3225 | 3270 | ||
@@ -3232,7 +3277,7 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
3232 | * to say 'update this inode with these values' | 3277 | * to say 'update this inode with these values' |
3233 | */ | 3278 | */ |
3234 | btrfs_set_token_inode_generation(leaf, item, 0, &token); | 3279 | btrfs_set_token_inode_generation(leaf, item, 0, &token); |
3235 | btrfs_set_token_inode_size(leaf, item, 0, &token); | 3280 | btrfs_set_token_inode_size(leaf, item, logged_isize, &token); |
3236 | } else { | 3281 | } else { |
3237 | btrfs_set_token_inode_generation(leaf, item, | 3282 | btrfs_set_token_inode_generation(leaf, item, |
3238 | BTRFS_I(inode)->generation, | 3283 | BTRFS_I(inode)->generation, |
@@ -3245,19 +3290,19 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
3245 | btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); | 3290 | btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); |
3246 | btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); | 3291 | btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); |
3247 | 3292 | ||
3248 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), | 3293 | btrfs_set_token_timespec_sec(leaf, &item->atime, |
3249 | inode->i_atime.tv_sec, &token); | 3294 | inode->i_atime.tv_sec, &token); |
3250 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), | 3295 | btrfs_set_token_timespec_nsec(leaf, &item->atime, |
3251 | inode->i_atime.tv_nsec, &token); | 3296 | inode->i_atime.tv_nsec, &token); |
3252 | 3297 | ||
3253 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), | 3298 | btrfs_set_token_timespec_sec(leaf, &item->mtime, |
3254 | inode->i_mtime.tv_sec, &token); | 3299 | inode->i_mtime.tv_sec, &token); |
3255 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), | 3300 | btrfs_set_token_timespec_nsec(leaf, &item->mtime, |
3256 | inode->i_mtime.tv_nsec, &token); | 3301 | inode->i_mtime.tv_nsec, &token); |
3257 | 3302 | ||
3258 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), | 3303 | btrfs_set_token_timespec_sec(leaf, &item->ctime, |
3259 | inode->i_ctime.tv_sec, &token); | 3304 | inode->i_ctime.tv_sec, &token); |
3260 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), | 3305 | btrfs_set_token_timespec_nsec(leaf, &item->ctime, |
3261 | inode->i_ctime.tv_nsec, &token); | 3306 | inode->i_ctime.tv_nsec, &token); |
3262 | 3307 | ||
3263 | btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), | 3308 | btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), |
@@ -3284,7 +3329,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans, | |||
3284 | return ret; | 3329 | return ret; |
3285 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3330 | inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
3286 | struct btrfs_inode_item); | 3331 | struct btrfs_inode_item); |
3287 | fill_inode_item(trans, path->nodes[0], inode_item, inode, 0); | 3332 | fill_inode_item(trans, path->nodes[0], inode_item, inode, 0, 0); |
3288 | btrfs_release_path(path); | 3333 | btrfs_release_path(path); |
3289 | return 0; | 3334 | return 0; |
3290 | } | 3335 | } |
@@ -3293,7 +3338,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
3293 | struct inode *inode, | 3338 | struct inode *inode, |
3294 | struct btrfs_path *dst_path, | 3339 | struct btrfs_path *dst_path, |
3295 | struct btrfs_path *src_path, u64 *last_extent, | 3340 | struct btrfs_path *src_path, u64 *last_extent, |
3296 | int start_slot, int nr, int inode_only) | 3341 | int start_slot, int nr, int inode_only, |
3342 | u64 logged_isize) | ||
3297 | { | 3343 | { |
3298 | unsigned long src_offset; | 3344 | unsigned long src_offset; |
3299 | unsigned long dst_offset; | 3345 | unsigned long dst_offset; |
@@ -3350,7 +3396,8 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
3350 | dst_path->slots[0], | 3396 | dst_path->slots[0], |
3351 | struct btrfs_inode_item); | 3397 | struct btrfs_inode_item); |
3352 | fill_inode_item(trans, dst_path->nodes[0], inode_item, | 3398 | fill_inode_item(trans, dst_path->nodes[0], inode_item, |
3353 | inode, inode_only == LOG_INODE_EXISTS); | 3399 | inode, inode_only == LOG_INODE_EXISTS, |
3400 | logged_isize); | ||
3354 | } else { | 3401 | } else { |
3355 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, | 3402 | copy_extent_buffer(dst_path->nodes[0], src, dst_offset, |
3356 | src_offset, ins_sizes[i]); | 3403 | src_offset, ins_sizes[i]); |
@@ -3902,6 +3949,33 @@ process: | |||
3902 | return ret; | 3949 | return ret; |
3903 | } | 3950 | } |
3904 | 3951 | ||
3952 | static int logged_inode_size(struct btrfs_root *log, struct inode *inode, | ||
3953 | struct btrfs_path *path, u64 *size_ret) | ||
3954 | { | ||
3955 | struct btrfs_key key; | ||
3956 | int ret; | ||
3957 | |||
3958 | key.objectid = btrfs_ino(inode); | ||
3959 | key.type = BTRFS_INODE_ITEM_KEY; | ||
3960 | key.offset = 0; | ||
3961 | |||
3962 | ret = btrfs_search_slot(NULL, log, &key, path, 0, 0); | ||
3963 | if (ret < 0) { | ||
3964 | return ret; | ||
3965 | } else if (ret > 0) { | ||
3966 | *size_ret = i_size_read(inode); | ||
3967 | } else { | ||
3968 | struct btrfs_inode_item *item; | ||
3969 | |||
3970 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
3971 | struct btrfs_inode_item); | ||
3972 | *size_ret = btrfs_inode_size(path->nodes[0], item); | ||
3973 | } | ||
3974 | |||
3975 | btrfs_release_path(path); | ||
3976 | return 0; | ||
3977 | } | ||
3978 | |||
3905 | /* log a single inode in the tree log. | 3979 | /* log a single inode in the tree log. |
3906 | * At least one parent directory for this inode must exist in the tree | 3980 | * At least one parent directory for this inode must exist in the tree |
3907 | * or be logged already. | 3981 | * or be logged already. |
@@ -3939,6 +4013,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3939 | bool fast_search = false; | 4013 | bool fast_search = false; |
3940 | u64 ino = btrfs_ino(inode); | 4014 | u64 ino = btrfs_ino(inode); |
3941 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | 4015 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; |
4016 | u64 logged_isize = 0; | ||
3942 | 4017 | ||
3943 | path = btrfs_alloc_path(); | 4018 | path = btrfs_alloc_path(); |
3944 | if (!path) | 4019 | if (!path) |
@@ -3966,15 +4041,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3966 | max_key.type = (u8)-1; | 4041 | max_key.type = (u8)-1; |
3967 | max_key.offset = (u64)-1; | 4042 | max_key.offset = (u64)-1; |
3968 | 4043 | ||
3969 | /* Only run delayed items if we are a dir or a new file */ | 4044 | /* |
4045 | * Only run delayed items if we are a dir or a new file. | ||
4046 | * Otherwise commit the delayed inode only, which is needed in | ||
4047 | * order for the log replay code to mark inodes for link count | ||
4048 | * fixup (create temporary BTRFS_TREE_LOG_FIXUP_OBJECTID items). | ||
4049 | */ | ||
3970 | if (S_ISDIR(inode->i_mode) || | 4050 | if (S_ISDIR(inode->i_mode) || |
3971 | BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) { | 4051 | BTRFS_I(inode)->generation > root->fs_info->last_trans_committed) |
3972 | ret = btrfs_commit_inode_delayed_items(trans, inode); | 4052 | ret = btrfs_commit_inode_delayed_items(trans, inode); |
3973 | if (ret) { | 4053 | else |
3974 | btrfs_free_path(path); | 4054 | ret = btrfs_commit_inode_delayed_inode(inode); |
3975 | btrfs_free_path(dst_path); | 4055 | |
3976 | return ret; | 4056 | if (ret) { |
3977 | } | 4057 | btrfs_free_path(path); |
4058 | btrfs_free_path(dst_path); | ||
4059 | return ret; | ||
3978 | } | 4060 | } |
3979 | 4061 | ||
3980 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 4062 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
@@ -3988,22 +4070,56 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3988 | if (S_ISDIR(inode->i_mode)) { | 4070 | if (S_ISDIR(inode->i_mode)) { |
3989 | int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; | 4071 | int max_key_type = BTRFS_DIR_LOG_INDEX_KEY; |
3990 | 4072 | ||
3991 | if (inode_only == LOG_INODE_EXISTS) | 4073 | if (inode_only == LOG_INODE_EXISTS) { |
3992 | max_key_type = BTRFS_XATTR_ITEM_KEY; | 4074 | max_key_type = BTRFS_INODE_EXTREF_KEY; |
4075 | max_key.type = max_key_type; | ||
4076 | } | ||
3993 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); | 4077 | ret = drop_objectid_items(trans, log, path, ino, max_key_type); |
3994 | } else { | 4078 | } else { |
3995 | if (test_and_clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | 4079 | if (inode_only == LOG_INODE_EXISTS) { |
3996 | &BTRFS_I(inode)->runtime_flags)) { | 4080 | /* |
3997 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | 4081 | * Make sure the new inode item we write to the log has |
3998 | &BTRFS_I(inode)->runtime_flags); | 4082 | * the same isize as the current one (if it exists). |
3999 | ret = btrfs_truncate_inode_items(trans, log, | 4083 | * This is necessary to prevent data loss after log |
4000 | inode, 0, 0); | 4084 | * replay, and also to prevent doing a wrong expanding |
4001 | } else if (test_and_clear_bit(BTRFS_INODE_COPY_EVERYTHING, | 4085 | * truncate - for e.g. create file, write 4K into offset |
4002 | &BTRFS_I(inode)->runtime_flags) || | 4086 | * 0, fsync, write 4K into offset 4096, add hard link, |
4087 | * fsync some other file (to sync log), power fail - if | ||
4088 | * we use the inode's current i_size, after log replay | ||
4089 | * we get a 8Kb file, with the last 4Kb extent as a hole | ||
4090 | * (zeroes), as if an expanding truncate happened, | ||
4091 | * instead of getting a file of 4Kb only. | ||
4092 | */ | ||
4093 | err = logged_inode_size(log, inode, path, | ||
4094 | &logged_isize); | ||
4095 | if (err) | ||
4096 | goto out_unlock; | ||
4097 | } | ||
4098 | if (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
4099 | &BTRFS_I(inode)->runtime_flags)) { | ||
4100 | if (inode_only == LOG_INODE_EXISTS) { | ||
4101 | max_key.type = BTRFS_INODE_EXTREF_KEY; | ||
4102 | ret = drop_objectid_items(trans, log, path, ino, | ||
4103 | max_key.type); | ||
4104 | } else { | ||
4105 | clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, | ||
4106 | &BTRFS_I(inode)->runtime_flags); | ||
4107 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
4108 | &BTRFS_I(inode)->runtime_flags); | ||
4109 | ret = btrfs_truncate_inode_items(trans, log, | ||
4110 | inode, 0, 0); | ||
4111 | } | ||
4112 | } else if (test_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
4113 | &BTRFS_I(inode)->runtime_flags) || | ||
4003 | inode_only == LOG_INODE_EXISTS) { | 4114 | inode_only == LOG_INODE_EXISTS) { |
4004 | if (inode_only == LOG_INODE_ALL) | 4115 | if (inode_only == LOG_INODE_ALL) { |
4116 | clear_bit(BTRFS_INODE_COPY_EVERYTHING, | ||
4117 | &BTRFS_I(inode)->runtime_flags); | ||
4005 | fast_search = true; | 4118 | fast_search = true; |
4006 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 4119 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
4120 | } else { | ||
4121 | max_key.type = BTRFS_INODE_EXTREF_KEY; | ||
4122 | } | ||
4007 | ret = drop_objectid_items(trans, log, path, ino, | 4123 | ret = drop_objectid_items(trans, log, path, ino, |
4008 | max_key.type); | 4124 | max_key.type); |
4009 | } else { | 4125 | } else { |
@@ -4047,7 +4163,8 @@ again: | |||
4047 | } | 4163 | } |
4048 | 4164 | ||
4049 | ret = copy_items(trans, inode, dst_path, path, &last_extent, | 4165 | ret = copy_items(trans, inode, dst_path, path, &last_extent, |
4050 | ins_start_slot, ins_nr, inode_only); | 4166 | ins_start_slot, ins_nr, inode_only, |
4167 | logged_isize); | ||
4051 | if (ret < 0) { | 4168 | if (ret < 0) { |
4052 | err = ret; | 4169 | err = ret; |
4053 | goto out_unlock; | 4170 | goto out_unlock; |
@@ -4071,7 +4188,7 @@ next_slot: | |||
4071 | if (ins_nr) { | 4188 | if (ins_nr) { |
4072 | ret = copy_items(trans, inode, dst_path, path, | 4189 | ret = copy_items(trans, inode, dst_path, path, |
4073 | &last_extent, ins_start_slot, | 4190 | &last_extent, ins_start_slot, |
4074 | ins_nr, inode_only); | 4191 | ins_nr, inode_only, logged_isize); |
4075 | if (ret < 0) { | 4192 | if (ret < 0) { |
4076 | err = ret; | 4193 | err = ret; |
4077 | goto out_unlock; | 4194 | goto out_unlock; |
@@ -4092,7 +4209,8 @@ next_slot: | |||
4092 | } | 4209 | } |
4093 | if (ins_nr) { | 4210 | if (ins_nr) { |
4094 | ret = copy_items(trans, inode, dst_path, path, &last_extent, | 4211 | ret = copy_items(trans, inode, dst_path, path, &last_extent, |
4095 | ins_start_slot, ins_nr, inode_only); | 4212 | ins_start_slot, ins_nr, inode_only, |
4213 | logged_isize); | ||
4096 | if (ret < 0) { | 4214 | if (ret < 0) { |
4097 | err = ret; | 4215 | err = ret; |
4098 | goto out_unlock; | 4216 | goto out_unlock; |
@@ -4273,6 +4391,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4273 | struct dentry *old_parent = NULL; | 4391 | struct dentry *old_parent = NULL; |
4274 | int ret = 0; | 4392 | int ret = 0; |
4275 | u64 last_committed = root->fs_info->last_trans_committed; | 4393 | u64 last_committed = root->fs_info->last_trans_committed; |
4394 | const struct dentry * const first_parent = parent; | ||
4395 | const bool did_unlink = (BTRFS_I(inode)->last_unlink_trans > | ||
4396 | last_committed); | ||
4276 | 4397 | ||
4277 | sb = inode->i_sb; | 4398 | sb = inode->i_sb; |
4278 | 4399 | ||
@@ -4328,7 +4449,6 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4328 | goto end_trans; | 4449 | goto end_trans; |
4329 | } | 4450 | } |
4330 | 4451 | ||
4331 | inode_only = LOG_INODE_EXISTS; | ||
4332 | while (1) { | 4452 | while (1) { |
4333 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | 4453 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) |
4334 | break; | 4454 | break; |
@@ -4337,8 +4457,22 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4337 | if (root != BTRFS_I(inode)->root) | 4457 | if (root != BTRFS_I(inode)->root) |
4338 | break; | 4458 | break; |
4339 | 4459 | ||
4460 | /* | ||
4461 | * On unlink we must make sure our immediate parent directory | ||
4462 | * inode is fully logged. This is to prevent leaving dangling | ||
4463 | * directory index entries and a wrong directory inode's i_size. | ||
4464 | * Not doing so can result in a directory being impossible to | ||
4465 | * delete after log replay (rmdir will always fail with error | ||
4466 | * -ENOTEMPTY). | ||
4467 | */ | ||
4468 | if (did_unlink && parent == first_parent) | ||
4469 | inode_only = LOG_INODE_ALL; | ||
4470 | else | ||
4471 | inode_only = LOG_INODE_EXISTS; | ||
4472 | |||
4340 | if (BTRFS_I(inode)->generation > | 4473 | if (BTRFS_I(inode)->generation > |
4341 | root->fs_info->last_trans_committed) { | 4474 | root->fs_info->last_trans_committed || |
4475 | inode_only == LOG_INODE_ALL) { | ||
4342 | ret = btrfs_log_inode(trans, root, inode, inode_only, | 4476 | ret = btrfs_log_inode(trans, root, inode, inode_only, |
4343 | 0, LLONG_MAX, ctx); | 4477 | 0, LLONG_MAX, ctx); |
4344 | if (ret) | 4478 | if (ret) |