diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-03-06 16:52:54 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-03-06 16:52:54 -0500 |
commit | 84399bb075a6fe320d4221970dc36314e46229fe (patch) | |
tree | 7e4b9336021310925bb4781af310994ca1245ed2 /fs | |
parent | 0d9b9c1674fa7f86175a41805061908022e394b8 (diff) | |
parent | dd9ef135e3542ffc621c4eb7f0091870ec7a1504 (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason:
"Outside of misc fixes, Filipe has a few fsync corners and we're
pulling in one more of Josef's fixes from production use here"
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
Btrfs:__add_inode_ref: out of bounds memory read when looking for extended ref.
Btrfs: fix data loss in the fast fsync path
Btrfs: remove extra run_delayed_refs in update_cowonly_root
Btrfs: incremental send, don't rename a directory too soon
btrfs: fix lost return value due to variable shadowing
Btrfs: do not ignore errors from btrfs_lookup_xattr in do_setxattr
Btrfs: fix off-by-one logic error in btrfs_realloc_node
Btrfs: add missing inode update when punching hole
Btrfs: abort the transaction if we fail to update the free space cache inode
Btrfs: fix fsync race leading to ordered extent memory leaks
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/ctree.c | 8 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 16 | ||||
-rw-r--r-- | fs/btrfs/file.c | 87 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 1 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 7 | ||||
-rw-r--r-- | fs/btrfs/send.c | 171 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 3 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 2 | ||||
-rw-r--r-- | fs/btrfs/xattr.c | 8 |
9 files changed, 241 insertions, 62 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 993642199326..6d67f32e648d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
1645 | 1645 | ||
1646 | parent_nritems = btrfs_header_nritems(parent); | 1646 | parent_nritems = btrfs_header_nritems(parent); |
1647 | blocksize = root->nodesize; | 1647 | blocksize = root->nodesize; |
1648 | end_slot = parent_nritems; | 1648 | end_slot = parent_nritems - 1; |
1649 | 1649 | ||
1650 | if (parent_nritems == 1) | 1650 | if (parent_nritems <= 1) |
1651 | return 0; | 1651 | return 0; |
1652 | 1652 | ||
1653 | btrfs_set_lock_blocking(parent); | 1653 | btrfs_set_lock_blocking(parent); |
1654 | 1654 | ||
1655 | for (i = start_slot; i < end_slot; i++) { | 1655 | for (i = start_slot; i <= end_slot; i++) { |
1656 | int close = 1; | 1656 | int close = 1; |
1657 | 1657 | ||
1658 | btrfs_node_key(parent, &disk_key, i); | 1658 | btrfs_node_key(parent, &disk_key, i); |
@@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
1669 | other = btrfs_node_blockptr(parent, i - 1); | 1669 | other = btrfs_node_blockptr(parent, i - 1); |
1670 | close = close_blocks(blocknr, other, blocksize); | 1670 | close = close_blocks(blocknr, other, blocksize); |
1671 | } | 1671 | } |
1672 | if (!close && i < end_slot - 2) { | 1672 | if (!close && i < end_slot) { |
1673 | other = btrfs_node_blockptr(parent, i + 1); | 1673 | other = btrfs_node_blockptr(parent, i + 1); |
1674 | close = close_blocks(blocknr, other, blocksize); | 1674 | close = close_blocks(blocknr, other, blocksize); |
1675 | } | 1675 | } |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 571f402d3fc4..6f080451fcb1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group, | |||
3208 | return 0; | 3208 | return 0; |
3209 | } | 3209 | } |
3210 | 3210 | ||
3211 | if (trans->aborted) | ||
3212 | return 0; | ||
3211 | again: | 3213 | again: |
3212 | inode = lookup_free_space_inode(root, block_group, path); | 3214 | inode = lookup_free_space_inode(root, block_group, path); |
3213 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { | 3215 | if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { |
@@ -3243,6 +3245,20 @@ again: | |||
3243 | */ | 3245 | */ |
3244 | BTRFS_I(inode)->generation = 0; | 3246 | BTRFS_I(inode)->generation = 0; |
3245 | ret = btrfs_update_inode(trans, root, inode); | 3247 | ret = btrfs_update_inode(trans, root, inode); |
3248 | if (ret) { | ||
3249 | /* | ||
3250 | * So theoretically we could recover from this, simply set the | ||
3251 | * super cache generation to 0 so we know to invalidate the | ||
3252 | * cache, but then we'd have to keep track of the block groups | ||
3253 | * that fail this way so we know we _have_ to reset this cache | ||
3254 | * before the next commit or risk reading stale cache. So to | ||
3255 | * limit our exposure to horrible edge cases let's just abort the | |
3256 | * transaction, this only happens in really bad situations | ||
3257 | * anyway. | ||
3258 | */ | ||
3259 | btrfs_abort_transaction(trans, root, ret); | ||
3260 | goto out_put; | ||
3261 | } | ||
3246 | WARN_ON(ret); | 3262 | WARN_ON(ret); |
3247 | 3263 | ||
3248 | if (i_size_read(inode) > 0) { | 3264 | if (i_size_read(inode) > 0) { |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index b78bbbac900d..30982bbd31c3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb, | |||
1811 | mutex_unlock(&inode->i_mutex); | 1811 | mutex_unlock(&inode->i_mutex); |
1812 | 1812 | ||
1813 | /* | 1813 | /* |
1814 | * we want to make sure fsync finds this change | ||
1815 | * but we haven't joined a transaction running right now. | ||
1816 | * | ||
1817 | * Later on, someone is sure to update the inode and get the | ||
1818 | * real transid recorded. | ||
1819 | * | ||
1820 | * We set last_trans now to the fs_info generation + 1, | ||
1821 | * this will either be one more than the running transaction | ||
1822 | * or the generation used for the next transaction if there isn't | ||
1823 | * one running right now. | ||
1824 | * | ||
1825 | * We also have to set last_sub_trans to the current log transid, | 1814 | * We also have to set last_sub_trans to the current log transid, |
1826 | * otherwise subsequent syncs to a file that's been synced in this | 1815 | * otherwise subsequent syncs to a file that's been synced in this |
1827 | * transaction will appear to have already occurred. | 1816 | * transaction will appear to have already occurred. |
1828 | */ | 1817 | */ |
1829 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | ||
1830 | BTRFS_I(inode)->last_sub_trans = root->log_transid; | 1818 | BTRFS_I(inode)->last_sub_trans = root->log_transid; |
1831 | if (num_written > 0) { | 1819 | if (num_written > 0) { |
1832 | err = generic_write_sync(file, pos, num_written); | 1820 | err = generic_write_sync(file, pos, num_written); |
@@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1959 | atomic_inc(&root->log_batch); | 1947 | atomic_inc(&root->log_batch); |
1960 | 1948 | ||
1961 | /* | 1949 | /* |
1962 | * check the transaction that last modified this inode | 1950 | * If the last transaction that changed this file was before the current |
1963 | * and see if its already been committed | 1951 | * transaction and we have the full sync flag set in our inode, we can |
1964 | */ | 1952 | * bail out now without any syncing. |
1965 | if (!BTRFS_I(inode)->last_trans) { | 1953 | * |
1966 | mutex_unlock(&inode->i_mutex); | 1954 | * Note that we can't bail out if the full sync flag isn't set. This is |
1967 | goto out; | 1955 | * because when the full sync flag is set we start all ordered extents |
1968 | } | 1956 | * and wait for them to fully complete - when they complete they update |
1969 | 1957 | * the inode's last_trans field through: | |
1970 | /* | 1958 | * |
1971 | * if the last transaction that changed this file was before | 1959 | * btrfs_finish_ordered_io() -> |
1972 | * the current transaction, we can bail out now without any | 1960 | * btrfs_update_inode_fallback() -> |
1973 | * syncing | 1961 | * btrfs_update_inode() -> |
1962 | * btrfs_set_inode_last_trans() | ||
1963 | * | ||
1964 | * So we are sure that last_trans is up to date and can do this check to | ||
1965 | * bail out safely. For the fast path, when the full sync flag is not | ||
1966 | * set in our inode, we can not do it because we start only our ordered | ||
1967 | * extents and don't wait for them to complete (that is when | ||
1968 | * btrfs_finish_ordered_io runs), so here at this point their last_trans | ||
1969 | * value might be less than or equal to fs_info->last_trans_committed, | |
1970 | * and setting a speculative last_trans for an inode when a buffered | ||
1971 | * write is made (such as fs_info->generation + 1 for example) would not | ||
1972 | * be reliable since after setting the value and before fsync is called | ||
1973 | * any number of transactions can start and commit (transaction kthread | ||
1974 | * commits the current transaction periodically), and a transaction | ||
1975 | * commit neither starts nor waits for ordered extents to complete. | |
1974 | */ | 1976 | */ |
1975 | smp_mb(); | 1977 | smp_mb(); |
1976 | if (btrfs_inode_in_log(inode, root->fs_info->generation) || | 1978 | if (btrfs_inode_in_log(inode, root->fs_info->generation) || |
1977 | BTRFS_I(inode)->last_trans <= | 1979 | (full_sync && BTRFS_I(inode)->last_trans <= |
1978 | root->fs_info->last_trans_committed) { | 1980 | root->fs_info->last_trans_committed)) { |
1979 | BTRFS_I(inode)->last_trans = 0; | ||
1980 | |||
1981 | /* | 1981 | /* |
1982 | * We've had everything committed since the last time we were | 1982 | * We've had everything committed since the last time we were |
1983 | * modified so clear this flag in case it was set for whatever | 1983 | * modified so clear this flag in case it was set for whatever |
@@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2275 | bool same_page; | 2275 | bool same_page; |
2276 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2276 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
2277 | u64 ino_size; | 2277 | u64 ino_size; |
2278 | bool truncated_page = false; | ||
2279 | bool updated_inode = false; | ||
2278 | 2280 | ||
2279 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2281 | ret = btrfs_wait_ordered_range(inode, offset, len); |
2280 | if (ret) | 2282 | if (ret) |
@@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2306 | * entire page. | 2308 | * entire page. |
2307 | */ | 2309 | */ |
2308 | if (same_page && len < PAGE_CACHE_SIZE) { | 2310 | if (same_page && len < PAGE_CACHE_SIZE) { |
2309 | if (offset < ino_size) | 2311 | if (offset < ino_size) { |
2312 | truncated_page = true; | ||
2310 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2313 | ret = btrfs_truncate_page(inode, offset, len, 0); |
2314 | } else { | ||
2315 | ret = 0; | ||
2316 | } | ||
2311 | goto out_only_mutex; | 2317 | goto out_only_mutex; |
2312 | } | 2318 | } |
2313 | 2319 | ||
2314 | /* zero back part of the first page */ | 2320 | /* zero back part of the first page */ |
2315 | if (offset < ino_size) { | 2321 | if (offset < ino_size) { |
2322 | truncated_page = true; | ||
2316 | ret = btrfs_truncate_page(inode, offset, 0, 0); | 2323 | ret = btrfs_truncate_page(inode, offset, 0, 0); |
2317 | if (ret) { | 2324 | if (ret) { |
2318 | mutex_unlock(&inode->i_mutex); | 2325 | mutex_unlock(&inode->i_mutex); |
@@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2348 | if (!ret) { | 2355 | if (!ret) { |
2349 | /* zero the front end of the last page */ | 2356 | /* zero the front end of the last page */ |
2350 | if (tail_start + tail_len < ino_size) { | 2357 | if (tail_start + tail_len < ino_size) { |
2358 | truncated_page = true; | ||
2351 | ret = btrfs_truncate_page(inode, | 2359 | ret = btrfs_truncate_page(inode, |
2352 | tail_start + tail_len, 0, 1); | 2360 | tail_start + tail_len, 0, 1); |
2353 | if (ret) | 2361 | if (ret) |
@@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2357 | } | 2365 | } |
2358 | 2366 | ||
2359 | if (lockend < lockstart) { | 2367 | if (lockend < lockstart) { |
2360 | mutex_unlock(&inode->i_mutex); | 2368 | ret = 0; |
2361 | return 0; | 2369 | goto out_only_mutex; |
2362 | } | 2370 | } |
2363 | 2371 | ||
2364 | while (1) { | 2372 | while (1) { |
@@ -2506,6 +2514,7 @@ out_trans: | |||
2506 | 2514 | ||
2507 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2515 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2508 | ret = btrfs_update_inode(trans, root, inode); | 2516 | ret = btrfs_update_inode(trans, root, inode); |
2517 | updated_inode = true; | ||
2509 | btrfs_end_transaction(trans, root); | 2518 | btrfs_end_transaction(trans, root); |
2510 | btrfs_btree_balance_dirty(root); | 2519 | btrfs_btree_balance_dirty(root); |
2511 | out_free: | 2520 | out_free: |
@@ -2515,6 +2524,22 @@ out: | |||
2515 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 2524 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
2516 | &cached_state, GFP_NOFS); | 2525 | &cached_state, GFP_NOFS); |
2517 | out_only_mutex: | 2526 | out_only_mutex: |
2527 | if (!updated_inode && truncated_page && !ret && !err) { | ||
2528 | /* | ||
2529 | * If we only end up zeroing part of a page, we still need to | ||
2530 | * update the inode item, so that all the time fields are | ||
2531 | * updated as well as the necessary btrfs inode in memory fields | ||
2532 | * for detecting, at fsync time, if the inode isn't yet in the | ||
2533 | * log tree or it's there but not up to date. | ||
2534 | */ | ||
2535 | trans = btrfs_start_transaction(root, 1); | ||
2536 | if (IS_ERR(trans)) { | ||
2537 | err = PTR_ERR(trans); | ||
2538 | } else { | ||
2539 | err = btrfs_update_inode(trans, root, inode); | ||
2540 | ret = btrfs_end_transaction(trans, root); | ||
2541 | } | ||
2542 | } | ||
2518 | mutex_unlock(&inode->i_mutex); | 2543 | mutex_unlock(&inode->i_mutex); |
2519 | if (ret && !err) | 2544 | if (ret && !err) |
2520 | err = ret; | 2545 | err = ret; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a85c23dfcddb..da828cf5e8f8 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -7285,7 +7285,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
7285 | ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && | 7285 | ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && |
7286 | em->block_start != EXTENT_MAP_HOLE)) { | 7286 | em->block_start != EXTENT_MAP_HOLE)) { |
7287 | int type; | 7287 | int type; |
7288 | int ret; | ||
7289 | u64 block_start, orig_start, orig_block_len, ram_bytes; | 7288 | u64 block_start, orig_start, orig_block_len, ram_bytes; |
7290 | 7289 | ||
7291 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | 7290 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 534544e08f76..157cc54fc634 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode, | |||
452 | continue; | 452 | continue; |
453 | if (entry_end(ordered) <= start) | 453 | if (entry_end(ordered) <= start) |
454 | break; | 454 | break; |
455 | if (!list_empty(&ordered->log_list)) | 455 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) |
456 | continue; | ||
457 | if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) | ||
458 | continue; | 456 | continue; |
459 | list_add(&ordered->log_list, logged_list); | 457 | list_add(&ordered->log_list, logged_list); |
460 | atomic_inc(&ordered->refs); | 458 | atomic_inc(&ordered->refs); |
@@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans, | |||
511 | wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, | 509 | wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, |
512 | &ordered->flags)); | 510 | &ordered->flags)); |
513 | 511 | ||
514 | if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) | 512 | list_add_tail(&ordered->trans_list, &trans->ordered); |
515 | list_add_tail(&ordered->trans_list, &trans->ordered); | ||
516 | spin_lock_irq(&log->log_extents_lock[index]); | 513 | spin_lock_irq(&log->log_extents_lock[index]); |
517 | } | 514 | } |
518 | spin_unlock_irq(&log->log_extents_lock[index]); | 515 | spin_unlock_irq(&log->log_extents_lock[index]); |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fe5857223515..d6033f540cc7 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -230,6 +230,7 @@ struct pending_dir_move { | |||
230 | u64 parent_ino; | 230 | u64 parent_ino; |
231 | u64 ino; | 231 | u64 ino; |
232 | u64 gen; | 232 | u64 gen; |
233 | bool is_orphan; | ||
233 | struct list_head update_refs; | 234 | struct list_head update_refs; |
234 | }; | 235 | }; |
235 | 236 | ||
@@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, | |||
2984 | u64 ino_gen, | 2985 | u64 ino_gen, |
2985 | u64 parent_ino, | 2986 | u64 parent_ino, |
2986 | struct list_head *new_refs, | 2987 | struct list_head *new_refs, |
2987 | struct list_head *deleted_refs) | 2988 | struct list_head *deleted_refs, |
2989 | const bool is_orphan) | ||
2988 | { | 2990 | { |
2989 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; | 2991 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; |
2990 | struct rb_node *parent = NULL; | 2992 | struct rb_node *parent = NULL; |
@@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx, | |||
2999 | pm->parent_ino = parent_ino; | 3001 | pm->parent_ino = parent_ino; |
3000 | pm->ino = ino; | 3002 | pm->ino = ino; |
3001 | pm->gen = ino_gen; | 3003 | pm->gen = ino_gen; |
3004 | pm->is_orphan = is_orphan; | ||
3002 | INIT_LIST_HEAD(&pm->list); | 3005 | INIT_LIST_HEAD(&pm->list); |
3003 | INIT_LIST_HEAD(&pm->update_refs); | 3006 | INIT_LIST_HEAD(&pm->update_refs); |
3004 | RB_CLEAR_NODE(&pm->node); | 3007 | RB_CLEAR_NODE(&pm->node); |
@@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3131 | rmdir_ino = dm->rmdir_ino; | 3134 | rmdir_ino = dm->rmdir_ino; |
3132 | free_waiting_dir_move(sctx, dm); | 3135 | free_waiting_dir_move(sctx, dm); |
3133 | 3136 | ||
3134 | ret = get_first_ref(sctx->parent_root, pm->ino, | 3137 | if (pm->is_orphan) { |
3135 | &parent_ino, &parent_gen, name); | 3138 | ret = gen_unique_name(sctx, pm->ino, |
3136 | if (ret < 0) | 3139 | pm->gen, from_path); |
3137 | goto out; | 3140 | } else { |
3138 | 3141 | ret = get_first_ref(sctx->parent_root, pm->ino, | |
3139 | ret = get_cur_path(sctx, parent_ino, parent_gen, | 3142 | &parent_ino, &parent_gen, name); |
3140 | from_path); | 3143 | if (ret < 0) |
3141 | if (ret < 0) | 3144 | goto out; |
3142 | goto out; | 3145 | ret = get_cur_path(sctx, parent_ino, parent_gen, |
3143 | ret = fs_path_add_path(from_path, name); | 3146 | from_path); |
3147 | if (ret < 0) | ||
3148 | goto out; | ||
3149 | ret = fs_path_add_path(from_path, name); | ||
3150 | } | ||
3144 | if (ret < 0) | 3151 | if (ret < 0) |
3145 | goto out; | 3152 | goto out; |
3146 | 3153 | ||
@@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
3150 | LIST_HEAD(deleted_refs); | 3157 | LIST_HEAD(deleted_refs); |
3151 | ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); | 3158 | ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); |
3152 | ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, | 3159 | ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, |
3153 | &pm->update_refs, &deleted_refs); | 3160 | &pm->update_refs, &deleted_refs, |
3161 | pm->is_orphan); | ||
3154 | if (ret < 0) | 3162 | if (ret < 0) |
3155 | goto out; | 3163 | goto out; |
3156 | if (rmdir_ino) { | 3164 | if (rmdir_ino) { |
@@ -3283,6 +3291,127 @@ out: | |||
3283 | return ret; | 3291 | return ret; |
3284 | } | 3292 | } |
3285 | 3293 | ||
3294 | /* | ||
3295 | * We might need to delay a directory rename even when no ancestor directory | ||
3296 | * (in the send root) with a higher inode number than ours (sctx->cur_ino) was | ||
3297 | * renamed. This happens when we rename a directory to the old name (the name | ||
3298 | * in the parent root) of some other unrelated directory that got its rename | ||
3299 | * delayed due to some ancestor with higher number that got renamed. | ||
3300 | * | ||
3301 | * Example: | ||
3302 | * | ||
3303 | * Parent snapshot: | ||
3304 | * . (ino 256) | ||
3305 | * |---- a/ (ino 257) | ||
3306 | * | |---- file (ino 260) | ||
3307 | * | | ||
3308 | * |---- b/ (ino 258) | ||
3309 | * |---- c/ (ino 259) | ||
3310 | * | ||
3311 | * Send snapshot: | ||
3312 | * . (ino 256) | ||
3313 | * |---- a/ (ino 258) | ||
3314 | * |---- x/ (ino 259) | ||
3315 | * |---- y/ (ino 257) | ||
3316 | * |----- file (ino 260) | ||
3317 | * | ||
3318 | * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257 | ||
3319 | * from 'a' to 'x/y' happening first, which in turn depends on the rename of | ||
3320 | * inode 259 from 'c' to 'x'. So the order of rename commands the send stream | ||
3321 | * must issue is: | ||
3322 | * | ||
3323 | * 1 - rename 259 from 'c' to 'x' | ||
3324 | * 2 - rename 257 from 'a' to 'x/y' | ||
3325 | * 3 - rename 258 from 'b' to 'a' | ||
3326 | * | ||
3327 | * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can | ||
3328 | * be done right away and < 0 on error. | ||
3329 | */ | ||
3330 | static int wait_for_dest_dir_move(struct send_ctx *sctx, | ||
3331 | struct recorded_ref *parent_ref, | ||
3332 | const bool is_orphan) | ||
3333 | { | ||
3334 | struct btrfs_path *path; | ||
3335 | struct btrfs_key key; | ||
3336 | struct btrfs_key di_key; | ||
3337 | struct btrfs_dir_item *di; | ||
3338 | u64 left_gen; | ||
3339 | u64 right_gen; | ||
3340 | int ret = 0; | ||
3341 | |||
3342 | if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves)) | ||
3343 | return 0; | ||
3344 | |||
3345 | path = alloc_path_for_send(); | ||
3346 | if (!path) | ||
3347 | return -ENOMEM; | ||
3348 | |||
3349 | key.objectid = parent_ref->dir; | ||
3350 | key.type = BTRFS_DIR_ITEM_KEY; | ||
3351 | key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len); | ||
3352 | |||
3353 | ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0); | ||
3354 | if (ret < 0) { | ||
3355 | goto out; | ||
3356 | } else if (ret > 0) { | ||
3357 | ret = 0; | ||
3358 | goto out; | ||
3359 | } | ||
3360 | |||
3361 | di = btrfs_match_dir_item_name(sctx->parent_root, path, | ||
3362 | parent_ref->name, parent_ref->name_len); | ||
3363 | if (!di) { | ||
3364 | ret = 0; | ||
3365 | goto out; | ||
3366 | } | ||
3367 | /* | ||
3368 | * di_key.objectid has the number of the inode that has a dentry in the | ||
3369 | * parent directory with the same name that sctx->cur_ino is being | ||
3370 | * renamed to. We need to check if that inode is in the send root as | ||
3371 | * well and if it is currently marked as an inode with a pending rename, | ||
3372 | * if it is, we need to delay the rename of sctx->cur_ino as well, so | ||
3373 | * that it happens after that other inode is renamed. | ||
3374 | */ | ||
3375 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key); | ||
3376 | if (di_key.type != BTRFS_INODE_ITEM_KEY) { | ||
3377 | ret = 0; | ||
3378 | goto out; | ||
3379 | } | ||
3380 | |||
3381 | ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL, | ||
3382 | &left_gen, NULL, NULL, NULL, NULL); | ||
3383 | if (ret < 0) | ||
3384 | goto out; | ||
3385 | ret = get_inode_info(sctx->send_root, di_key.objectid, NULL, | ||
3386 | &right_gen, NULL, NULL, NULL, NULL); | ||
3387 | if (ret < 0) { | ||
3388 | if (ret == -ENOENT) | ||
3389 | ret = 0; | ||
3390 | goto out; | ||
3391 | } | ||
3392 | |||
3393 | /* Different inode, no need to delay the rename of sctx->cur_ino */ | ||
3394 | if (right_gen != left_gen) { | ||
3395 | ret = 0; | ||
3396 | goto out; | ||
3397 | } | ||
3398 | |||
3399 | if (is_waiting_for_move(sctx, di_key.objectid)) { | ||
3400 | ret = add_pending_dir_move(sctx, | ||
3401 | sctx->cur_ino, | ||
3402 | sctx->cur_inode_gen, | ||
3403 | di_key.objectid, | ||
3404 | &sctx->new_refs, | ||
3405 | &sctx->deleted_refs, | ||
3406 | is_orphan); | ||
3407 | if (!ret) | ||
3408 | ret = 1; | ||
3409 | } | ||
3410 | out: | ||
3411 | btrfs_free_path(path); | ||
3412 | return ret; | ||
3413 | } | ||
3414 | |||
3286 | static int wait_for_parent_move(struct send_ctx *sctx, | 3415 | static int wait_for_parent_move(struct send_ctx *sctx, |
3287 | struct recorded_ref *parent_ref) | 3416 | struct recorded_ref *parent_ref) |
3288 | { | 3417 | { |
@@ -3349,7 +3478,8 @@ out: | |||
3349 | sctx->cur_inode_gen, | 3478 | sctx->cur_inode_gen, |
3350 | ino, | 3479 | ino, |
3351 | &sctx->new_refs, | 3480 | &sctx->new_refs, |
3352 | &sctx->deleted_refs); | 3481 | &sctx->deleted_refs, |
3482 | false); | ||
3353 | if (!ret) | 3483 | if (!ret) |
3354 | ret = 1; | 3484 | ret = 1; |
3355 | } | 3485 | } |
@@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) | |||
3372 | int did_overwrite = 0; | 3502 | int did_overwrite = 0; |
3373 | int is_orphan = 0; | 3503 | int is_orphan = 0; |
3374 | u64 last_dir_ino_rm = 0; | 3504 | u64 last_dir_ino_rm = 0; |
3505 | bool can_rename = true; | ||
3375 | 3506 | ||
3376 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | 3507 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); |
3377 | 3508 | ||
@@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3490 | } | 3621 | } |
3491 | } | 3622 | } |
3492 | 3623 | ||
3624 | if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) { | ||
3625 | ret = wait_for_dest_dir_move(sctx, cur, is_orphan); | ||
3626 | if (ret < 0) | ||
3627 | goto out; | ||
3628 | if (ret == 1) { | ||
3629 | can_rename = false; | ||
3630 | *pending_move = 1; | ||
3631 | } | ||
3632 | } | ||
3633 | |||
3493 | /* | 3634 | /* |
3494 | * link/move the ref to the new place. If we have an orphan | 3635 | * link/move the ref to the new place. If we have an orphan |
3495 | * inode, move it and update valid_path. If not, link or move | 3636 | * inode, move it and update valid_path. If not, link or move |
3496 | * it depending on the inode mode. | 3637 | * it depending on the inode mode. |
3497 | */ | 3638 | */ |
3498 | if (is_orphan) { | 3639 | if (is_orphan && can_rename) { |
3499 | ret = send_rename(sctx, valid_path, cur->full_path); | 3640 | ret = send_rename(sctx, valid_path, cur->full_path); |
3500 | if (ret < 0) | 3641 | if (ret < 0) |
3501 | goto out; | 3642 | goto out; |
@@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3503 | ret = fs_path_copy(valid_path, cur->full_path); | 3644 | ret = fs_path_copy(valid_path, cur->full_path); |
3504 | if (ret < 0) | 3645 | if (ret < 0) |
3505 | goto out; | 3646 | goto out; |
3506 | } else { | 3647 | } else if (can_rename) { |
3507 | if (S_ISDIR(sctx->cur_inode_mode)) { | 3648 | if (S_ISDIR(sctx->cur_inode_mode)) { |
3508 | /* | 3649 | /* |
3509 | * Dirs can't be linked, so move it. For moved | 3650 | * Dirs can't be linked, so move it. For moved |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7e80f32550a6..88e51aded6bd 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -1052,9 +1052,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
1052 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 1052 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
1053 | if (ret) | 1053 | if (ret) |
1054 | return ret; | 1054 | return ret; |
1055 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1056 | if (ret) | ||
1057 | return ret; | ||
1058 | } | 1055 | } |
1059 | 1056 | ||
1060 | return 0; | 1057 | return 0; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9a37f8b39bae..c5b8ba37f88e 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -1012,7 +1012,7 @@ again: | |||
1012 | base = btrfs_item_ptr_offset(leaf, path->slots[0]); | 1012 | base = btrfs_item_ptr_offset(leaf, path->slots[0]); |
1013 | 1013 | ||
1014 | while (cur_offset < item_size) { | 1014 | while (cur_offset < item_size) { |
1015 | extref = (struct btrfs_inode_extref *)base + cur_offset; | 1015 | extref = (struct btrfs_inode_extref *)(base + cur_offset); |
1016 | 1016 | ||
1017 | victim_name_len = btrfs_inode_extref_name_len(leaf, extref); | 1017 | victim_name_len = btrfs_inode_extref_name_len(leaf, extref); |
1018 | 1018 | ||
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 47b19465f0dc..883b93623bc5 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
111 | name, name_len, -1); | 111 | name, name_len, -1); |
112 | if (!di && (flags & XATTR_REPLACE)) | 112 | if (!di && (flags & XATTR_REPLACE)) |
113 | ret = -ENODATA; | 113 | ret = -ENODATA; |
114 | else if (IS_ERR(di)) | ||
115 | ret = PTR_ERR(di); | ||
114 | else if (di) | 116 | else if (di) |
115 | ret = btrfs_delete_one_dir_name(trans, root, path, di); | 117 | ret = btrfs_delete_one_dir_name(trans, root, path, di); |
116 | goto out; | 118 | goto out; |
@@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans, | |||
127 | ASSERT(mutex_is_locked(&inode->i_mutex)); | 129 | ASSERT(mutex_is_locked(&inode->i_mutex)); |
128 | di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), | 130 | di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), |
129 | name, name_len, 0); | 131 | name, name_len, 0); |
130 | if (!di) { | 132 | if (!di) |
131 | ret = -ENODATA; | 133 | ret = -ENODATA; |
134 | else if (IS_ERR(di)) | ||
135 | ret = PTR_ERR(di); | ||
136 | if (ret) | ||
132 | goto out; | 137 | goto out; |
133 | } | ||
134 | btrfs_release_path(path); | 138 | btrfs_release_path(path); |
135 | di = NULL; | 139 | di = NULL; |
136 | } | 140 | } |