aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/ctree.c8
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/disk-io.c2
-rw-r--r--fs/btrfs/extent-tree.c51
-rw-r--r--fs/btrfs/extent_io.c6
-rw-r--r--fs/btrfs/file.c87
-rw-r--r--fs/btrfs/inode.c113
-rw-r--r--fs/btrfs/ordered-data.c7
-rw-r--r--fs/btrfs/qgroup.c2
-rw-r--r--fs/btrfs/send.c171
-rw-r--r--fs/btrfs/tests/inode-tests.c197
-rw-r--r--fs/btrfs/transaction.c42
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/xattr.c8
14 files changed, 593 insertions, 108 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 993642199326..6d67f32e648d 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1645,14 +1645,14 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1645 1645
1646 parent_nritems = btrfs_header_nritems(parent); 1646 parent_nritems = btrfs_header_nritems(parent);
1647 blocksize = root->nodesize; 1647 blocksize = root->nodesize;
1648 end_slot = parent_nritems; 1648 end_slot = parent_nritems - 1;
1649 1649
1650 if (parent_nritems == 1) 1650 if (parent_nritems <= 1)
1651 return 0; 1651 return 0;
1652 1652
1653 btrfs_set_lock_blocking(parent); 1653 btrfs_set_lock_blocking(parent);
1654 1654
1655 for (i = start_slot; i < end_slot; i++) { 1655 for (i = start_slot; i <= end_slot; i++) {
1656 int close = 1; 1656 int close = 1;
1657 1657
1658 btrfs_node_key(parent, &disk_key, i); 1658 btrfs_node_key(parent, &disk_key, i);
@@ -1669,7 +1669,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
1669 other = btrfs_node_blockptr(parent, i - 1); 1669 other = btrfs_node_blockptr(parent, i - 1);
1670 close = close_blocks(blocknr, other, blocksize); 1670 close = close_blocks(blocknr, other, blocksize);
1671 } 1671 }
1672 if (!close && i < end_slot - 2) { 1672 if (!close && i < end_slot) {
1673 other = btrfs_node_blockptr(parent, i + 1); 1673 other = btrfs_node_blockptr(parent, i + 1);
1674 close = close_blocks(blocknr, other, blocksize); 1674 close = close_blocks(blocknr, other, blocksize);
1675 } 1675 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 84c3b00f3de8..f9c89cae39ee 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3387,6 +3387,8 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
3387 3387
3388int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3388int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3389 struct btrfs_root *root); 3389 struct btrfs_root *root);
3390int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3391 struct btrfs_root *root);
3390int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr); 3392int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr);
3391int btrfs_free_block_groups(struct btrfs_fs_info *info); 3393int btrfs_free_block_groups(struct btrfs_fs_info *info);
3392int btrfs_read_block_groups(struct btrfs_root *root); 3394int btrfs_read_block_groups(struct btrfs_root *root);
@@ -3909,6 +3911,9 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
3909 loff_t actual_len, u64 *alloc_hint); 3911 loff_t actual_len, u64 *alloc_hint);
3910int btrfs_inode_check_errors(struct inode *inode); 3912int btrfs_inode_check_errors(struct inode *inode);
3911extern const struct dentry_operations btrfs_dentry_operations; 3913extern const struct dentry_operations btrfs_dentry_operations;
3914#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
3915void btrfs_test_inode_set_ops(struct inode *inode);
3916#endif
3912 3917
3913/* ioctl.c */ 3918/* ioctl.c */
3914long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); 3919long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index f79f38542a73..639f2663ed3f 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3921,7 +3921,7 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
3921 } 3921 }
3922 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key) 3922 if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
3923 + sizeof(struct btrfs_chunk)) { 3923 + sizeof(struct btrfs_chunk)) {
3924 printk(KERN_ERR "BTRFS: system chunk array too small %u < %lu\n", 3924 printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
3925 btrfs_super_sys_array_size(sb), 3925 btrfs_super_sys_array_size(sb),
3926 sizeof(struct btrfs_disk_key) 3926 sizeof(struct btrfs_disk_key)
3927 + sizeof(struct btrfs_chunk)); 3927 + sizeof(struct btrfs_chunk));
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 571f402d3fc4..8b353ad02f03 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3208,6 +3208,8 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
3208 return 0; 3208 return 0;
3209 } 3209 }
3210 3210
3211 if (trans->aborted)
3212 return 0;
3211again: 3213again:
3212 inode = lookup_free_space_inode(root, block_group, path); 3214 inode = lookup_free_space_inode(root, block_group, path);
3213 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 3215 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
@@ -3243,6 +3245,20 @@ again:
3243 */ 3245 */
3244 BTRFS_I(inode)->generation = 0; 3246 BTRFS_I(inode)->generation = 0;
3245 ret = btrfs_update_inode(trans, root, inode); 3247 ret = btrfs_update_inode(trans, root, inode);
3248 if (ret) {
3249 /*
3250 * So theoretically we could recover from this, simply set the
3251 * super cache generation to 0 so we know to invalidate the
3252 * cache, but then we'd have to keep track of the block groups
3253 * that fail this way so we know we _have_ to reset this cache
3254 * before the next commit or risk reading stale cache. So to
3255 * limit our exposure to horrible edge cases lets just abort the
3256 * transaction, this only happens in really bad situations
3257 * anyway.
3258 */
3259 btrfs_abort_transaction(trans, root, ret);
3260 goto out_put;
3261 }
3246 WARN_ON(ret); 3262 WARN_ON(ret);
3247 3263
3248 if (i_size_read(inode) > 0) { 3264 if (i_size_read(inode) > 0) {
@@ -3309,6 +3325,32 @@ out:
3309 return ret; 3325 return ret;
3310} 3326}
3311 3327
3328int btrfs_setup_space_cache(struct btrfs_trans_handle *trans,
3329 struct btrfs_root *root)
3330{
3331 struct btrfs_block_group_cache *cache, *tmp;
3332 struct btrfs_transaction *cur_trans = trans->transaction;
3333 struct btrfs_path *path;
3334
3335 if (list_empty(&cur_trans->dirty_bgs) ||
3336 !btrfs_test_opt(root, SPACE_CACHE))
3337 return 0;
3338
3339 path = btrfs_alloc_path();
3340 if (!path)
3341 return -ENOMEM;
3342
3343 /* Could add new block groups, use _safe just in case */
3344 list_for_each_entry_safe(cache, tmp, &cur_trans->dirty_bgs,
3345 dirty_list) {
3346 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
3347 cache_save_setup(cache, trans, path);
3348 }
3349
3350 btrfs_free_path(path);
3351 return 0;
3352}
3353
3312int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 3354int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
3313 struct btrfs_root *root) 3355 struct btrfs_root *root)
3314{ 3356{
@@ -5094,7 +5136,11 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
5094 num_bytes = ALIGN(num_bytes, root->sectorsize); 5136 num_bytes = ALIGN(num_bytes, root->sectorsize);
5095 5137
5096 spin_lock(&BTRFS_I(inode)->lock); 5138 spin_lock(&BTRFS_I(inode)->lock);
5097 BTRFS_I(inode)->outstanding_extents++; 5139 nr_extents = (unsigned)div64_u64(num_bytes +
5140 BTRFS_MAX_EXTENT_SIZE - 1,
5141 BTRFS_MAX_EXTENT_SIZE);
5142 BTRFS_I(inode)->outstanding_extents += nr_extents;
5143 nr_extents = 0;
5098 5144
5099 if (BTRFS_I(inode)->outstanding_extents > 5145 if (BTRFS_I(inode)->outstanding_extents >
5100 BTRFS_I(inode)->reserved_extents) 5146 BTRFS_I(inode)->reserved_extents)
@@ -5239,6 +5285,9 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
5239 if (dropped > 0) 5285 if (dropped > 0)
5240 to_free += btrfs_calc_trans_metadata_size(root, dropped); 5286 to_free += btrfs_calc_trans_metadata_size(root, dropped);
5241 5287
5288 if (btrfs_test_is_dummy_root(root))
5289 return;
5290
5242 trace_btrfs_space_reservation(root->fs_info, "delalloc", 5291 trace_btrfs_space_reservation(root->fs_info, "delalloc",
5243 btrfs_ino(inode), to_free, 0); 5292 btrfs_ino(inode), to_free, 0);
5244 if (root->fs_info->quota_enabled) { 5293 if (root->fs_info->quota_enabled) {
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c7233ff1d533..d688cfe5d496 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4968,6 +4968,12 @@ static int release_extent_buffer(struct extent_buffer *eb)
4968 4968
4969 /* Should be safe to release our pages at this point */ 4969 /* Should be safe to release our pages at this point */
4970 btrfs_release_extent_buffer_page(eb); 4970 btrfs_release_extent_buffer_page(eb);
4971#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
4972 if (unlikely(test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags))) {
4973 __free_extent_buffer(eb);
4974 return 1;
4975 }
4976#endif
4971 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu); 4977 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
4972 return 1; 4978 return 1;
4973 } 4979 }
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index b78bbbac900d..30982bbd31c3 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1811,22 +1811,10 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
1811 mutex_unlock(&inode->i_mutex); 1811 mutex_unlock(&inode->i_mutex);
1812 1812
1813 /* 1813 /*
1814 * we want to make sure fsync finds this change
1815 * but we haven't joined a transaction running right now.
1816 *
1817 * Later on, someone is sure to update the inode and get the
1818 * real transid recorded.
1819 *
1820 * We set last_trans now to the fs_info generation + 1,
1821 * this will either be one more than the running transaction
1822 * or the generation used for the next transaction if there isn't
1823 * one running right now.
1824 *
1825 * We also have to set last_sub_trans to the current log transid, 1814 * We also have to set last_sub_trans to the current log transid,
1826 * otherwise subsequent syncs to a file that's been synced in this 1815 * otherwise subsequent syncs to a file that's been synced in this
1827 * transaction will appear to have already occured. 1816 * transaction will appear to have already occured.
1828 */ 1817 */
1829 BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
1830 BTRFS_I(inode)->last_sub_trans = root->log_transid; 1818 BTRFS_I(inode)->last_sub_trans = root->log_transid;
1831 if (num_written > 0) { 1819 if (num_written > 0) {
1832 err = generic_write_sync(file, pos, num_written); 1820 err = generic_write_sync(file, pos, num_written);
@@ -1959,25 +1947,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1959 atomic_inc(&root->log_batch); 1947 atomic_inc(&root->log_batch);
1960 1948
1961 /* 1949 /*
1962 * check the transaction that last modified this inode 1950 * If the last transaction that changed this file was before the current
1963 * and see if its already been committed 1951 * transaction and we have the full sync flag set in our inode, we can
1964 */ 1952 * bail out now without any syncing.
1965 if (!BTRFS_I(inode)->last_trans) { 1953 *
1966 mutex_unlock(&inode->i_mutex); 1954 * Note that we can't bail out if the full sync flag isn't set. This is
1967 goto out; 1955 * because when the full sync flag is set we start all ordered extents
1968 } 1956 * and wait for them to fully complete - when they complete they update
1969 1957 * the inode's last_trans field through:
1970 /* 1958 *
1971 * if the last transaction that changed this file was before 1959 * btrfs_finish_ordered_io() ->
1972 * the current transaction, we can bail out now without any 1960 * btrfs_update_inode_fallback() ->
1973 * syncing 1961 * btrfs_update_inode() ->
1962 * btrfs_set_inode_last_trans()
1963 *
1964 * So we are sure that last_trans is up to date and can do this check to
1965 * bail out safely. For the fast path, when the full sync flag is not
1966 * set in our inode, we can not do it because we start only our ordered
1967 * extents and don't wait for them to complete (that is when
1968 * btrfs_finish_ordered_io runs), so here at this point their last_trans
1969 * value might be less than or equals to fs_info->last_trans_committed,
1970 * and setting a speculative last_trans for an inode when a buffered
1971 * write is made (such as fs_info->generation + 1 for example) would not
1972 * be reliable since after setting the value and before fsync is called
1973 * any number of transactions can start and commit (transaction kthread
1974 * commits the current transaction periodically), and a transaction
1975 * commit does not start nor waits for ordered extents to complete.
1974 */ 1976 */
1975 smp_mb(); 1977 smp_mb();
1976 if (btrfs_inode_in_log(inode, root->fs_info->generation) || 1978 if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
1977 BTRFS_I(inode)->last_trans <= 1979 (full_sync && BTRFS_I(inode)->last_trans <=
1978 root->fs_info->last_trans_committed) { 1980 root->fs_info->last_trans_committed)) {
1979 BTRFS_I(inode)->last_trans = 0;
1980
1981 /* 1981 /*
1982 * We'v had everything committed since the last time we were 1982 * We'v had everything committed since the last time we were
1983 * modified so clear this flag in case it was set for whatever 1983 * modified so clear this flag in case it was set for whatever
@@ -2275,6 +2275,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2275 bool same_page; 2275 bool same_page;
2276 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); 2276 bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
2277 u64 ino_size; 2277 u64 ino_size;
2278 bool truncated_page = false;
2279 bool updated_inode = false;
2278 2280
2279 ret = btrfs_wait_ordered_range(inode, offset, len); 2281 ret = btrfs_wait_ordered_range(inode, offset, len);
2280 if (ret) 2282 if (ret)
@@ -2306,13 +2308,18 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2306 * entire page. 2308 * entire page.
2307 */ 2309 */
2308 if (same_page && len < PAGE_CACHE_SIZE) { 2310 if (same_page && len < PAGE_CACHE_SIZE) {
2309 if (offset < ino_size) 2311 if (offset < ino_size) {
2312 truncated_page = true;
2310 ret = btrfs_truncate_page(inode, offset, len, 0); 2313 ret = btrfs_truncate_page(inode, offset, len, 0);
2314 } else {
2315 ret = 0;
2316 }
2311 goto out_only_mutex; 2317 goto out_only_mutex;
2312 } 2318 }
2313 2319
2314 /* zero back part of the first page */ 2320 /* zero back part of the first page */
2315 if (offset < ino_size) { 2321 if (offset < ino_size) {
2322 truncated_page = true;
2316 ret = btrfs_truncate_page(inode, offset, 0, 0); 2323 ret = btrfs_truncate_page(inode, offset, 0, 0);
2317 if (ret) { 2324 if (ret) {
2318 mutex_unlock(&inode->i_mutex); 2325 mutex_unlock(&inode->i_mutex);
@@ -2348,6 +2355,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2348 if (!ret) { 2355 if (!ret) {
2349 /* zero the front end of the last page */ 2356 /* zero the front end of the last page */
2350 if (tail_start + tail_len < ino_size) { 2357 if (tail_start + tail_len < ino_size) {
2358 truncated_page = true;
2351 ret = btrfs_truncate_page(inode, 2359 ret = btrfs_truncate_page(inode,
2352 tail_start + tail_len, 0, 1); 2360 tail_start + tail_len, 0, 1);
2353 if (ret) 2361 if (ret)
@@ -2357,8 +2365,8 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2357 } 2365 }
2358 2366
2359 if (lockend < lockstart) { 2367 if (lockend < lockstart) {
2360 mutex_unlock(&inode->i_mutex); 2368 ret = 0;
2361 return 0; 2369 goto out_only_mutex;
2362 } 2370 }
2363 2371
2364 while (1) { 2372 while (1) {
@@ -2506,6 +2514,7 @@ out_trans:
2506 2514
2507 trans->block_rsv = &root->fs_info->trans_block_rsv; 2515 trans->block_rsv = &root->fs_info->trans_block_rsv;
2508 ret = btrfs_update_inode(trans, root, inode); 2516 ret = btrfs_update_inode(trans, root, inode);
2517 updated_inode = true;
2509 btrfs_end_transaction(trans, root); 2518 btrfs_end_transaction(trans, root);
2510 btrfs_btree_balance_dirty(root); 2519 btrfs_btree_balance_dirty(root);
2511out_free: 2520out_free:
@@ -2515,6 +2524,22 @@ out:
2515 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, 2524 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
2516 &cached_state, GFP_NOFS); 2525 &cached_state, GFP_NOFS);
2517out_only_mutex: 2526out_only_mutex:
2527 if (!updated_inode && truncated_page && !ret && !err) {
2528 /*
2529 * If we only end up zeroing part of a page, we still need to
2530 * update the inode item, so that all the time fields are
2531 * updated as well as the necessary btrfs inode in memory fields
2532 * for detecting, at fsync time, if the inode isn't yet in the
2533 * log tree or it's there but not up to date.
2534 */
2535 trans = btrfs_start_transaction(root, 1);
2536 if (IS_ERR(trans)) {
2537 err = PTR_ERR(trans);
2538 } else {
2539 err = btrfs_update_inode(trans, root, inode);
2540 ret = btrfs_end_transaction(trans, root);
2541 }
2542 }
2518 mutex_unlock(&inode->i_mutex); 2543 mutex_unlock(&inode->i_mutex);
2519 if (ret && !err) 2544 if (ret && !err)
2520 err = ret; 2545 err = ret;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a85c23dfcddb..d2e732d7af52 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -108,6 +108,13 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
108 108
109static int btrfs_dirty_inode(struct inode *inode); 109static int btrfs_dirty_inode(struct inode *inode);
110 110
111#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
112void btrfs_test_inode_set_ops(struct inode *inode)
113{
114 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
115}
116#endif
117
111static int btrfs_init_inode_security(struct btrfs_trans_handle *trans, 118static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
112 struct inode *inode, struct inode *dir, 119 struct inode *inode, struct inode *dir,
113 const struct qstr *qstr) 120 const struct qstr *qstr)
@@ -1542,30 +1549,17 @@ static void btrfs_split_extent_hook(struct inode *inode,
1542 u64 new_size; 1549 u64 new_size;
1543 1550
1544 /* 1551 /*
1545 * We need the largest size of the remaining extent to see if we 1552 * See the explanation in btrfs_merge_extent_hook, the same
1546 * need to add a new outstanding extent. Think of the following 1553 * applies here, just in reverse.
1547 * case
1548 *
1549 * [MEAX_EXTENT_SIZEx2 - 4k][4k]
1550 *
1551 * The new_size would just be 4k and we'd think we had enough
1552 * outstanding extents for this if we only took one side of the
1553 * split, same goes for the other direction. We need to see if
1554 * the larger size still is the same amount of extents as the
1555 * original size, because if it is we need to add a new
1556 * outstanding extent. But if we split up and the larger size
1557 * is less than the original then we are good to go since we've
1558 * already accounted for the extra extent in our original
1559 * accounting.
1560 */ 1554 */
1561 new_size = orig->end - split + 1; 1555 new_size = orig->end - split + 1;
1562 if ((split - orig->start) > new_size) 1556 num_extents = div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1563 new_size = split - orig->start;
1564
1565 num_extents = div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1566 BTRFS_MAX_EXTENT_SIZE); 1557 BTRFS_MAX_EXTENT_SIZE);
1567 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, 1558 new_size = split - orig->start;
1568 BTRFS_MAX_EXTENT_SIZE) < num_extents) 1559 num_extents += div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1560 BTRFS_MAX_EXTENT_SIZE);
1561 if (div64_u64(size + BTRFS_MAX_EXTENT_SIZE - 1,
1562 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1569 return; 1563 return;
1570 } 1564 }
1571 1565
@@ -1591,8 +1585,10 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1591 if (!(other->state & EXTENT_DELALLOC)) 1585 if (!(other->state & EXTENT_DELALLOC))
1592 return; 1586 return;
1593 1587
1594 old_size = other->end - other->start + 1; 1588 if (new->start > other->start)
1595 new_size = old_size + (new->end - new->start + 1); 1589 new_size = new->end - other->start + 1;
1590 else
1591 new_size = other->end - new->start + 1;
1596 1592
1597 /* we're not bigger than the max, unreserve the space and go */ 1593 /* we're not bigger than the max, unreserve the space and go */
1598 if (new_size <= BTRFS_MAX_EXTENT_SIZE) { 1594 if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
@@ -1603,13 +1599,32 @@ static void btrfs_merge_extent_hook(struct inode *inode,
1603 } 1599 }
1604 1600
1605 /* 1601 /*
1606 * If we grew by another max_extent, just return, we want to keep that 1602 * We have to add up either side to figure out how many extents were
1607 * reserved amount. 1603 * accounted for before we merged into one big extent. If the number of
1604 * extents we accounted for is <= the amount we need for the new range
1605 * then we can return, otherwise drop. Think of it like this
1606 *
1607 * [ 4k][MAX_SIZE]
1608 *
1609 * So we've grown the extent by a MAX_SIZE extent, this would mean we
1610 * need 2 outstanding extents, on one side we have 1 and the other side
1611 * we have 1 so they are == and we can return. But in this case
1612 *
1613 * [MAX_SIZE+4k][MAX_SIZE+4k]
1614 *
1615 * Each range on their own accounts for 2 extents, but merged together
1616 * they are only 3 extents worth of accounting, so we need to drop in
1617 * this case.
1608 */ 1618 */
1619 old_size = other->end - other->start + 1;
1609 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1, 1620 num_extents = div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1610 BTRFS_MAX_EXTENT_SIZE); 1621 BTRFS_MAX_EXTENT_SIZE);
1622 old_size = new->end - new->start + 1;
1623 num_extents += div64_u64(old_size + BTRFS_MAX_EXTENT_SIZE - 1,
1624 BTRFS_MAX_EXTENT_SIZE);
1625
1611 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1, 1626 if (div64_u64(new_size + BTRFS_MAX_EXTENT_SIZE - 1,
1612 BTRFS_MAX_EXTENT_SIZE) > num_extents) 1627 BTRFS_MAX_EXTENT_SIZE) >= num_extents)
1613 return; 1628 return;
1614 1629
1615 spin_lock(&BTRFS_I(inode)->lock); 1630 spin_lock(&BTRFS_I(inode)->lock);
@@ -1686,6 +1701,10 @@ static void btrfs_set_bit_hook(struct inode *inode,
1686 spin_unlock(&BTRFS_I(inode)->lock); 1701 spin_unlock(&BTRFS_I(inode)->lock);
1687 } 1702 }
1688 1703
1704 /* For sanity tests */
1705 if (btrfs_test_is_dummy_root(root))
1706 return;
1707
1689 __percpu_counter_add(&root->fs_info->delalloc_bytes, len, 1708 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1690 root->fs_info->delalloc_batch); 1709 root->fs_info->delalloc_batch);
1691 spin_lock(&BTRFS_I(inode)->lock); 1710 spin_lock(&BTRFS_I(inode)->lock);
@@ -1741,6 +1760,10 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1741 root != root->fs_info->tree_root) 1760 root != root->fs_info->tree_root)
1742 btrfs_delalloc_release_metadata(inode, len); 1761 btrfs_delalloc_release_metadata(inode, len);
1743 1762
1763 /* For sanity tests. */
1764 if (btrfs_test_is_dummy_root(root))
1765 return;
1766
1744 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID 1767 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1745 && do_list && !(state->state & EXTENT_NORESERVE)) 1768 && do_list && !(state->state & EXTENT_NORESERVE))
1746 btrfs_free_reserved_data_space(inode, len); 1769 btrfs_free_reserved_data_space(inode, len);
@@ -7213,7 +7236,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7213 u64 start = iblock << inode->i_blkbits; 7236 u64 start = iblock << inode->i_blkbits;
7214 u64 lockstart, lockend; 7237 u64 lockstart, lockend;
7215 u64 len = bh_result->b_size; 7238 u64 len = bh_result->b_size;
7216 u64 orig_len = len; 7239 u64 *outstanding_extents = NULL;
7217 int unlock_bits = EXTENT_LOCKED; 7240 int unlock_bits = EXTENT_LOCKED;
7218 int ret = 0; 7241 int ret = 0;
7219 7242
@@ -7225,6 +7248,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7225 lockstart = start; 7248 lockstart = start;
7226 lockend = start + len - 1; 7249 lockend = start + len - 1;
7227 7250
7251 if (current->journal_info) {
7252 /*
7253 * Need to pull our outstanding extents and set journal_info to NULL so
7254 * that anything that needs to check if there's a transction doesn't get
7255 * confused.
7256 */
7257 outstanding_extents = current->journal_info;
7258 current->journal_info = NULL;
7259 }
7260
7228 /* 7261 /*
7229 * If this errors out it's because we couldn't invalidate pagecache for 7262 * If this errors out it's because we couldn't invalidate pagecache for
7230 * this range and we need to fallback to buffered. 7263 * this range and we need to fallback to buffered.
@@ -7285,7 +7318,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
7285 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && 7318 ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
7286 em->block_start != EXTENT_MAP_HOLE)) { 7319 em->block_start != EXTENT_MAP_HOLE)) {
7287 int type; 7320 int type;
7288 int ret;
7289 u64 block_start, orig_start, orig_block_len, ram_bytes; 7321 u64 block_start, orig_start, orig_block_len, ram_bytes;
7290 7322
7291 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) 7323 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
@@ -7349,11 +7381,20 @@ unlock:
7349 if (start + len > i_size_read(inode)) 7381 if (start + len > i_size_read(inode))
7350 i_size_write(inode, start + len); 7382 i_size_write(inode, start + len);
7351 7383
7352 if (len < orig_len) { 7384 /*
7385 * If we have an outstanding_extents count still set then we're
7386 * within our reservation, otherwise we need to adjust our inode
7387 * counter appropriately.
7388 */
7389 if (*outstanding_extents) {
7390 (*outstanding_extents)--;
7391 } else {
7353 spin_lock(&BTRFS_I(inode)->lock); 7392 spin_lock(&BTRFS_I(inode)->lock);
7354 BTRFS_I(inode)->outstanding_extents++; 7393 BTRFS_I(inode)->outstanding_extents++;
7355 spin_unlock(&BTRFS_I(inode)->lock); 7394 spin_unlock(&BTRFS_I(inode)->lock);
7356 } 7395 }
7396
7397 current->journal_info = outstanding_extents;
7357 btrfs_free_reserved_data_space(inode, len); 7398 btrfs_free_reserved_data_space(inode, len);
7358 } 7399 }
7359 7400
@@ -7377,6 +7418,8 @@ unlock:
7377unlock_err: 7418unlock_err:
7378 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 7419 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
7379 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 7420 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
7421 if (outstanding_extents)
7422 current->journal_info = outstanding_extents;
7380 return ret; 7423 return ret;
7381} 7424}
7382 7425
@@ -8076,6 +8119,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8076{ 8119{
8077 struct file *file = iocb->ki_filp; 8120 struct file *file = iocb->ki_filp;
8078 struct inode *inode = file->f_mapping->host; 8121 struct inode *inode = file->f_mapping->host;
8122 u64 outstanding_extents = 0;
8079 size_t count = 0; 8123 size_t count = 0;
8080 int flags = 0; 8124 int flags = 0;
8081 bool wakeup = true; 8125 bool wakeup = true;
@@ -8113,6 +8157,16 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8113 ret = btrfs_delalloc_reserve_space(inode, count); 8157 ret = btrfs_delalloc_reserve_space(inode, count);
8114 if (ret) 8158 if (ret)
8115 goto out; 8159 goto out;
8160 outstanding_extents = div64_u64(count +
8161 BTRFS_MAX_EXTENT_SIZE - 1,
8162 BTRFS_MAX_EXTENT_SIZE);
8163
8164 /*
8165 * We need to know how many extents we reserved so that we can
8166 * do the accounting properly if we go over the number we
8167 * originally calculated. Abuse current->journal_info for this.
8168 */
8169 current->journal_info = &outstanding_extents;
8116 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK, 8170 } else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
8117 &BTRFS_I(inode)->runtime_flags)) { 8171 &BTRFS_I(inode)->runtime_flags)) {
8118 inode_dio_done(inode); 8172 inode_dio_done(inode);
@@ -8125,6 +8179,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
8125 iter, offset, btrfs_get_blocks_direct, NULL, 8179 iter, offset, btrfs_get_blocks_direct, NULL,
8126 btrfs_submit_direct, flags); 8180 btrfs_submit_direct, flags);
8127 if (rw & WRITE) { 8181 if (rw & WRITE) {
8182 current->journal_info = NULL;
8128 if (ret < 0 && ret != -EIOCBQUEUED) 8183 if (ret < 0 && ret != -EIOCBQUEUED)
8129 btrfs_delalloc_release_space(inode, count); 8184 btrfs_delalloc_release_space(inode, count);
8130 else if (ret >= 0 && (size_t)ret < count) 8185 else if (ret >= 0 && (size_t)ret < count)
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 534544e08f76..157cc54fc634 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -452,9 +452,7 @@ void btrfs_get_logged_extents(struct inode *inode,
452 continue; 452 continue;
453 if (entry_end(ordered) <= start) 453 if (entry_end(ordered) <= start)
454 break; 454 break;
455 if (!list_empty(&ordered->log_list)) 455 if (test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
456 continue;
457 if (test_bit(BTRFS_ORDERED_LOGGED, &ordered->flags))
458 continue; 456 continue;
459 list_add(&ordered->log_list, logged_list); 457 list_add(&ordered->log_list, logged_list);
460 atomic_inc(&ordered->refs); 458 atomic_inc(&ordered->refs);
@@ -511,8 +509,7 @@ void btrfs_wait_logged_extents(struct btrfs_trans_handle *trans,
511 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, 509 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
512 &ordered->flags)); 510 &ordered->flags));
513 511
514 if (!test_and_set_bit(BTRFS_ORDERED_LOGGED, &ordered->flags)) 512 list_add_tail(&ordered->trans_list, &trans->ordered);
515 list_add_tail(&ordered->trans_list, &trans->ordered);
516 spin_lock_irq(&log->log_extents_lock[index]); 513 spin_lock_irq(&log->log_extents_lock[index]);
517 } 514 }
518 spin_unlock_irq(&log->log_extents_lock[index]); 515 spin_unlock_irq(&log->log_extents_lock[index]);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 97159a8e91d4..058c79eecbfb 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1259,7 +1259,7 @@ static int comp_oper(struct btrfs_qgroup_operation *oper1,
1259 if (oper1->seq < oper2->seq) 1259 if (oper1->seq < oper2->seq)
1260 return -1; 1260 return -1;
1261 if (oper1->seq > oper2->seq) 1261 if (oper1->seq > oper2->seq)
1262 return -1; 1262 return 1;
1263 if (oper1->ref_root < oper2->ref_root) 1263 if (oper1->ref_root < oper2->ref_root)
1264 return -1; 1264 return -1;
1265 if (oper1->ref_root > oper2->ref_root) 1265 if (oper1->ref_root > oper2->ref_root)
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index fe5857223515..d6033f540cc7 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -230,6 +230,7 @@ struct pending_dir_move {
230 u64 parent_ino; 230 u64 parent_ino;
231 u64 ino; 231 u64 ino;
232 u64 gen; 232 u64 gen;
233 bool is_orphan;
233 struct list_head update_refs; 234 struct list_head update_refs;
234}; 235};
235 236
@@ -2984,7 +2985,8 @@ static int add_pending_dir_move(struct send_ctx *sctx,
2984 u64 ino_gen, 2985 u64 ino_gen,
2985 u64 parent_ino, 2986 u64 parent_ino,
2986 struct list_head *new_refs, 2987 struct list_head *new_refs,
2987 struct list_head *deleted_refs) 2988 struct list_head *deleted_refs,
2989 const bool is_orphan)
2988{ 2990{
2989 struct rb_node **p = &sctx->pending_dir_moves.rb_node; 2991 struct rb_node **p = &sctx->pending_dir_moves.rb_node;
2990 struct rb_node *parent = NULL; 2992 struct rb_node *parent = NULL;
@@ -2999,6 +3001,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
2999 pm->parent_ino = parent_ino; 3001 pm->parent_ino = parent_ino;
3000 pm->ino = ino; 3002 pm->ino = ino;
3001 pm->gen = ino_gen; 3003 pm->gen = ino_gen;
3004 pm->is_orphan = is_orphan;
3002 INIT_LIST_HEAD(&pm->list); 3005 INIT_LIST_HEAD(&pm->list);
3003 INIT_LIST_HEAD(&pm->update_refs); 3006 INIT_LIST_HEAD(&pm->update_refs);
3004 RB_CLEAR_NODE(&pm->node); 3007 RB_CLEAR_NODE(&pm->node);
@@ -3131,16 +3134,20 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3131 rmdir_ino = dm->rmdir_ino; 3134 rmdir_ino = dm->rmdir_ino;
3132 free_waiting_dir_move(sctx, dm); 3135 free_waiting_dir_move(sctx, dm);
3133 3136
3134 ret = get_first_ref(sctx->parent_root, pm->ino, 3137 if (pm->is_orphan) {
3135 &parent_ino, &parent_gen, name); 3138 ret = gen_unique_name(sctx, pm->ino,
3136 if (ret < 0) 3139 pm->gen, from_path);
3137 goto out; 3140 } else {
3138 3141 ret = get_first_ref(sctx->parent_root, pm->ino,
3139 ret = get_cur_path(sctx, parent_ino, parent_gen, 3142 &parent_ino, &parent_gen, name);
3140 from_path); 3143 if (ret < 0)
3141 if (ret < 0) 3144 goto out;
3142 goto out; 3145 ret = get_cur_path(sctx, parent_ino, parent_gen,
3143 ret = fs_path_add_path(from_path, name); 3146 from_path);
3147 if (ret < 0)
3148 goto out;
3149 ret = fs_path_add_path(from_path, name);
3150 }
3144 if (ret < 0) 3151 if (ret < 0)
3145 goto out; 3152 goto out;
3146 3153
@@ -3150,7 +3157,8 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm)
3150 LIST_HEAD(deleted_refs); 3157 LIST_HEAD(deleted_refs);
3151 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID); 3158 ASSERT(ancestor > BTRFS_FIRST_FREE_OBJECTID);
3152 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor, 3159 ret = add_pending_dir_move(sctx, pm->ino, pm->gen, ancestor,
3153 &pm->update_refs, &deleted_refs); 3160 &pm->update_refs, &deleted_refs,
3161 pm->is_orphan);
3154 if (ret < 0) 3162 if (ret < 0)
3155 goto out; 3163 goto out;
3156 if (rmdir_ino) { 3164 if (rmdir_ino) {
@@ -3283,6 +3291,127 @@ out:
3283 return ret; 3291 return ret;
3284} 3292}
3285 3293
3294/*
3295 * We might need to delay a directory rename even when no ancestor directory
3296 * (in the send root) with a higher inode number than ours (sctx->cur_ino) was
3297 * renamed. This happens when we rename a directory to the old name (the name
3298 * in the parent root) of some other unrelated directory that got its rename
3299 * delayed due to some ancestor with higher number that got renamed.
3300 *
3301 * Example:
3302 *
3303 * Parent snapshot:
3304 * . (ino 256)
3305 * |---- a/ (ino 257)
3306 * | |---- file (ino 260)
3307 * |
3308 * |---- b/ (ino 258)
3309 * |---- c/ (ino 259)
3310 *
3311 * Send snapshot:
3312 * . (ino 256)
3313 * |---- a/ (ino 258)
3314 * |---- x/ (ino 259)
3315 * |---- y/ (ino 257)
3316 * |----- file (ino 260)
3317 *
3318 * Here we can not rename 258 from 'b' to 'a' without the rename of inode 257
3319 * from 'a' to 'x/y' happening first, which in turn depends on the rename of
3320 * inode 259 from 'c' to 'x'. So the order of rename commands the send stream
3321 * must issue is:
3322 *
3323 * 1 - rename 259 from 'c' to 'x'
3324 * 2 - rename 257 from 'a' to 'x/y'
3325 * 3 - rename 258 from 'b' to 'a'
3326 *
3327 * Returns 1 if the rename of sctx->cur_ino needs to be delayed, 0 if it can
3328 * be done right away and < 0 on error.
3329 */
3330static int wait_for_dest_dir_move(struct send_ctx *sctx,
3331 struct recorded_ref *parent_ref,
3332 const bool is_orphan)
3333{
3334 struct btrfs_path *path;
3335 struct btrfs_key key;
3336 struct btrfs_key di_key;
3337 struct btrfs_dir_item *di;
3338 u64 left_gen;
3339 u64 right_gen;
3340 int ret = 0;
3341
3342 if (RB_EMPTY_ROOT(&sctx->waiting_dir_moves))
3343 return 0;
3344
3345 path = alloc_path_for_send();
3346 if (!path)
3347 return -ENOMEM;
3348
3349 key.objectid = parent_ref->dir;
3350 key.type = BTRFS_DIR_ITEM_KEY;
3351 key.offset = btrfs_name_hash(parent_ref->name, parent_ref->name_len);
3352
3353 ret = btrfs_search_slot(NULL, sctx->parent_root, &key, path, 0, 0);
3354 if (ret < 0) {
3355 goto out;
3356 } else if (ret > 0) {
3357 ret = 0;
3358 goto out;
3359 }
3360
3361 di = btrfs_match_dir_item_name(sctx->parent_root, path,
3362 parent_ref->name, parent_ref->name_len);
3363 if (!di) {
3364 ret = 0;
3365 goto out;
3366 }
3367 /*
3368 * di_key.objectid has the number of the inode that has a dentry in the
3369 * parent directory with the same name that sctx->cur_ino is being
3370 * renamed to. We need to check if that inode is in the send root as
3371 * well and if it is currently marked as an inode with a pending rename,
3372 * if it is, we need to delay the rename of sctx->cur_ino as well, so
3373 * that it happens after that other inode is renamed.
3374 */
3375 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &di_key);
3376 if (di_key.type != BTRFS_INODE_ITEM_KEY) {
3377 ret = 0;
3378 goto out;
3379 }
3380
3381 ret = get_inode_info(sctx->parent_root, di_key.objectid, NULL,
3382 &left_gen, NULL, NULL, NULL, NULL);
3383 if (ret < 0)
3384 goto out;
3385 ret = get_inode_info(sctx->send_root, di_key.objectid, NULL,
3386 &right_gen, NULL, NULL, NULL, NULL);
3387 if (ret < 0) {
3388 if (ret == -ENOENT)
3389 ret = 0;
3390 goto out;
3391 }
3392
3393 /* Different inode, no need to delay the rename of sctx->cur_ino */
3394 if (right_gen != left_gen) {
3395 ret = 0;
3396 goto out;
3397 }
3398
3399 if (is_waiting_for_move(sctx, di_key.objectid)) {
3400 ret = add_pending_dir_move(sctx,
3401 sctx->cur_ino,
3402 sctx->cur_inode_gen,
3403 di_key.objectid,
3404 &sctx->new_refs,
3405 &sctx->deleted_refs,
3406 is_orphan);
3407 if (!ret)
3408 ret = 1;
3409 }
3410out:
3411 btrfs_free_path(path);
3412 return ret;
3413}
3414
3286static int wait_for_parent_move(struct send_ctx *sctx, 3415static int wait_for_parent_move(struct send_ctx *sctx,
3287 struct recorded_ref *parent_ref) 3416 struct recorded_ref *parent_ref)
3288{ 3417{
@@ -3349,7 +3478,8 @@ out:
3349 sctx->cur_inode_gen, 3478 sctx->cur_inode_gen,
3350 ino, 3479 ino,
3351 &sctx->new_refs, 3480 &sctx->new_refs,
3352 &sctx->deleted_refs); 3481 &sctx->deleted_refs,
3482 false);
3353 if (!ret) 3483 if (!ret)
3354 ret = 1; 3484 ret = 1;
3355 } 3485 }
@@ -3372,6 +3502,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
3372 int did_overwrite = 0; 3502 int did_overwrite = 0;
3373 int is_orphan = 0; 3503 int is_orphan = 0;
3374 u64 last_dir_ino_rm = 0; 3504 u64 last_dir_ino_rm = 0;
3505 bool can_rename = true;
3375 3506
3376verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); 3507verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3377 3508
@@ -3490,12 +3621,22 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3490 } 3621 }
3491 } 3622 }
3492 3623
3624 if (S_ISDIR(sctx->cur_inode_mode) && sctx->parent_root) {
3625 ret = wait_for_dest_dir_move(sctx, cur, is_orphan);
3626 if (ret < 0)
3627 goto out;
3628 if (ret == 1) {
3629 can_rename = false;
3630 *pending_move = 1;
3631 }
3632 }
3633
3493 /* 3634 /*
3494 * link/move the ref to the new place. If we have an orphan 3635 * link/move the ref to the new place. If we have an orphan
3495 * inode, move it and update valid_path. If not, link or move 3636 * inode, move it and update valid_path. If not, link or move
3496 * it depending on the inode mode. 3637 * it depending on the inode mode.
3497 */ 3638 */
3498 if (is_orphan) { 3639 if (is_orphan && can_rename) {
3499 ret = send_rename(sctx, valid_path, cur->full_path); 3640 ret = send_rename(sctx, valid_path, cur->full_path);
3500 if (ret < 0) 3641 if (ret < 0)
3501 goto out; 3642 goto out;
@@ -3503,7 +3644,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
3503 ret = fs_path_copy(valid_path, cur->full_path); 3644 ret = fs_path_copy(valid_path, cur->full_path);
3504 if (ret < 0) 3645 if (ret < 0)
3505 goto out; 3646 goto out;
3506 } else { 3647 } else if (can_rename) {
3507 if (S_ISDIR(sctx->cur_inode_mode)) { 3648 if (S_ISDIR(sctx->cur_inode_mode)) {
3508 /* 3649 /*
3509 * Dirs can't be linked, so move it. For moved 3650 * Dirs can't be linked, so move it. For moved
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index a116b55ce788..054fc0d97131 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -911,6 +911,197 @@ out:
911 return ret; 911 return ret;
912} 912}
913 913
914static int test_extent_accounting(void)
915{
916 struct inode *inode = NULL;
917 struct btrfs_root *root = NULL;
918 int ret = -ENOMEM;
919
920 inode = btrfs_new_test_inode();
921 if (!inode) {
922 test_msg("Couldn't allocate inode\n");
923 return ret;
924 }
925
926 root = btrfs_alloc_dummy_root();
927 if (IS_ERR(root)) {
928 test_msg("Couldn't allocate root\n");
929 goto out;
930 }
931
932 root->fs_info = btrfs_alloc_dummy_fs_info();
933 if (!root->fs_info) {
934 test_msg("Couldn't allocate dummy fs info\n");
935 goto out;
936 }
937
938 BTRFS_I(inode)->root = root;
939 btrfs_test_inode_set_ops(inode);
940
941 /* [BTRFS_MAX_EXTENT_SIZE] */
942 BTRFS_I(inode)->outstanding_extents++;
943 ret = btrfs_set_extent_delalloc(inode, 0, BTRFS_MAX_EXTENT_SIZE - 1,
944 NULL);
945 if (ret) {
946 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
947 goto out;
948 }
949 if (BTRFS_I(inode)->outstanding_extents != 1) {
950 ret = -EINVAL;
951 test_msg("Miscount, wanted 1, got %u\n",
952 BTRFS_I(inode)->outstanding_extents);
953 goto out;
954 }
955
956 /* [BTRFS_MAX_EXTENT_SIZE][4k] */
957 BTRFS_I(inode)->outstanding_extents++;
958 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE,
959 BTRFS_MAX_EXTENT_SIZE + 4095, NULL);
960 if (ret) {
961 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
962 goto out;
963 }
964 if (BTRFS_I(inode)->outstanding_extents != 2) {
965 ret = -EINVAL;
966 test_msg("Miscount, wanted 2, got %u\n",
967 BTRFS_I(inode)->outstanding_extents);
968 goto out;
969 }
970
971 /* [BTRFS_MAX_EXTENT_SIZE/2][4K HOLE][the rest] */
972 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
973 BTRFS_MAX_EXTENT_SIZE >> 1,
974 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
975 EXTENT_DELALLOC | EXTENT_DIRTY |
976 EXTENT_UPTODATE | EXTENT_DO_ACCOUNTING, 0, 0,
977 NULL, GFP_NOFS);
978 if (ret) {
979 test_msg("clear_extent_bit returned %d\n", ret);
980 goto out;
981 }
982 if (BTRFS_I(inode)->outstanding_extents != 2) {
983 ret = -EINVAL;
984 test_msg("Miscount, wanted 2, got %u\n",
985 BTRFS_I(inode)->outstanding_extents);
986 goto out;
987 }
988
989 /* [BTRFS_MAX_EXTENT_SIZE][4K] */
990 BTRFS_I(inode)->outstanding_extents++;
991 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE >> 1,
992 (BTRFS_MAX_EXTENT_SIZE >> 1) + 4095,
993 NULL);
994 if (ret) {
995 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
996 goto out;
997 }
998 if (BTRFS_I(inode)->outstanding_extents != 2) {
999 ret = -EINVAL;
1000 test_msg("Miscount, wanted 2, got %u\n",
1001 BTRFS_I(inode)->outstanding_extents);
1002 goto out;
1003 }
1004
1005 /*
1006 * [BTRFS_MAX_EXTENT_SIZE+4K][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4K]
1007 *
1008 * I'm artificially adding 2 to outstanding_extents because in the
1009 * buffered IO case we'd add things up as we go, but I don't feel like
1010 * doing that here, this isn't the interesting case we want to test.
1011 */
1012 BTRFS_I(inode)->outstanding_extents += 2;
1013 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE + 8192,
1014 (BTRFS_MAX_EXTENT_SIZE << 1) + 12287,
1015 NULL);
1016 if (ret) {
1017 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1018 goto out;
1019 }
1020 if (BTRFS_I(inode)->outstanding_extents != 4) {
1021 ret = -EINVAL;
1022 test_msg("Miscount, wanted 4, got %u\n",
1023 BTRFS_I(inode)->outstanding_extents);
1024 goto out;
1025 }
1026
1027 /* [BTRFS_MAX_EXTENT_SIZE+4k][4k][BTRFS_MAX_EXTENT_SIZE+4k] */
1028 BTRFS_I(inode)->outstanding_extents++;
1029 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1030 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1031 if (ret) {
1032 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1033 goto out;
1034 }
1035 if (BTRFS_I(inode)->outstanding_extents != 3) {
1036 ret = -EINVAL;
1037 test_msg("Miscount, wanted 3, got %u\n",
1038 BTRFS_I(inode)->outstanding_extents);
1039 goto out;
1040 }
1041
1042 /* [BTRFS_MAX_EXTENT_SIZE+4k][4K HOLE][BTRFS_MAX_EXTENT_SIZE+4k] */
1043 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree,
1044 BTRFS_MAX_EXTENT_SIZE+4096,
1045 BTRFS_MAX_EXTENT_SIZE+8191,
1046 EXTENT_DIRTY | EXTENT_DELALLOC |
1047 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1048 NULL, GFP_NOFS);
1049 if (ret) {
1050 test_msg("clear_extent_bit returned %d\n", ret);
1051 goto out;
1052 }
1053 if (BTRFS_I(inode)->outstanding_extents != 4) {
1054 ret = -EINVAL;
1055 test_msg("Miscount, wanted 4, got %u\n",
1056 BTRFS_I(inode)->outstanding_extents);
1057 goto out;
1058 }
1059
1060 /*
1061 * Refill the hole again just for good measure, because I thought it
1062 * might fail and I'd rather satisfy my paranoia at this point.
1063 */
1064 BTRFS_I(inode)->outstanding_extents++;
1065 ret = btrfs_set_extent_delalloc(inode, BTRFS_MAX_EXTENT_SIZE+4096,
1066 BTRFS_MAX_EXTENT_SIZE+8191, NULL);
1067 if (ret) {
1068 test_msg("btrfs_set_extent_delalloc returned %d\n", ret);
1069 goto out;
1070 }
1071 if (BTRFS_I(inode)->outstanding_extents != 3) {
1072 ret = -EINVAL;
1073 test_msg("Miscount, wanted 3, got %u\n",
1074 BTRFS_I(inode)->outstanding_extents);
1075 goto out;
1076 }
1077
1078 /* Empty */
1079 ret = clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1080 EXTENT_DIRTY | EXTENT_DELALLOC |
1081 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1082 NULL, GFP_NOFS);
1083 if (ret) {
1084 test_msg("clear_extent_bit returned %d\n", ret);
1085 goto out;
1086 }
1087 if (BTRFS_I(inode)->outstanding_extents) {
1088 ret = -EINVAL;
1089 test_msg("Miscount, wanted 0, got %u\n",
1090 BTRFS_I(inode)->outstanding_extents);
1091 goto out;
1092 }
1093 ret = 0;
1094out:
1095 if (ret)
1096 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, (u64)-1,
1097 EXTENT_DIRTY | EXTENT_DELALLOC |
1098 EXTENT_DO_ACCOUNTING | EXTENT_UPTODATE, 0, 0,
1099 NULL, GFP_NOFS);
1100 iput(inode);
1101 btrfs_free_dummy_root(root);
1102 return ret;
1103}
1104
914int btrfs_test_inodes(void) 1105int btrfs_test_inodes(void)
915{ 1106{
916 int ret; 1107 int ret;
@@ -924,5 +1115,9 @@ int btrfs_test_inodes(void)
924 if (ret) 1115 if (ret)
925 return ret; 1116 return ret;
926 test_msg("Running hole first btrfs_get_extent test\n"); 1117 test_msg("Running hole first btrfs_get_extent test\n");
927 return test_hole_first(); 1118 ret = test_hole_first();
1119 if (ret)
1120 return ret;
1121 test_msg("Running outstanding_extents tests\n");
1122 return test_extent_accounting();
928} 1123}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7e80f32550a6..8be4278e25e8 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1023,17 +1023,13 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1023 u64 old_root_bytenr; 1023 u64 old_root_bytenr;
1024 u64 old_root_used; 1024 u64 old_root_used;
1025 struct btrfs_root *tree_root = root->fs_info->tree_root; 1025 struct btrfs_root *tree_root = root->fs_info->tree_root;
1026 bool extent_root = (root->objectid == BTRFS_EXTENT_TREE_OBJECTID);
1027 1026
1028 old_root_used = btrfs_root_used(&root->root_item); 1027 old_root_used = btrfs_root_used(&root->root_item);
1029 btrfs_write_dirty_block_groups(trans, root);
1030 1028
1031 while (1) { 1029 while (1) {
1032 old_root_bytenr = btrfs_root_bytenr(&root->root_item); 1030 old_root_bytenr = btrfs_root_bytenr(&root->root_item);
1033 if (old_root_bytenr == root->node->start && 1031 if (old_root_bytenr == root->node->start &&
1034 old_root_used == btrfs_root_used(&root->root_item) && 1032 old_root_used == btrfs_root_used(&root->root_item))
1035 (!extent_root ||
1036 list_empty(&trans->transaction->dirty_bgs)))
1037 break; 1033 break;
1038 1034
1039 btrfs_set_root_node(&root->root_item, root->node); 1035 btrfs_set_root_node(&root->root_item, root->node);
@@ -1044,17 +1040,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
1044 return ret; 1040 return ret;
1045 1041
1046 old_root_used = btrfs_root_used(&root->root_item); 1042 old_root_used = btrfs_root_used(&root->root_item);
1047 if (extent_root) {
1048 ret = btrfs_write_dirty_block_groups(trans, root);
1049 if (ret)
1050 return ret;
1051 }
1052 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1053 if (ret)
1054 return ret;
1055 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1056 if (ret)
1057 return ret;
1058 } 1043 }
1059 1044
1060 return 0; 1045 return 0;
@@ -1071,6 +1056,7 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1071 struct btrfs_root *root) 1056 struct btrfs_root *root)
1072{ 1057{
1073 struct btrfs_fs_info *fs_info = root->fs_info; 1058 struct btrfs_fs_info *fs_info = root->fs_info;
1059 struct list_head *dirty_bgs = &trans->transaction->dirty_bgs;
1074 struct list_head *next; 1060 struct list_head *next;
1075 struct extent_buffer *eb; 1061 struct extent_buffer *eb;
1076 int ret; 1062 int ret;
@@ -1098,11 +1084,15 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1098 if (ret) 1084 if (ret)
1099 return ret; 1085 return ret;
1100 1086
1087 ret = btrfs_setup_space_cache(trans, root);
1088 if (ret)
1089 return ret;
1090
1101 /* run_qgroups might have added some more refs */ 1091 /* run_qgroups might have added some more refs */
1102 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); 1092 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1103 if (ret) 1093 if (ret)
1104 return ret; 1094 return ret;
1105 1095again:
1106 while (!list_empty(&fs_info->dirty_cowonly_roots)) { 1096 while (!list_empty(&fs_info->dirty_cowonly_roots)) {
1107 next = fs_info->dirty_cowonly_roots.next; 1097 next = fs_info->dirty_cowonly_roots.next;
1108 list_del_init(next); 1098 list_del_init(next);
@@ -1115,8 +1105,23 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
1115 ret = update_cowonly_root(trans, root); 1105 ret = update_cowonly_root(trans, root);
1116 if (ret) 1106 if (ret)
1117 return ret; 1107 return ret;
1108 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1109 if (ret)
1110 return ret;
1118 } 1111 }
1119 1112
1113 while (!list_empty(dirty_bgs)) {
1114 ret = btrfs_write_dirty_block_groups(trans, root);
1115 if (ret)
1116 return ret;
1117 ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
1118 if (ret)
1119 return ret;
1120 }
1121
1122 if (!list_empty(&fs_info->dirty_cowonly_roots))
1123 goto again;
1124
1120 list_add_tail(&fs_info->extent_root->dirty_list, 1125 list_add_tail(&fs_info->extent_root->dirty_list,
1121 &trans->transaction->switch_commits); 1126 &trans->transaction->switch_commits);
1122 btrfs_after_dev_replace_commit(fs_info); 1127 btrfs_after_dev_replace_commit(fs_info);
@@ -1814,6 +1819,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1814 1819
1815 wait_for_commit(root, cur_trans); 1820 wait_for_commit(root, cur_trans);
1816 1821
1822 if (unlikely(cur_trans->aborted))
1823 ret = cur_trans->aborted;
1824
1817 btrfs_put_transaction(cur_trans); 1825 btrfs_put_transaction(cur_trans);
1818 1826
1819 return ret; 1827 return ret;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9a37f8b39bae..c5b8ba37f88e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1012,7 +1012,7 @@ again:
1012 base = btrfs_item_ptr_offset(leaf, path->slots[0]); 1012 base = btrfs_item_ptr_offset(leaf, path->slots[0]);
1013 1013
1014 while (cur_offset < item_size) { 1014 while (cur_offset < item_size) {
1015 extref = (struct btrfs_inode_extref *)base + cur_offset; 1015 extref = (struct btrfs_inode_extref *)(base + cur_offset);
1016 1016
1017 victim_name_len = btrfs_inode_extref_name_len(leaf, extref); 1017 victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
1018 1018
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 47b19465f0dc..883b93623bc5 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -111,6 +111,8 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
111 name, name_len, -1); 111 name, name_len, -1);
112 if (!di && (flags & XATTR_REPLACE)) 112 if (!di && (flags & XATTR_REPLACE))
113 ret = -ENODATA; 113 ret = -ENODATA;
114 else if (IS_ERR(di))
115 ret = PTR_ERR(di);
114 else if (di) 116 else if (di)
115 ret = btrfs_delete_one_dir_name(trans, root, path, di); 117 ret = btrfs_delete_one_dir_name(trans, root, path, di);
116 goto out; 118 goto out;
@@ -127,10 +129,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
127 ASSERT(mutex_is_locked(&inode->i_mutex)); 129 ASSERT(mutex_is_locked(&inode->i_mutex));
128 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode), 130 di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
129 name, name_len, 0); 131 name, name_len, 0);
130 if (!di) { 132 if (!di)
131 ret = -ENODATA; 133 ret = -ENODATA;
134 else if (IS_ERR(di))
135 ret = PTR_ERR(di);
136 if (ret)
132 goto out; 137 goto out;
133 }
134 btrfs_release_path(path); 138 btrfs_release_path(path);
135 di = NULL; 139 di = NULL;
136 } 140 }