aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2013-09-22 17:58:49 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2013-09-22 17:58:49 -0400
commit: 0fbf2cc983ca15208545010863c6536d36a25f3a (patch)
tree: 19a6b5d197d9051c6775c69cb1731f7563ee35ca /fs/btrfs
parent: c43a3855f41a25330326570d8e8d54e9927b3f56 (diff)
parent: 94aebfb2e7d83748d882992196cb05dd39ba1807 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "These are mostly bug fixes and two small performance fixes. The most important of the bunch are Josef's fix for a snapshotting regression and Mark's update to fix compile problems on arm" * 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (25 commits) Btrfs: create the uuid tree on remount rw btrfs: change extent-same to copy entire argument struct Btrfs: dir_inode_operations should use btrfs_update_time also btrfs: Add btrfs: prefix to kernel log output btrfs: refuse to remount read-write after abort Btrfs: btrfs_ioctl_default_subvol: Revert back to toplevel subvolume when arg is 0 Btrfs: don't leak transaction in btrfs_sync_file() Btrfs: add the missing mutex unlock in write_all_supers() Btrfs: iput inode on allocation failure Btrfs: remove space_info->reservation_progress Btrfs: kill delay_iput arg to the wait_ordered functions Btrfs: fix worst case calculator for space usage Revert "Btrfs: rework the overcommit logic to be based on the total size" Btrfs: improve replacing nocow extents Btrfs: drop dir i_size when adding new names on replay Btrfs: replay dir_index items before other items Btrfs: check roots last log commit when checking if an inode has been logged Btrfs: actually log directory we are fsync()'ing Btrfs: actually limit the size of delalloc range Btrfs: allocate the free space by the existed max extent size when ENOSPC ...
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/btrfs_inode.h 5
-rw-r--r-- fs/btrfs/ctree.c 7
-rw-r--r-- fs/btrfs/ctree.h 17
-rw-r--r-- fs/btrfs/dev-replace.c 4
-rw-r--r-- fs/btrfs/disk-io.c 2
-rw-r--r-- fs/btrfs/extent-tree.c 57
-rw-r--r-- fs/btrfs/extent_io.c 8
-rw-r--r-- fs/btrfs/file.c 4
-rw-r--r-- fs/btrfs/free-space-cache.c 67
-rw-r--r-- fs/btrfs/free-space-cache.h 5
-rw-r--r-- fs/btrfs/inode.c 16
-rw-r--r-- fs/btrfs/ioctl.c 80
-rw-r--r-- fs/btrfs/ordered-data.c 24
-rw-r--r-- fs/btrfs/ordered-data.h 5
-rw-r--r-- fs/btrfs/relocation.c 43
-rw-r--r-- fs/btrfs/scrub.c 112
-rw-r--r-- fs/btrfs/super.c 21
-rw-r--r-- fs/btrfs/transaction.c 2
-rw-r--r-- fs/btrfs/tree-log.c 52
-rw-r--r-- fs/btrfs/volumes.c 7
20 files changed, 363 insertions, 175 deletions
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index d0ae226926ee..71f074e1870b 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -213,7 +213,10 @@ static inline bool btrfs_is_free_space_inode(struct inode *inode)
213static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) 213static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
214{ 214{
215 if (BTRFS_I(inode)->logged_trans == generation && 215 if (BTRFS_I(inode)->logged_trans == generation &&
216 BTRFS_I(inode)->last_sub_trans <= BTRFS_I(inode)->last_log_commit) 216 BTRFS_I(inode)->last_sub_trans <=
217 BTRFS_I(inode)->last_log_commit &&
218 BTRFS_I(inode)->last_sub_trans <=
219 BTRFS_I(inode)->root->last_log_commit)
217 return 1; 220 return 1;
218 return 0; 221 return 0;
219} 222}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 64346721173f..61b5bcd57b7e 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1005,8 +1005,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
1005 return ret; 1005 return ret;
1006 } 1006 }
1007 1007
1008 if (root->ref_cows) 1008 if (root->ref_cows) {
1009 btrfs_reloc_cow_block(trans, root, buf, cow); 1009 ret = btrfs_reloc_cow_block(trans, root, buf, cow);
1010 if (ret)
1011 return ret;
1012 }
1010 1013
1011 if (buf == root->node) { 1014 if (buf == root->node) {
1012 WARN_ON(parent && parent != buf); 1015 WARN_ON(parent && parent != buf);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 3c1da6f98a4d..0506f40ede83 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1118,15 +1118,6 @@ struct btrfs_space_info {
1118 */ 1118 */
1119 struct percpu_counter total_bytes_pinned; 1119 struct percpu_counter total_bytes_pinned;
1120 1120
1121 /*
1122 * we bump reservation progress every time we decrement
1123 * bytes_reserved. This way people waiting for reservations
1124 * know something good has happened and they can check
1125 * for progress. The number here isn't to be trusted, it
1126 * just shows reclaim activity
1127 */
1128 unsigned long reservation_progress;
1129
1130 unsigned int full:1; /* indicates that we cannot allocate any more 1121 unsigned int full:1; /* indicates that we cannot allocate any more
1131 chunks for this space */ 1122 chunks for this space */
1132 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ 1123 unsigned int chunk_alloc:1; /* set if we are allocating a chunk */
@@ -3135,7 +3126,7 @@ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
3135 unsigned num_items) 3126 unsigned num_items)
3136{ 3127{
3137 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 3128 return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
3138 3 * num_items; 3129 2 * num_items;
3139} 3130}
3140 3131
3141/* 3132/*
@@ -3939,9 +3930,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
3939 struct btrfs_root *root); 3930 struct btrfs_root *root);
3940int btrfs_recover_relocation(struct btrfs_root *root); 3931int btrfs_recover_relocation(struct btrfs_root *root);
3941int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); 3932int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
3942void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 3933int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
3943 struct btrfs_root *root, struct extent_buffer *buf, 3934 struct btrfs_root *root, struct extent_buffer *buf,
3944 struct extent_buffer *cow); 3935 struct extent_buffer *cow);
3945void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, 3936void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
3946 struct btrfs_pending_snapshot *pending, 3937 struct btrfs_pending_snapshot *pending,
3947 u64 *bytes_to_reserve); 3938 u64 *bytes_to_reserve);
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index a64435359385..70681686e8dc 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -400,7 +400,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
400 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; 400 args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
401 btrfs_dev_replace_unlock(dev_replace); 401 btrfs_dev_replace_unlock(dev_replace);
402 402
403 btrfs_wait_all_ordered_extents(root->fs_info, 0); 403 btrfs_wait_all_ordered_extents(root->fs_info);
404 404
405 /* force writing the updated state information to disk */ 405 /* force writing the updated state information to disk */
406 trans = btrfs_start_transaction(root, 0); 406 trans = btrfs_start_transaction(root, 0);
@@ -475,7 +475,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
475 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); 475 mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
476 return ret; 476 return ret;
477 } 477 }
478 btrfs_wait_all_ordered_extents(root->fs_info, 0); 478 btrfs_wait_all_ordered_extents(root->fs_info);
479 479
480 trans = btrfs_start_transaction(root, 0); 480 trans = btrfs_start_transaction(root, 0);
481 if (IS_ERR(trans)) { 481 if (IS_ERR(trans)) {
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 4cbb00af92ff..4ae17ed13b32 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -157,6 +157,7 @@ static struct btrfs_lockdep_keyset {
157 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, 157 { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
158 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, 158 { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
159 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, 159 { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
160 { .id = BTRFS_UUID_TREE_OBJECTID, .name_stem = "uuid" },
160 { .id = 0, .name_stem = "tree" }, 161 { .id = 0, .name_stem = "tree" },
161}; 162};
162 163
@@ -3415,6 +3416,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
3415 if (total_errors > max_errors) { 3416 if (total_errors > max_errors) {
3416 printk(KERN_ERR "btrfs: %d errors while writing supers\n", 3417 printk(KERN_ERR "btrfs: %d errors while writing supers\n",
3417 total_errors); 3418 total_errors);
3419 mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
3418 3420
3419 /* FUA is masked off if unsupported and can't be the reason */ 3421 /* FUA is masked off if unsupported and can't be the reason */
3420 btrfs_error(root->fs_info, -EIO, 3422 btrfs_error(root->fs_info, -EIO,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cfb3cf711b34..d58bef130a41 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3925,7 +3925,6 @@ static int can_overcommit(struct btrfs_root *root,
3925 u64 space_size; 3925 u64 space_size;
3926 u64 avail; 3926 u64 avail;
3927 u64 used; 3927 u64 used;
3928 u64 to_add;
3929 3928
3930 used = space_info->bytes_used + space_info->bytes_reserved + 3929 used = space_info->bytes_used + space_info->bytes_reserved +
3931 space_info->bytes_pinned + space_info->bytes_readonly; 3930 space_info->bytes_pinned + space_info->bytes_readonly;
@@ -3959,25 +3958,17 @@ static int can_overcommit(struct btrfs_root *root,
3959 BTRFS_BLOCK_GROUP_RAID10)) 3958 BTRFS_BLOCK_GROUP_RAID10))
3960 avail >>= 1; 3959 avail >>= 1;
3961 3960
3962 to_add = space_info->total_bytes;
3963
3964 /* 3961 /*
3965 * If we aren't flushing all things, let us overcommit up to 3962 * If we aren't flushing all things, let us overcommit up to
3966 * 1/2th of the space. If we can flush, don't let us overcommit 3963 * 1/2th of the space. If we can flush, don't let us overcommit
3967 * too much, let it overcommit up to 1/8 of the space. 3964 * too much, let it overcommit up to 1/8 of the space.
3968 */ 3965 */
3969 if (flush == BTRFS_RESERVE_FLUSH_ALL) 3966 if (flush == BTRFS_RESERVE_FLUSH_ALL)
3970 to_add >>= 3; 3967 avail >>= 3;
3971 else 3968 else
3972 to_add >>= 1; 3969 avail >>= 1;
3973
3974 /*
3975 * Limit the overcommit to the amount of free space we could possibly
3976 * allocate for chunks.
3977 */
3978 to_add = min(avail, to_add);
3979 3970
3980 if (used + bytes < space_info->total_bytes + to_add) 3971 if (used + bytes < space_info->total_bytes + avail)
3981 return 1; 3972 return 1;
3982 return 0; 3973 return 0;
3983} 3974}
@@ -4000,7 +3991,7 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
4000 */ 3991 */
4001 btrfs_start_all_delalloc_inodes(root->fs_info, 0); 3992 btrfs_start_all_delalloc_inodes(root->fs_info, 0);
4002 if (!current->journal_info) 3993 if (!current->journal_info)
4003 btrfs_wait_all_ordered_extents(root->fs_info, 0); 3994 btrfs_wait_all_ordered_extents(root->fs_info);
4004 } 3995 }
4005} 3996}
4006 3997
@@ -4030,7 +4021,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4030 if (delalloc_bytes == 0) { 4021 if (delalloc_bytes == 0) {
4031 if (trans) 4022 if (trans)
4032 return; 4023 return;
4033 btrfs_wait_all_ordered_extents(root->fs_info, 0); 4024 btrfs_wait_all_ordered_extents(root->fs_info);
4034 return; 4025 return;
4035 } 4026 }
4036 4027
@@ -4058,7 +4049,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
4058 4049
4059 loops++; 4050 loops++;
4060 if (wait_ordered && !trans) { 4051 if (wait_ordered && !trans) {
4061 btrfs_wait_all_ordered_extents(root->fs_info, 0); 4052 btrfs_wait_all_ordered_extents(root->fs_info);
4062 } else { 4053 } else {
4063 time_left = schedule_timeout_killable(1); 4054 time_left = schedule_timeout_killable(1);
4064 if (time_left) 4055 if (time_left)
@@ -4465,7 +4456,6 @@ static void block_rsv_release_bytes(struct btrfs_fs_info *fs_info,
4465 space_info->bytes_may_use -= num_bytes; 4456 space_info->bytes_may_use -= num_bytes;
4466 trace_btrfs_space_reservation(fs_info, "space_info", 4457 trace_btrfs_space_reservation(fs_info, "space_info",
4467 space_info->flags, num_bytes, 0); 4458 space_info->flags, num_bytes, 0);
4468 space_info->reservation_progress++;
4469 spin_unlock(&space_info->lock); 4459 spin_unlock(&space_info->lock);
4470 } 4460 }
4471 } 4461 }
@@ -4666,7 +4656,6 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
4666 sinfo->bytes_may_use -= num_bytes; 4656 sinfo->bytes_may_use -= num_bytes;
4667 trace_btrfs_space_reservation(fs_info, "space_info", 4657 trace_btrfs_space_reservation(fs_info, "space_info",
4668 sinfo->flags, num_bytes, 0); 4658 sinfo->flags, num_bytes, 0);
4669 sinfo->reservation_progress++;
4670 block_rsv->reserved = block_rsv->size; 4659 block_rsv->reserved = block_rsv->size;
4671 block_rsv->full = 1; 4660 block_rsv->full = 1;
4672 } 4661 }
@@ -5446,7 +5435,6 @@ static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
5446 space_info->bytes_readonly += num_bytes; 5435 space_info->bytes_readonly += num_bytes;
5447 cache->reserved -= num_bytes; 5436 cache->reserved -= num_bytes;
5448 space_info->bytes_reserved -= num_bytes; 5437 space_info->bytes_reserved -= num_bytes;
5449 space_info->reservation_progress++;
5450 } 5438 }
5451 spin_unlock(&cache->lock); 5439 spin_unlock(&cache->lock);
5452 spin_unlock(&space_info->lock); 5440 spin_unlock(&space_info->lock);
@@ -6117,10 +6105,13 @@ enum btrfs_loop_type {
6117/* 6105/*
6118 * walks the btree of allocated extents and find a hole of a given size. 6106 * walks the btree of allocated extents and find a hole of a given size.
6119 * The key ins is changed to record the hole: 6107 * The key ins is changed to record the hole:
6120 * ins->objectid == block start 6108 * ins->objectid == start position
6121 * ins->flags = BTRFS_EXTENT_ITEM_KEY 6109 * ins->flags = BTRFS_EXTENT_ITEM_KEY
6122 * ins->offset == number of blocks 6110 * ins->offset == the size of the hole.
6123 * Any available blocks before search_start are skipped. 6111 * Any available blocks before search_start are skipped.
6112 *
6113 * If there is no suitable free space, we will record the max size of
6114 * the free space extent currently.
6124 */ 6115 */
6125static noinline int find_free_extent(struct btrfs_root *orig_root, 6116static noinline int find_free_extent(struct btrfs_root *orig_root,
6126 u64 num_bytes, u64 empty_size, 6117 u64 num_bytes, u64 empty_size,
@@ -6133,6 +6124,7 @@ static noinline int find_free_extent(struct btrfs_root *orig_root,
6133 struct btrfs_block_group_cache *block_group = NULL; 6124 struct btrfs_block_group_cache *block_group = NULL;
6134 struct btrfs_block_group_cache *used_block_group; 6125 struct btrfs_block_group_cache *used_block_group;
6135 u64 search_start = 0; 6126 u64 search_start = 0;
6127 u64 max_extent_size = 0;
6136 int empty_cluster = 2 * 1024 * 1024; 6128 int empty_cluster = 2 * 1024 * 1024;
6137 struct btrfs_space_info *space_info; 6129 struct btrfs_space_info *space_info;
6138 int loop = 0; 6130 int loop = 0;
@@ -6292,7 +6284,10 @@ have_block_group:
6292 btrfs_get_block_group(used_block_group); 6284 btrfs_get_block_group(used_block_group);
6293 6285
6294 offset = btrfs_alloc_from_cluster(used_block_group, 6286 offset = btrfs_alloc_from_cluster(used_block_group,
6295 last_ptr, num_bytes, used_block_group->key.objectid); 6287 last_ptr,
6288 num_bytes,
6289 used_block_group->key.objectid,
6290 &max_extent_size);
6296 if (offset) { 6291 if (offset) {
6297 /* we have a block, we're done */ 6292 /* we have a block, we're done */
6298 spin_unlock(&last_ptr->refill_lock); 6293 spin_unlock(&last_ptr->refill_lock);
@@ -6355,8 +6350,10 @@ refill_cluster:
6355 * cluster 6350 * cluster
6356 */ 6351 */
6357 offset = btrfs_alloc_from_cluster(block_group, 6352 offset = btrfs_alloc_from_cluster(block_group,
6358 last_ptr, num_bytes, 6353 last_ptr,
6359 search_start); 6354 num_bytes,
6355 search_start,
6356 &max_extent_size);
6360 if (offset) { 6357 if (offset) {
6361 /* we found one, proceed */ 6358 /* we found one, proceed */
6362 spin_unlock(&last_ptr->refill_lock); 6359 spin_unlock(&last_ptr->refill_lock);
@@ -6391,13 +6388,18 @@ unclustered_alloc:
6391 if (cached && 6388 if (cached &&
6392 block_group->free_space_ctl->free_space < 6389 block_group->free_space_ctl->free_space <
6393 num_bytes + empty_cluster + empty_size) { 6390 num_bytes + empty_cluster + empty_size) {
6391 if (block_group->free_space_ctl->free_space >
6392 max_extent_size)
6393 max_extent_size =
6394 block_group->free_space_ctl->free_space;
6394 spin_unlock(&block_group->free_space_ctl->tree_lock); 6395 spin_unlock(&block_group->free_space_ctl->tree_lock);
6395 goto loop; 6396 goto loop;
6396 } 6397 }
6397 spin_unlock(&block_group->free_space_ctl->tree_lock); 6398 spin_unlock(&block_group->free_space_ctl->tree_lock);
6398 6399
6399 offset = btrfs_find_space_for_alloc(block_group, search_start, 6400 offset = btrfs_find_space_for_alloc(block_group, search_start,
6400 num_bytes, empty_size); 6401 num_bytes, empty_size,
6402 &max_extent_size);
6401 /* 6403 /*
6402 * If we didn't find a chunk, and we haven't failed on this 6404 * If we didn't find a chunk, and we haven't failed on this
6403 * block group before, and this block group is in the middle of 6405 * block group before, and this block group is in the middle of
@@ -6515,7 +6517,8 @@ loop:
6515 ret = 0; 6517 ret = 0;
6516 } 6518 }
6517out: 6519out:
6518 6520 if (ret == -ENOSPC)
6521 ins->offset = max_extent_size;
6519 return ret; 6522 return ret;
6520} 6523}
6521 6524
@@ -6573,8 +6576,8 @@ again:
6573 flags); 6576 flags);
6574 6577
6575 if (ret == -ENOSPC) { 6578 if (ret == -ENOSPC) {
6576 if (!final_tried) { 6579 if (!final_tried && ins->offset) {
6577 num_bytes = num_bytes >> 1; 6580 num_bytes = min(num_bytes >> 1, ins->offset);
6578 num_bytes = round_down(num_bytes, root->sectorsize); 6581 num_bytes = round_down(num_bytes, root->sectorsize);
6579 num_bytes = max(num_bytes, min_alloc_size); 6582 num_bytes = max(num_bytes, min_alloc_size);
6580 if (num_bytes == min_alloc_size) 6583 if (num_bytes == min_alloc_size)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 09582b81640c..c09a40db53db 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1481,10 +1481,12 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1481 *end = state->end; 1481 *end = state->end;
1482 cur_start = state->end + 1; 1482 cur_start = state->end + 1;
1483 node = rb_next(node); 1483 node = rb_next(node);
1484 if (!node)
1485 break;
1486 total_bytes += state->end - state->start + 1; 1484 total_bytes += state->end - state->start + 1;
1487 if (total_bytes >= max_bytes) 1485 if (total_bytes >= max_bytes) {
1486 *end = *start + max_bytes - 1;
1487 break;
1488 }
1489 if (!node)
1488 break; 1490 break;
1489 } 1491 }
1490out: 1492out:
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index bc5072b2db53..72da4df53c9a 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1859,8 +1859,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1859 1859
1860 ret = btrfs_log_dentry_safe(trans, root, dentry); 1860 ret = btrfs_log_dentry_safe(trans, root, dentry);
1861 if (ret < 0) { 1861 if (ret < 0) {
1862 mutex_unlock(&inode->i_mutex); 1862 /* Fallthrough and commit/free transaction. */
1863 goto out; 1863 ret = 1;
1864 } 1864 }
1865 1865
1866 /* we've logged all the items and now have a consistent 1866 /* we've logged all the items and now have a consistent
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3f0ddfce96e6..b4f9904c4c6b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1431,13 +1431,19 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
1431 ctl->free_space += bytes; 1431 ctl->free_space += bytes;
1432} 1432}
1433 1433
1434/*
1435 * If we can not find suitable extent, we will use bytes to record
1436 * the size of the max extent.
1437 */
1434static int search_bitmap(struct btrfs_free_space_ctl *ctl, 1438static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1435 struct btrfs_free_space *bitmap_info, u64 *offset, 1439 struct btrfs_free_space *bitmap_info, u64 *offset,
1436 u64 *bytes) 1440 u64 *bytes)
1437{ 1441{
1438 unsigned long found_bits = 0; 1442 unsigned long found_bits = 0;
1443 unsigned long max_bits = 0;
1439 unsigned long bits, i; 1444 unsigned long bits, i;
1440 unsigned long next_zero; 1445 unsigned long next_zero;
1446 unsigned long extent_bits;
1441 1447
1442 i = offset_to_bit(bitmap_info->offset, ctl->unit, 1448 i = offset_to_bit(bitmap_info->offset, ctl->unit,
1443 max_t(u64, *offset, bitmap_info->offset)); 1449 max_t(u64, *offset, bitmap_info->offset));
@@ -1446,9 +1452,12 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1446 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) { 1452 for_each_set_bit_from(i, bitmap_info->bitmap, BITS_PER_BITMAP) {
1447 next_zero = find_next_zero_bit(bitmap_info->bitmap, 1453 next_zero = find_next_zero_bit(bitmap_info->bitmap,
1448 BITS_PER_BITMAP, i); 1454 BITS_PER_BITMAP, i);
1449 if ((next_zero - i) >= bits) { 1455 extent_bits = next_zero - i;
1450 found_bits = next_zero - i; 1456 if (extent_bits >= bits) {
1457 found_bits = extent_bits;
1451 break; 1458 break;
1459 } else if (extent_bits > max_bits) {
1460 max_bits = extent_bits;
1452 } 1461 }
1453 i = next_zero; 1462 i = next_zero;
1454 } 1463 }
@@ -1459,38 +1468,41 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
1459 return 0; 1468 return 0;
1460 } 1469 }
1461 1470
1471 *bytes = (u64)(max_bits) * ctl->unit;
1462 return -1; 1472 return -1;
1463} 1473}
1464 1474
1475/* Cache the size of the max extent in bytes */
1465static struct btrfs_free_space * 1476static struct btrfs_free_space *
1466find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes, 1477find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1467 unsigned long align) 1478 unsigned long align, u64 *max_extent_size)
1468{ 1479{
1469 struct btrfs_free_space *entry; 1480 struct btrfs_free_space *entry;
1470 struct rb_node *node; 1481 struct rb_node *node;
1471 u64 ctl_off;
1472 u64 tmp; 1482 u64 tmp;
1473 u64 align_off; 1483 u64 align_off;
1474 int ret; 1484 int ret;
1475 1485
1476 if (!ctl->free_space_offset.rb_node) 1486 if (!ctl->free_space_offset.rb_node)
1477 return NULL; 1487 goto out;
1478 1488
1479 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1); 1489 entry = tree_search_offset(ctl, offset_to_bitmap(ctl, *offset), 0, 1);
1480 if (!entry) 1490 if (!entry)
1481 return NULL; 1491 goto out;
1482 1492
1483 for (node = &entry->offset_index; node; node = rb_next(node)) { 1493 for (node = &entry->offset_index; node; node = rb_next(node)) {
1484 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1494 entry = rb_entry(node, struct btrfs_free_space, offset_index);
1485 if (entry->bytes < *bytes) 1495 if (entry->bytes < *bytes) {
1496 if (entry->bytes > *max_extent_size)
1497 *max_extent_size = entry->bytes;
1486 continue; 1498 continue;
1499 }
1487 1500
1488 /* make sure the space returned is big enough 1501 /* make sure the space returned is big enough
1489 * to match our requested alignment 1502 * to match our requested alignment
1490 */ 1503 */
1491 if (*bytes >= align) { 1504 if (*bytes >= align) {
1492 ctl_off = entry->offset - ctl->start; 1505 tmp = entry->offset - ctl->start + align - 1;
1493 tmp = ctl_off + align - 1;;
1494 do_div(tmp, align); 1506 do_div(tmp, align);
1495 tmp = tmp * align + ctl->start; 1507 tmp = tmp * align + ctl->start;
1496 align_off = tmp - entry->offset; 1508 align_off = tmp - entry->offset;
@@ -1499,14 +1511,22 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1499 tmp = entry->offset; 1511 tmp = entry->offset;
1500 } 1512 }
1501 1513
1502 if (entry->bytes < *bytes + align_off) 1514 if (entry->bytes < *bytes + align_off) {
1515 if (entry->bytes > *max_extent_size)
1516 *max_extent_size = entry->bytes;
1503 continue; 1517 continue;
1518 }
1504 1519
1505 if (entry->bitmap) { 1520 if (entry->bitmap) {
1506 ret = search_bitmap(ctl, entry, &tmp, bytes); 1521 u64 size = *bytes;
1522
1523 ret = search_bitmap(ctl, entry, &tmp, &size);
1507 if (!ret) { 1524 if (!ret) {
1508 *offset = tmp; 1525 *offset = tmp;
1526 *bytes = size;
1509 return entry; 1527 return entry;
1528 } else if (size > *max_extent_size) {
1529 *max_extent_size = size;
1510 } 1530 }
1511 continue; 1531 continue;
1512 } 1532 }
@@ -1515,7 +1535,7 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
1515 *bytes = entry->bytes - align_off; 1535 *bytes = entry->bytes - align_off;
1516 return entry; 1536 return entry;
1517 } 1537 }
1518 1538out:
1519 return NULL; 1539 return NULL;
1520} 1540}
1521 1541
@@ -2116,7 +2136,8 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
2116} 2136}
2117 2137
2118u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 2138u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2119 u64 offset, u64 bytes, u64 empty_size) 2139 u64 offset, u64 bytes, u64 empty_size,
2140 u64 *max_extent_size)
2120{ 2141{
2121 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2142 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2122 struct btrfs_free_space *entry = NULL; 2143 struct btrfs_free_space *entry = NULL;
@@ -2127,7 +2148,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2127 2148
2128 spin_lock(&ctl->tree_lock); 2149 spin_lock(&ctl->tree_lock);
2129 entry = find_free_space(ctl, &offset, &bytes_search, 2150 entry = find_free_space(ctl, &offset, &bytes_search,
2130 block_group->full_stripe_len); 2151 block_group->full_stripe_len, max_extent_size);
2131 if (!entry) 2152 if (!entry)
2132 goto out; 2153 goto out;
2133 2154
@@ -2137,7 +2158,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2137 if (!entry->bytes) 2158 if (!entry->bytes)
2138 free_bitmap(ctl, entry); 2159 free_bitmap(ctl, entry);
2139 } else { 2160 } else {
2140
2141 unlink_free_space(ctl, entry); 2161 unlink_free_space(ctl, entry);
2142 align_gap_len = offset - entry->offset; 2162 align_gap_len = offset - entry->offset;
2143 align_gap = entry->offset; 2163 align_gap = entry->offset;
@@ -2151,7 +2171,6 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
2151 else 2171 else
2152 link_free_space(ctl, entry); 2172 link_free_space(ctl, entry);
2153 } 2173 }
2154
2155out: 2174out:
2156 spin_unlock(&ctl->tree_lock); 2175 spin_unlock(&ctl->tree_lock);
2157 2176
@@ -2206,7 +2225,8 @@ int btrfs_return_cluster_to_free_space(
2206static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 2225static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2207 struct btrfs_free_cluster *cluster, 2226 struct btrfs_free_cluster *cluster,
2208 struct btrfs_free_space *entry, 2227 struct btrfs_free_space *entry,
2209 u64 bytes, u64 min_start) 2228 u64 bytes, u64 min_start,
2229 u64 *max_extent_size)
2210{ 2230{
2211 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2231 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2212 int err; 2232 int err;
@@ -2218,8 +2238,11 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2218 search_bytes = bytes; 2238 search_bytes = bytes;
2219 2239
2220 err = search_bitmap(ctl, entry, &search_start, &search_bytes); 2240 err = search_bitmap(ctl, entry, &search_start, &search_bytes);
2221 if (err) 2241 if (err) {
2242 if (search_bytes > *max_extent_size)
2243 *max_extent_size = search_bytes;
2222 return 0; 2244 return 0;
2245 }
2223 2246
2224 ret = search_start; 2247 ret = search_start;
2225 __bitmap_clear_bits(ctl, entry, ret, bytes); 2248 __bitmap_clear_bits(ctl, entry, ret, bytes);
@@ -2234,7 +2257,7 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
2234 */ 2257 */
2235u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 2258u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2236 struct btrfs_free_cluster *cluster, u64 bytes, 2259 struct btrfs_free_cluster *cluster, u64 bytes,
2237 u64 min_start) 2260 u64 min_start, u64 *max_extent_size)
2238{ 2261{
2239 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 2262 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
2240 struct btrfs_free_space *entry = NULL; 2263 struct btrfs_free_space *entry = NULL;
@@ -2254,6 +2277,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2254 2277
2255 entry = rb_entry(node, struct btrfs_free_space, offset_index); 2278 entry = rb_entry(node, struct btrfs_free_space, offset_index);
2256 while(1) { 2279 while(1) {
2280 if (entry->bytes < bytes && entry->bytes > *max_extent_size)
2281 *max_extent_size = entry->bytes;
2282
2257 if (entry->bytes < bytes || 2283 if (entry->bytes < bytes ||
2258 (!entry->bitmap && entry->offset < min_start)) { 2284 (!entry->bitmap && entry->offset < min_start)) {
2259 node = rb_next(&entry->offset_index); 2285 node = rb_next(&entry->offset_index);
@@ -2267,7 +2293,8 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
2267 if (entry->bitmap) { 2293 if (entry->bitmap) {
2268 ret = btrfs_alloc_from_bitmap(block_group, 2294 ret = btrfs_alloc_from_bitmap(block_group,
2269 cluster, entry, bytes, 2295 cluster, entry, bytes,
2270 cluster->window_start); 2296 cluster->window_start,
2297 max_extent_size);
2271 if (ret == 0) { 2298 if (ret == 0) {
2272 node = rb_next(&entry->offset_index); 2299 node = rb_next(&entry->offset_index);
2273 if (!node) 2300 if (!node)
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index c74904167476..e737f92cf6d0 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -94,7 +94,8 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl);
94void btrfs_remove_free_space_cache(struct btrfs_block_group_cache 94void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
95 *block_group); 95 *block_group);
96u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 96u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
97 u64 offset, u64 bytes, u64 empty_size); 97 u64 offset, u64 bytes, u64 empty_size,
98 u64 *max_extent_size);
98u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root); 99u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
99void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 100void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
100 u64 bytes); 101 u64 bytes);
@@ -105,7 +106,7 @@ int btrfs_find_space_cluster(struct btrfs_root *root,
105void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster); 106void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster);
106u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 107u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
107 struct btrfs_free_cluster *cluster, u64 bytes, 108 struct btrfs_free_cluster *cluster, u64 bytes,
108 u64 min_start); 109 u64 min_start, u64 *max_extent_size);
109int btrfs_return_cluster_to_free_space( 110int btrfs_return_cluster_to_free_space(
110 struct btrfs_block_group_cache *block_group, 111 struct btrfs_block_group_cache *block_group,
111 struct btrfs_free_cluster *cluster); 112 struct btrfs_free_cluster *cluster);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index f338c5672d58..22ebc13b6c99 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4688,11 +4688,11 @@ static void inode_tree_add(struct inode *inode)
4688 struct btrfs_inode *entry; 4688 struct btrfs_inode *entry;
4689 struct rb_node **p; 4689 struct rb_node **p;
4690 struct rb_node *parent; 4690 struct rb_node *parent;
4691 struct rb_node *new = &BTRFS_I(inode)->rb_node;
4691 u64 ino = btrfs_ino(inode); 4692 u64 ino = btrfs_ino(inode);
4692 4693
4693 if (inode_unhashed(inode)) 4694 if (inode_unhashed(inode))
4694 return; 4695 return;
4695again:
4696 parent = NULL; 4696 parent = NULL;
4697 spin_lock(&root->inode_lock); 4697 spin_lock(&root->inode_lock);
4698 p = &root->inode_tree.rb_node; 4698 p = &root->inode_tree.rb_node;
@@ -4707,14 +4707,14 @@ again:
4707 else { 4707 else {
4708 WARN_ON(!(entry->vfs_inode.i_state & 4708 WARN_ON(!(entry->vfs_inode.i_state &
4709 (I_WILL_FREE | I_FREEING))); 4709 (I_WILL_FREE | I_FREEING)));
4710 rb_erase(parent, &root->inode_tree); 4710 rb_replace_node(parent, new, &root->inode_tree);
4711 RB_CLEAR_NODE(parent); 4711 RB_CLEAR_NODE(parent);
4712 spin_unlock(&root->inode_lock); 4712 spin_unlock(&root->inode_lock);
4713 goto again; 4713 return;
4714 } 4714 }
4715 } 4715 }
4716 rb_link_node(&BTRFS_I(inode)->rb_node, parent, p); 4716 rb_link_node(new, parent, p);
4717 rb_insert_color(&BTRFS_I(inode)->rb_node, &root->inode_tree); 4717 rb_insert_color(new, &root->inode_tree);
4718 spin_unlock(&root->inode_lock); 4718 spin_unlock(&root->inode_lock);
4719} 4719}
4720 4720
@@ -8216,6 +8216,10 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
8216 8216
8217 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); 8217 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
8218 if (unlikely(!work)) { 8218 if (unlikely(!work)) {
8219 if (delay_iput)
8220 btrfs_add_delayed_iput(inode);
8221 else
8222 iput(inode);
8219 ret = -ENOMEM; 8223 ret = -ENOMEM;
8220 goto out; 8224 goto out;
8221 } 8225 }
@@ -8613,11 +8617,13 @@ static const struct inode_operations btrfs_dir_inode_operations = {
8613 .removexattr = btrfs_removexattr, 8617 .removexattr = btrfs_removexattr,
8614 .permission = btrfs_permission, 8618 .permission = btrfs_permission,
8615 .get_acl = btrfs_get_acl, 8619 .get_acl = btrfs_get_acl,
8620 .update_time = btrfs_update_time,
8616}; 8621};
8617static const struct inode_operations btrfs_dir_ro_inode_operations = { 8622static const struct inode_operations btrfs_dir_ro_inode_operations = {
8618 .lookup = btrfs_lookup, 8623 .lookup = btrfs_lookup,
8619 .permission = btrfs_permission, 8624 .permission = btrfs_permission,
8620 .get_acl = btrfs_get_acl, 8625 .get_acl = btrfs_get_acl,
8626 .update_time = btrfs_update_time,
8621}; 8627};
8622 8628
8623static const struct file_operations btrfs_dir_file_operations = { 8629static const struct file_operations btrfs_dir_file_operations = {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 1a5b9462dd9a..9d46f60cb943 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -574,7 +574,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
574 if (ret) 574 if (ret)
575 return ret; 575 return ret;
576 576
577 btrfs_wait_ordered_extents(root, 0); 577 btrfs_wait_ordered_extents(root);
578 578
579 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 579 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
580 if (!pending_snapshot) 580 if (!pending_snapshot)
@@ -2696,9 +2696,9 @@ out_unlock:
2696static long btrfs_ioctl_file_extent_same(struct file *file, 2696static long btrfs_ioctl_file_extent_same(struct file *file,
2697 void __user *argp) 2697 void __user *argp)
2698{ 2698{
2699 struct btrfs_ioctl_same_args *args = argp; 2699 struct btrfs_ioctl_same_args tmp;
2700 struct btrfs_ioctl_same_args same; 2700 struct btrfs_ioctl_same_args *same;
2701 struct btrfs_ioctl_same_extent_info info; 2701 struct btrfs_ioctl_same_extent_info *info;
2702 struct inode *src = file->f_dentry->d_inode; 2702 struct inode *src = file->f_dentry->d_inode;
2703 struct file *dst_file = NULL; 2703 struct file *dst_file = NULL;
2704 struct inode *dst; 2704 struct inode *dst;
@@ -2706,6 +2706,7 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2706 u64 len; 2706 u64 len;
2707 int i; 2707 int i;
2708 int ret; 2708 int ret;
2709 unsigned long size;
2709 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 2710 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
2710 bool is_admin = capable(CAP_SYS_ADMIN); 2711 bool is_admin = capable(CAP_SYS_ADMIN);
2711 2712
@@ -2716,15 +2717,30 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2716 if (ret) 2717 if (ret)
2717 return ret; 2718 return ret;
2718 2719
2719 if (copy_from_user(&same, 2720 if (copy_from_user(&tmp,
2720 (struct btrfs_ioctl_same_args __user *)argp, 2721 (struct btrfs_ioctl_same_args __user *)argp,
2721 sizeof(same))) { 2722 sizeof(tmp))) {
2722 ret = -EFAULT; 2723 ret = -EFAULT;
2723 goto out; 2724 goto out;
2724 } 2725 }
2725 2726
2726 off = same.logical_offset; 2727 size = sizeof(tmp) +
2727 len = same.length; 2728 tmp.dest_count * sizeof(struct btrfs_ioctl_same_extent_info);
2729
2730 same = kmalloc(size, GFP_NOFS);
2731 if (!same) {
2732 ret = -EFAULT;
2733 goto out;
2734 }
2735
2736 if (copy_from_user(same,
2737 (struct btrfs_ioctl_same_args __user *)argp, size)) {
2738 ret = -EFAULT;
2739 goto out;
2740 }
2741
2742 off = same->logical_offset;
2743 len = same->length;
2728 2744
2729 /* 2745 /*
2730 * Limit the total length we will dedupe for each operation. 2746 * Limit the total length we will dedupe for each operation.
@@ -2752,27 +2768,28 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2752 if (!S_ISREG(src->i_mode)) 2768 if (!S_ISREG(src->i_mode))
2753 goto out; 2769 goto out;
2754 2770
2755 ret = 0; 2771 /* pre-format output fields to sane values */
2756 for (i = 0; i < same.dest_count; i++) { 2772 for (i = 0; i < same->dest_count; i++) {
2757 if (copy_from_user(&info, &args->info[i], sizeof(info))) { 2773 same->info[i].bytes_deduped = 0ULL;
2758 ret = -EFAULT; 2774 same->info[i].status = 0;
2759 goto out; 2775 }
2760 }
2761 2776
2762 info.bytes_deduped = 0; 2777 ret = 0;
2778 for (i = 0; i < same->dest_count; i++) {
2779 info = &same->info[i];
2763 2780
2764 dst_file = fget(info.fd); 2781 dst_file = fget(info->fd);
2765 if (!dst_file) { 2782 if (!dst_file) {
2766 info.status = -EBADF; 2783 info->status = -EBADF;
2767 goto next; 2784 goto next;
2768 } 2785 }
2769 2786
2770 if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { 2787 if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
2771 info.status = -EINVAL; 2788 info->status = -EINVAL;
2772 goto next; 2789 goto next;
2773 } 2790 }
2774 2791
2775 info.status = -EXDEV; 2792 info->status = -EXDEV;
2776 if (file->f_path.mnt != dst_file->f_path.mnt) 2793 if (file->f_path.mnt != dst_file->f_path.mnt)
2777 goto next; 2794 goto next;
2778 2795
@@ -2781,32 +2798,29 @@ static long btrfs_ioctl_file_extent_same(struct file *file,
2781 goto next; 2798 goto next;
2782 2799
2783 if (S_ISDIR(dst->i_mode)) { 2800 if (S_ISDIR(dst->i_mode)) {
2784 info.status = -EISDIR; 2801 info->status = -EISDIR;
2785 goto next; 2802 goto next;
2786 } 2803 }
2787 2804
2788 if (!S_ISREG(dst->i_mode)) { 2805 if (!S_ISREG(dst->i_mode)) {
2789 info.status = -EACCES; 2806 info->status = -EACCES;
2790 goto next; 2807 goto next;
2791 } 2808 }
2792 2809
2793 info.status = btrfs_extent_same(src, off, len, dst, 2810 info->status = btrfs_extent_same(src, off, len, dst,
2794 info.logical_offset); 2811 info->logical_offset);
2795 if (info.status == 0) 2812 if (info->status == 0)
2796 info.bytes_deduped += len; 2813 info->bytes_deduped += len;
2797 2814
2798next: 2815next:
2799 if (dst_file) 2816 if (dst_file)
2800 fput(dst_file); 2817 fput(dst_file);
2801
2802 if (__put_user_unaligned(info.status, &args->info[i].status) ||
2803 __put_user_unaligned(info.bytes_deduped,
2804 &args->info[i].bytes_deduped)) {
2805 ret = -EFAULT;
2806 goto out;
2807 }
2808 } 2818 }
2809 2819
2820 ret = copy_to_user(argp, same, size);
2821 if (ret)
2822 ret = -EFAULT;
2823
2810out: 2824out:
2811 mnt_drop_write_file(file); 2825 mnt_drop_write_file(file);
2812 return ret; 2826 return ret;
@@ -3310,7 +3324,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
3310 } 3324 }
3311 3325
3312 if (!objectid) 3326 if (!objectid)
3313 objectid = root->root_key.objectid; 3327 objectid = BTRFS_FS_TREE_OBJECTID;
3314 3328
3315 location.objectid = objectid; 3329 location.objectid = objectid;
3316 location.type = BTRFS_ROOT_ITEM_KEY; 3330 location.type = BTRFS_ROOT_ITEM_KEY;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 966b413a33b8..c702cb62f78a 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -563,11 +563,10 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
563 * wait for all the ordered extents in a root. This is done when balancing 563 * wait for all the ordered extents in a root. This is done when balancing
564 * space between drives. 564 * space between drives.
565 */ 565 */
566void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) 566void btrfs_wait_ordered_extents(struct btrfs_root *root)
567{ 567{
568 struct list_head splice, works; 568 struct list_head splice, works;
569 struct btrfs_ordered_extent *ordered, *next; 569 struct btrfs_ordered_extent *ordered, *next;
570 struct inode *inode;
571 570
572 INIT_LIST_HEAD(&splice); 571 INIT_LIST_HEAD(&splice);
573 INIT_LIST_HEAD(&works); 572 INIT_LIST_HEAD(&works);
@@ -580,15 +579,6 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
580 root_extent_list); 579 root_extent_list);
581 list_move_tail(&ordered->root_extent_list, 580 list_move_tail(&ordered->root_extent_list,
582 &root->ordered_extents); 581 &root->ordered_extents);
583 /*
584 * the inode may be getting freed (in sys_unlink path).
585 */
586 inode = igrab(ordered->inode);
587 if (!inode) {
588 cond_resched_lock(&root->ordered_extent_lock);
589 continue;
590 }
591
592 atomic_inc(&ordered->refs); 582 atomic_inc(&ordered->refs);
593 spin_unlock(&root->ordered_extent_lock); 583 spin_unlock(&root->ordered_extent_lock);
594 584
@@ -605,21 +595,13 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
605 list_for_each_entry_safe(ordered, next, &works, work_list) { 595 list_for_each_entry_safe(ordered, next, &works, work_list) {
606 list_del_init(&ordered->work_list); 596 list_del_init(&ordered->work_list);
607 wait_for_completion(&ordered->completion); 597 wait_for_completion(&ordered->completion);
608
609 inode = ordered->inode;
610 btrfs_put_ordered_extent(ordered); 598 btrfs_put_ordered_extent(ordered);
611 if (delay_iput)
612 btrfs_add_delayed_iput(inode);
613 else
614 iput(inode);
615
616 cond_resched(); 599 cond_resched();
617 } 600 }
618 mutex_unlock(&root->fs_info->ordered_operations_mutex); 601 mutex_unlock(&root->fs_info->ordered_operations_mutex);
619} 602}
620 603
621void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info, 604void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info)
622 int delay_iput)
623{ 605{
624 struct btrfs_root *root; 606 struct btrfs_root *root;
625 struct list_head splice; 607 struct list_head splice;
@@ -637,7 +619,7 @@ void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info,
637 &fs_info->ordered_roots); 619 &fs_info->ordered_roots);
638 spin_unlock(&fs_info->ordered_root_lock); 620 spin_unlock(&fs_info->ordered_root_lock);
639 621
640 btrfs_wait_ordered_extents(root, delay_iput); 622 btrfs_wait_ordered_extents(root);
641 btrfs_put_fs_root(root); 623 btrfs_put_fs_root(root);
642 624
643 spin_lock(&fs_info->ordered_root_lock); 625 spin_lock(&fs_info->ordered_root_lock);
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index d9a5aa097b4f..0c0b35612d7a 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -195,9 +195,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
195void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 195void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
196 struct btrfs_root *root, 196 struct btrfs_root *root,
197 struct inode *inode); 197 struct inode *inode);
198void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); 198void btrfs_wait_ordered_extents(struct btrfs_root *root);
199void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info, 199void btrfs_wait_all_ordered_extents(struct btrfs_fs_info *fs_info);
200 int delay_iput);
201void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); 200void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
202void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); 201void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
203void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); 202void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index aacc2121e87c..a5a26320503f 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1548,7 +1548,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
1548 btrfs_file_extent_other_encoding(leaf, fi)); 1548 btrfs_file_extent_other_encoding(leaf, fi));
1549 1549
1550 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1550 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
1551 ret = 1; 1551 ret = -EINVAL;
1552 goto out; 1552 goto out;
1553 } 1553 }
1554 1554
@@ -1579,7 +1579,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1579 u64 end; 1579 u64 end;
1580 u32 nritems; 1580 u32 nritems;
1581 u32 i; 1581 u32 i;
1582 int ret; 1582 int ret = 0;
1583 int first = 1; 1583 int first = 1;
1584 int dirty = 0; 1584 int dirty = 0;
1585 1585
@@ -1642,11 +1642,13 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1642 1642
1643 ret = get_new_location(rc->data_inode, &new_bytenr, 1643 ret = get_new_location(rc->data_inode, &new_bytenr,
1644 bytenr, num_bytes); 1644 bytenr, num_bytes);
1645 if (ret > 0) { 1645 if (ret) {
1646 WARN_ON(1); 1646 /*
1647 continue; 1647 * Don't have to abort since we've not changed anything
1648 * in the file extent yet.
1649 */
1650 break;
1648 } 1651 }
1649 BUG_ON(ret < 0);
1650 1652
1651 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); 1653 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
1652 dirty = 1; 1654 dirty = 1;
@@ -1656,18 +1658,24 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
1656 num_bytes, parent, 1658 num_bytes, parent,
1657 btrfs_header_owner(leaf), 1659 btrfs_header_owner(leaf),
1658 key.objectid, key.offset, 1); 1660 key.objectid, key.offset, 1);
1659 BUG_ON(ret); 1661 if (ret) {
1662 btrfs_abort_transaction(trans, root, ret);
1663 break;
1664 }
1660 1665
1661 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1666 ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
1662 parent, btrfs_header_owner(leaf), 1667 parent, btrfs_header_owner(leaf),
1663 key.objectid, key.offset, 1); 1668 key.objectid, key.offset, 1);
1664 BUG_ON(ret); 1669 if (ret) {
1670 btrfs_abort_transaction(trans, root, ret);
1671 break;
1672 }
1665 } 1673 }
1666 if (dirty) 1674 if (dirty)
1667 btrfs_mark_buffer_dirty(leaf); 1675 btrfs_mark_buffer_dirty(leaf);
1668 if (inode) 1676 if (inode)
1669 btrfs_add_delayed_iput(inode); 1677 btrfs_add_delayed_iput(inode);
1670 return 0; 1678 return ret;
1671} 1679}
1672 1680
1673static noinline_for_stack 1681static noinline_for_stack
@@ -4238,7 +4246,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
4238 err = ret; 4246 err = ret;
4239 goto out; 4247 goto out;
4240 } 4248 }
4241 btrfs_wait_all_ordered_extents(fs_info, 0); 4249 btrfs_wait_all_ordered_extents(fs_info);
4242 4250
4243 while (1) { 4251 while (1) {
4244 mutex_lock(&fs_info->cleaner_mutex); 4252 mutex_lock(&fs_info->cleaner_mutex);
@@ -4499,19 +4507,19 @@ out:
4499 return ret; 4507 return ret;
4500} 4508}
4501 4509
4502void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 4510int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4503 struct btrfs_root *root, struct extent_buffer *buf, 4511 struct btrfs_root *root, struct extent_buffer *buf,
4504 struct extent_buffer *cow) 4512 struct extent_buffer *cow)
4505{ 4513{
4506 struct reloc_control *rc; 4514 struct reloc_control *rc;
4507 struct backref_node *node; 4515 struct backref_node *node;
4508 int first_cow = 0; 4516 int first_cow = 0;
4509 int level; 4517 int level;
4510 int ret; 4518 int ret = 0;
4511 4519
4512 rc = root->fs_info->reloc_ctl; 4520 rc = root->fs_info->reloc_ctl;
4513 if (!rc) 4521 if (!rc)
4514 return; 4522 return 0;
4515 4523
4516 BUG_ON(rc->stage == UPDATE_DATA_PTRS && 4524 BUG_ON(rc->stage == UPDATE_DATA_PTRS &&
4517 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); 4525 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID);
@@ -4547,10 +4555,9 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
4547 rc->nodes_relocated += buf->len; 4555 rc->nodes_relocated += buf->len;
4548 } 4556 }
4549 4557
4550 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { 4558 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)
4551 ret = replace_file_extents(trans, rc, root, cow); 4559 ret = replace_file_extents(trans, rc, root, cow);
4552 BUG_ON(ret); 4560 return ret;
4553 }
4554} 4561}
4555 4562
4556/* 4563/*
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 0afcd452fcb3..a18e0e23f6a6 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -158,12 +158,20 @@ struct scrub_fixup_nodatasum {
158 int mirror_num; 158 int mirror_num;
159}; 159};
160 160
161struct scrub_nocow_inode {
162 u64 inum;
163 u64 offset;
164 u64 root;
165 struct list_head list;
166};
167
161struct scrub_copy_nocow_ctx { 168struct scrub_copy_nocow_ctx {
162 struct scrub_ctx *sctx; 169 struct scrub_ctx *sctx;
163 u64 logical; 170 u64 logical;
164 u64 len; 171 u64 len;
165 int mirror_num; 172 int mirror_num;
166 u64 physical_for_dev_replace; 173 u64 physical_for_dev_replace;
174 struct list_head inodes;
167 struct btrfs_work work; 175 struct btrfs_work work;
168}; 176};
169 177
@@ -245,7 +253,7 @@ static void scrub_wr_bio_end_io_worker(struct btrfs_work *work);
245static int write_page_nocow(struct scrub_ctx *sctx, 253static int write_page_nocow(struct scrub_ctx *sctx,
246 u64 physical_for_dev_replace, struct page *page); 254 u64 physical_for_dev_replace, struct page *page);
247static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, 255static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
248 void *ctx); 256 struct scrub_copy_nocow_ctx *ctx);
249static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, 257static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
250 int mirror_num, u64 physical_for_dev_replace); 258 int mirror_num, u64 physical_for_dev_replace);
251static void copy_nocow_pages_worker(struct btrfs_work *work); 259static void copy_nocow_pages_worker(struct btrfs_work *work);
@@ -3126,12 +3134,30 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
3126 nocow_ctx->mirror_num = mirror_num; 3134 nocow_ctx->mirror_num = mirror_num;
3127 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; 3135 nocow_ctx->physical_for_dev_replace = physical_for_dev_replace;
3128 nocow_ctx->work.func = copy_nocow_pages_worker; 3136 nocow_ctx->work.func = copy_nocow_pages_worker;
3137 INIT_LIST_HEAD(&nocow_ctx->inodes);
3129 btrfs_queue_worker(&fs_info->scrub_nocow_workers, 3138 btrfs_queue_worker(&fs_info->scrub_nocow_workers,
3130 &nocow_ctx->work); 3139 &nocow_ctx->work);
3131 3140
3132 return 0; 3141 return 0;
3133} 3142}
3134 3143
3144static int record_inode_for_nocow(u64 inum, u64 offset, u64 root, void *ctx)
3145{
3146 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3147 struct scrub_nocow_inode *nocow_inode;
3148
3149 nocow_inode = kzalloc(sizeof(*nocow_inode), GFP_NOFS);
3150 if (!nocow_inode)
3151 return -ENOMEM;
3152 nocow_inode->inum = inum;
3153 nocow_inode->offset = offset;
3154 nocow_inode->root = root;
3155 list_add_tail(&nocow_inode->list, &nocow_ctx->inodes);
3156 return 0;
3157}
3158
3159#define COPY_COMPLETE 1
3160
3135static void copy_nocow_pages_worker(struct btrfs_work *work) 3161static void copy_nocow_pages_worker(struct btrfs_work *work)
3136{ 3162{
3137 struct scrub_copy_nocow_ctx *nocow_ctx = 3163 struct scrub_copy_nocow_ctx *nocow_ctx =
@@ -3167,8 +3193,7 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
3167 } 3193 }
3168 3194
3169 ret = iterate_inodes_from_logical(logical, fs_info, path, 3195 ret = iterate_inodes_from_logical(logical, fs_info, path,
3170 copy_nocow_pages_for_inode, 3196 record_inode_for_nocow, nocow_ctx);
3171 nocow_ctx);
3172 if (ret != 0 && ret != -ENOENT) { 3197 if (ret != 0 && ret != -ENOENT) {
3173 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n", 3198 pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
3174 logical, physical_for_dev_replace, len, mirror_num, 3199 logical, physical_for_dev_replace, len, mirror_num,
@@ -3177,7 +3202,33 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
3177 goto out; 3202 goto out;
3178 } 3203 }
3179 3204
3205 btrfs_end_transaction(trans, root);
3206 trans = NULL;
3207 while (!list_empty(&nocow_ctx->inodes)) {
3208 struct scrub_nocow_inode *entry;
3209 entry = list_first_entry(&nocow_ctx->inodes,
3210 struct scrub_nocow_inode,
3211 list);
3212 list_del_init(&entry->list);
3213 ret = copy_nocow_pages_for_inode(entry->inum, entry->offset,
3214 entry->root, nocow_ctx);
3215 kfree(entry);
3216 if (ret == COPY_COMPLETE) {
3217 ret = 0;
3218 break;
3219 } else if (ret) {
3220 break;
3221 }
3222 }
3180out: 3223out:
3224 while (!list_empty(&nocow_ctx->inodes)) {
3225 struct scrub_nocow_inode *entry;
3226 entry = list_first_entry(&nocow_ctx->inodes,
3227 struct scrub_nocow_inode,
3228 list);
3229 list_del_init(&entry->list);
3230 kfree(entry);
3231 }
3181 if (trans && !IS_ERR(trans)) 3232 if (trans && !IS_ERR(trans))
3182 btrfs_end_transaction(trans, root); 3233 btrfs_end_transaction(trans, root);
3183 if (not_written) 3234 if (not_written)
@@ -3190,20 +3241,25 @@ out:
3190 scrub_pending_trans_workers_dec(sctx); 3241 scrub_pending_trans_workers_dec(sctx);
3191} 3242}
3192 3243
3193static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) 3244static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
3245 struct scrub_copy_nocow_ctx *nocow_ctx)
3194{ 3246{
3195 struct scrub_copy_nocow_ctx *nocow_ctx = ctx;
3196 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; 3247 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3197 struct btrfs_key key; 3248 struct btrfs_key key;
3198 struct inode *inode; 3249 struct inode *inode;
3199 struct page *page; 3250 struct page *page;
3200 struct btrfs_root *local_root; 3251 struct btrfs_root *local_root;
3252 struct btrfs_ordered_extent *ordered;
3253 struct extent_map *em;
3254 struct extent_state *cached_state = NULL;
3255 struct extent_io_tree *io_tree;
3201 u64 physical_for_dev_replace; 3256 u64 physical_for_dev_replace;
3202 u64 len; 3257 u64 len = nocow_ctx->len;
3258 u64 lockstart = offset, lockend = offset + len - 1;
3203 unsigned long index; 3259 unsigned long index;
3204 int srcu_index; 3260 int srcu_index;
3205 int ret; 3261 int ret = 0;
3206 int err; 3262 int err = 0;
3207 3263
3208 key.objectid = root; 3264 key.objectid = root;
3209 key.type = BTRFS_ROOT_ITEM_KEY; 3265 key.type = BTRFS_ROOT_ITEM_KEY;
@@ -3229,9 +3285,33 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3229 mutex_lock(&inode->i_mutex); 3285 mutex_lock(&inode->i_mutex);
3230 inode_dio_wait(inode); 3286 inode_dio_wait(inode);
3231 3287
3232 ret = 0;
3233 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; 3288 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace;
3234 len = nocow_ctx->len; 3289 io_tree = &BTRFS_I(inode)->io_tree;
3290
3291 lock_extent_bits(io_tree, lockstart, lockend, 0, &cached_state);
3292 ordered = btrfs_lookup_ordered_range(inode, lockstart, len);
3293 if (ordered) {
3294 btrfs_put_ordered_extent(ordered);
3295 goto out_unlock;
3296 }
3297
3298 em = btrfs_get_extent(inode, NULL, 0, lockstart, len, 0);
3299 if (IS_ERR(em)) {
3300 ret = PTR_ERR(em);
3301 goto out_unlock;
3302 }
3303
3304 /*
3305 * This extent does not actually cover the logical extent anymore,
3306 * move on to the next inode.
3307 */
3308 if (em->block_start > nocow_ctx->logical ||
3309 em->block_start + em->block_len < nocow_ctx->logical + len) {
3310 free_extent_map(em);
3311 goto out_unlock;
3312 }
3313 free_extent_map(em);
3314
3235 while (len >= PAGE_CACHE_SIZE) { 3315 while (len >= PAGE_CACHE_SIZE) {
3236 index = offset >> PAGE_CACHE_SHIFT; 3316 index = offset >> PAGE_CACHE_SHIFT;
3237again: 3317again:
@@ -3247,10 +3327,9 @@ again:
3247 goto next_page; 3327 goto next_page;
3248 } else { 3328 } else {
3249 ClearPageError(page); 3329 ClearPageError(page);
3250 err = extent_read_full_page(&BTRFS_I(inode)-> 3330 err = extent_read_full_page_nolock(io_tree, page,
3251 io_tree, 3331 btrfs_get_extent,
3252 page, btrfs_get_extent, 3332 nocow_ctx->mirror_num);
3253 nocow_ctx->mirror_num);
3254 if (err) { 3333 if (err) {
3255 ret = err; 3334 ret = err;
3256 goto next_page; 3335 goto next_page;
@@ -3264,6 +3343,7 @@ again:
3264 * page in the page cache. 3343 * page in the page cache.
3265 */ 3344 */
3266 if (page->mapping != inode->i_mapping) { 3345 if (page->mapping != inode->i_mapping) {
3346 unlock_page(page);
3267 page_cache_release(page); 3347 page_cache_release(page);
3268 goto again; 3348 goto again;
3269 } 3349 }
@@ -3287,6 +3367,10 @@ next_page:
3287 physical_for_dev_replace += PAGE_CACHE_SIZE; 3367 physical_for_dev_replace += PAGE_CACHE_SIZE;
3288 len -= PAGE_CACHE_SIZE; 3368 len -= PAGE_CACHE_SIZE;
3289 } 3369 }
3370 ret = COPY_COMPLETE;
3371out_unlock:
3372 unlock_extent_cached(io_tree, lockstart, lockend, &cached_state,
3373 GFP_NOFS);
3290out: 3374out:
3291 mutex_unlock(&inode->i_mutex); 3375 mutex_unlock(&inode->i_mutex);
3292 iput(inode); 3376 iput(inode);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 3aab10ce63e8..e913328d0f2a 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -921,7 +921,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
921 return 0; 921 return 0;
922 } 922 }
923 923
924 btrfs_wait_all_ordered_extents(fs_info, 1); 924 btrfs_wait_all_ordered_extents(fs_info);
925 925
926 trans = btrfs_attach_transaction_barrier(root); 926 trans = btrfs_attach_transaction_barrier(root);
927 if (IS_ERR(trans)) { 927 if (IS_ERR(trans)) {
@@ -1340,6 +1340,12 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1340 if (ret) 1340 if (ret)
1341 goto restore; 1341 goto restore;
1342 } else { 1342 } else {
1343 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
1344 btrfs_err(fs_info,
1345 "Remounting read-write after error is not allowed\n");
1346 ret = -EINVAL;
1347 goto restore;
1348 }
1343 if (fs_info->fs_devices->rw_devices == 0) { 1349 if (fs_info->fs_devices->rw_devices == 0) {
1344 ret = -EACCES; 1350 ret = -EACCES;
1345 goto restore; 1351 goto restore;
@@ -1377,6 +1383,16 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
1377 pr_warn("btrfs: failed to resume dev_replace\n"); 1383 pr_warn("btrfs: failed to resume dev_replace\n");
1378 goto restore; 1384 goto restore;
1379 } 1385 }
1386
1387 if (!fs_info->uuid_root) {
1388 pr_info("btrfs: creating UUID tree\n");
1389 ret = btrfs_create_uuid_tree(fs_info);
1390 if (ret) {
1391 pr_warn("btrfs: failed to create the uuid tree"
1392 "%d\n", ret);
1393 goto restore;
1394 }
1395 }
1380 sb->s_flags &= ~MS_RDONLY; 1396 sb->s_flags &= ~MS_RDONLY;
1381 } 1397 }
1382out: 1398out:
@@ -1762,6 +1778,9 @@ static void btrfs_print_info(void)
1762#ifdef CONFIG_BTRFS_DEBUG 1778#ifdef CONFIG_BTRFS_DEBUG
1763 ", debug=on" 1779 ", debug=on"
1764#endif 1780#endif
1781#ifdef CONFIG_BTRFS_ASSERT
1782 ", assert=on"
1783#endif
1765#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 1784#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
1766 ", integrity-checker=on" 1785 ", integrity-checker=on"
1767#endif 1786#endif
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index cac4a3f76323..e7a95356df83 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1603,7 +1603,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
1603static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) 1603static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
1604{ 1604{
1605 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) 1605 if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
1606 btrfs_wait_all_ordered_extents(fs_info, 1); 1606 btrfs_wait_all_ordered_extents(fs_info);
1607} 1607}
1608 1608
1609int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 1609int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 0d9613c3f5e5..79f057c0619a 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -93,7 +93,8 @@
93 */ 93 */
94#define LOG_WALK_PIN_ONLY 0 94#define LOG_WALK_PIN_ONLY 0
95#define LOG_WALK_REPLAY_INODES 1 95#define LOG_WALK_REPLAY_INODES 1
96#define LOG_WALK_REPLAY_ALL 2 96#define LOG_WALK_REPLAY_DIR_INDEX 2
97#define LOG_WALK_REPLAY_ALL 3
97 98
98static int btrfs_log_inode(struct btrfs_trans_handle *trans, 99static int btrfs_log_inode(struct btrfs_trans_handle *trans,
99 struct btrfs_root *root, struct inode *inode, 100 struct btrfs_root *root, struct inode *inode,
@@ -393,6 +394,7 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
393 if (inode_item) { 394 if (inode_item) {
394 struct btrfs_inode_item *item; 395 struct btrfs_inode_item *item;
395 u64 nbytes; 396 u64 nbytes;
397 u32 mode;
396 398
397 item = btrfs_item_ptr(path->nodes[0], path->slots[0], 399 item = btrfs_item_ptr(path->nodes[0], path->slots[0],
398 struct btrfs_inode_item); 400 struct btrfs_inode_item);
@@ -400,9 +402,19 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
400 item = btrfs_item_ptr(eb, slot, 402 item = btrfs_item_ptr(eb, slot,
401 struct btrfs_inode_item); 403 struct btrfs_inode_item);
402 btrfs_set_inode_nbytes(eb, item, nbytes); 404 btrfs_set_inode_nbytes(eb, item, nbytes);
405
406 /*
407 * If this is a directory we need to reset the i_size to
408 * 0 so that we can set it up properly when replaying
409 * the rest of the items in this log.
410 */
411 mode = btrfs_inode_mode(eb, item);
412 if (S_ISDIR(mode))
413 btrfs_set_inode_size(eb, item, 0);
403 } 414 }
404 } else if (inode_item) { 415 } else if (inode_item) {
405 struct btrfs_inode_item *item; 416 struct btrfs_inode_item *item;
417 u32 mode;
406 418
407 /* 419 /*
408 * New inode, set nbytes to 0 so that the nbytes comes out 420 * New inode, set nbytes to 0 so that the nbytes comes out
@@ -410,6 +422,15 @@ static noinline int overwrite_item(struct btrfs_trans_handle *trans,
410 */ 422 */
411 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item); 423 item = btrfs_item_ptr(eb, slot, struct btrfs_inode_item);
412 btrfs_set_inode_nbytes(eb, item, 0); 424 btrfs_set_inode_nbytes(eb, item, 0);
425
426 /*
427 * If this is a directory we need to reset the i_size to 0 so
428 * that we can set it up properly when replaying the rest of
429 * the items in this log.
430 */
431 mode = btrfs_inode_mode(eb, item);
432 if (S_ISDIR(mode))
433 btrfs_set_inode_size(eb, item, 0);
413 } 434 }
414insert: 435insert:
415 btrfs_release_path(path); 436 btrfs_release_path(path);
@@ -1496,6 +1517,7 @@ static noinline int insert_one_name(struct btrfs_trans_handle *trans,
1496 iput(inode); 1517 iput(inode);
1497 return -EIO; 1518 return -EIO;
1498 } 1519 }
1520
1499 ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index); 1521 ret = btrfs_add_link(trans, dir, inode, name, name_len, 1, index);
1500 1522
1501 /* FIXME, put inode into FIXUP list */ 1523 /* FIXME, put inode into FIXUP list */
@@ -1534,6 +1556,7 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1534 u8 log_type; 1556 u8 log_type;
1535 int exists; 1557 int exists;
1536 int ret = 0; 1558 int ret = 0;
1559 bool update_size = (key->type == BTRFS_DIR_INDEX_KEY);
1537 1560
1538 dir = read_one_inode(root, key->objectid); 1561 dir = read_one_inode(root, key->objectid);
1539 if (!dir) 1562 if (!dir)
@@ -1604,6 +1627,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
1604 goto insert; 1627 goto insert;
1605out: 1628out:
1606 btrfs_release_path(path); 1629 btrfs_release_path(path);
1630 if (!ret && update_size) {
1631 btrfs_i_size_write(dir, dir->i_size + name_len * 2);
1632 ret = btrfs_update_inode(trans, root, dir);
1633 }
1607 kfree(name); 1634 kfree(name);
1608 iput(dir); 1635 iput(dir);
1609 return ret; 1636 return ret;
@@ -1614,6 +1641,7 @@ insert:
1614 name, name_len, log_type, &log_key); 1641 name, name_len, log_type, &log_key);
1615 if (ret && ret != -ENOENT) 1642 if (ret && ret != -ENOENT)
1616 goto out; 1643 goto out;
1644 update_size = false;
1617 ret = 0; 1645 ret = 0;
1618 goto out; 1646 goto out;
1619} 1647}
@@ -2027,6 +2055,15 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2027 if (ret) 2055 if (ret)
2028 break; 2056 break;
2029 } 2057 }
2058
2059 if (key.type == BTRFS_DIR_INDEX_KEY &&
2060 wc->stage == LOG_WALK_REPLAY_DIR_INDEX) {
2061 ret = replay_one_dir_item(wc->trans, root, path,
2062 eb, i, &key);
2063 if (ret)
2064 break;
2065 }
2066
2030 if (wc->stage < LOG_WALK_REPLAY_ALL) 2067 if (wc->stage < LOG_WALK_REPLAY_ALL)
2031 continue; 2068 continue;
2032 2069
@@ -2048,8 +2085,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
2048 eb, i, &key); 2085 eb, i, &key);
2049 if (ret) 2086 if (ret)
2050 break; 2087 break;
2051 } else if (key.type == BTRFS_DIR_ITEM_KEY || 2088 } else if (key.type == BTRFS_DIR_ITEM_KEY) {
2052 key.type == BTRFS_DIR_INDEX_KEY) {
2053 ret = replay_one_dir_item(wc->trans, root, path, 2089 ret = replay_one_dir_item(wc->trans, root, path,
2054 eb, i, &key); 2090 eb, i, &key);
2055 if (ret) 2091 if (ret)
@@ -3805,6 +3841,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
3805 int ret = 0; 3841 int ret = 0;
3806 struct btrfs_root *root; 3842 struct btrfs_root *root;
3807 struct dentry *old_parent = NULL; 3843 struct dentry *old_parent = NULL;
3844 struct inode *orig_inode = inode;
3808 3845
3809 /* 3846 /*
3810 * for regular files, if its inode is already on disk, we don't 3847 * for regular files, if its inode is already on disk, we don't
@@ -3824,7 +3861,14 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
3824 } 3861 }
3825 3862
3826 while (1) { 3863 while (1) {
3827 BTRFS_I(inode)->logged_trans = trans->transid; 3864 /*
3865 * If we are logging a directory then we start with our inode,
3866 * not our parent's inode, so we need to skip setting the
3867 * logged_trans so that further down in the log code we don't
3868 * think this inode has already been logged.
3869 */
3870 if (inode != orig_inode)
3871 BTRFS_I(inode)->logged_trans = trans->transid;
3828 smp_mb(); 3872 smp_mb();
3829 3873
3830 if (BTRFS_I(inode)->last_unlink_trans > last_committed) { 3874 if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0052ca8264d9..a10645830223 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -796,7 +796,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
796 fs_devices->rotating = 1; 796 fs_devices->rotating = 1;
797 797
798 fs_devices->open_devices++; 798 fs_devices->open_devices++;
799 if (device->writeable && !device->is_tgtdev_for_dev_replace) { 799 if (device->writeable &&
800 device->devid != BTRFS_DEV_REPLACE_DEVID) {
800 fs_devices->rw_devices++; 801 fs_devices->rw_devices++;
801 list_add(&device->dev_alloc_list, 802 list_add(&device->dev_alloc_list,
802 &fs_devices->alloc_list); 803 &fs_devices->alloc_list);
@@ -911,9 +912,9 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
911 if (disk_super->label[0]) { 912 if (disk_super->label[0]) {
912 if (disk_super->label[BTRFS_LABEL_SIZE - 1]) 913 if (disk_super->label[BTRFS_LABEL_SIZE - 1])
913 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; 914 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
914 printk(KERN_INFO "device label %s ", disk_super->label); 915 printk(KERN_INFO "btrfs: device label %s ", disk_super->label);
915 } else { 916 } else {
916 printk(KERN_INFO "device fsid %pU ", disk_super->fsid); 917 printk(KERN_INFO "btrfs: device fsid %pU ", disk_super->fsid);
917 } 918 }
918 919
919 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); 920 printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);