diff options
author | Chris Mason <chris.mason@fusionio.com> | 2013-02-20 14:05:45 -0500 |
---|---|---|
committer | Chris Mason <chris.mason@fusionio.com> | 2013-02-20 14:05:45 -0500 |
commit | b2c6b3e0611c58fbeb6b9c0892b6249f7bdfaf6b (patch) | |
tree | de7cf0825605aa6acf33a8d107003efd7aedbe72 | |
parent | 19f949f52599ba7c3f67a5897ac6be14bfcb1200 (diff) | |
parent | 272d26d0ad8c0e326689f2fa3cdc6a5fcc8e74e0 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-next into for-linus-3.9
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
Conflicts:
fs/btrfs/disk-io.c
38 files changed, 1630 insertions, 702 deletions
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index d61feca79455..310a7f6d09b1 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h | |||
@@ -19,7 +19,7 @@ | |||
19 | #ifndef __BTRFS_BACKREF__ | 19 | #ifndef __BTRFS_BACKREF__ |
20 | #define __BTRFS_BACKREF__ | 20 | #define __BTRFS_BACKREF__ |
21 | 21 | ||
22 | #include "ioctl.h" | 22 | #include <linux/btrfs.h> |
23 | #include "ulist.h" | 23 | #include "ulist.h" |
24 | #include "extent_io.h" | 24 | #include "extent_io.h" |
25 | 25 | ||
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 2a8c242bc4f5..d9b97d4960e6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -40,6 +40,8 @@ | |||
40 | #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 | 40 | #define BTRFS_INODE_HAS_ASYNC_EXTENT 6 |
41 | #define BTRFS_INODE_NEEDS_FULL_SYNC 7 | 41 | #define BTRFS_INODE_NEEDS_FULL_SYNC 7 |
42 | #define BTRFS_INODE_COPY_EVERYTHING 8 | 42 | #define BTRFS_INODE_COPY_EVERYTHING 8 |
43 | #define BTRFS_INODE_IN_DELALLOC_LIST 9 | ||
44 | #define BTRFS_INODE_READDIO_NEED_LOCK 10 | ||
43 | 45 | ||
44 | /* in memory btrfs inode */ | 46 | /* in memory btrfs inode */ |
45 | struct btrfs_inode { | 47 | struct btrfs_inode { |
@@ -216,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation) | |||
216 | return 0; | 218 | return 0; |
217 | } | 219 | } |
218 | 220 | ||
221 | /* | ||
222 | * Disable DIO read nolock optimization, so new dio readers will be forced | ||
223 | * to grab i_mutex. It is used to avoid the endless truncate due to | ||
224 | * nonlocked dio read. | ||
225 | */ | ||
226 | static inline void btrfs_inode_block_unlocked_dio(struct inode *inode) | ||
227 | { | ||
228 | set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags); | ||
229 | smp_mb(); | ||
230 | } | ||
231 | |||
232 | static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode) | ||
233 | { | ||
234 | smp_mb__before_clear_bit(); | ||
235 | clear_bit(BTRFS_INODE_READDIO_NEED_LOCK, | ||
236 | &BTRFS_I(inode)->runtime_flags); | ||
237 | } | ||
238 | |||
219 | #endif | 239 | #endif |
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index 11d47bfb62b4..18af6f48781a 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c | |||
@@ -813,8 +813,7 @@ static int btrfsic_process_superblock_dev_mirror( | |||
813 | (bh->b_data + (dev_bytenr & 4095)); | 813 | (bh->b_data + (dev_bytenr & 4095)); |
814 | 814 | ||
815 | if (btrfs_super_bytenr(super_tmp) != dev_bytenr || | 815 | if (btrfs_super_bytenr(super_tmp) != dev_bytenr || |
816 | strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC, | 816 | super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) || |
817 | sizeof(super_tmp->magic)) || | ||
818 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || | 817 | memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) || |
819 | btrfs_super_nodesize(super_tmp) != state->metablock_size || | 818 | btrfs_super_nodesize(super_tmp) != state->metablock_size || |
820 | btrfs_super_leafsize(super_tmp) != state->metablock_size || | 819 | btrfs_super_leafsize(super_tmp) != state->metablock_size || |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index eea5da7a2b9a..ecd25a1b4e51 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -1138,6 +1138,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, | |||
1138 | switch (tm->op) { | 1138 | switch (tm->op) { |
1139 | case MOD_LOG_KEY_REMOVE_WHILE_FREEING: | 1139 | case MOD_LOG_KEY_REMOVE_WHILE_FREEING: |
1140 | BUG_ON(tm->slot < n); | 1140 | BUG_ON(tm->slot < n); |
1141 | /* Fallthrough */ | ||
1141 | case MOD_LOG_KEY_REMOVE_WHILE_MOVING: | 1142 | case MOD_LOG_KEY_REMOVE_WHILE_MOVING: |
1142 | case MOD_LOG_KEY_REMOVE: | 1143 | case MOD_LOG_KEY_REMOVE: |
1143 | btrfs_set_node_key(eb, &tm->key, tm->slot); | 1144 | btrfs_set_node_key(eb, &tm->key, tm->slot); |
@@ -1222,7 +1223,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, | |||
1222 | 1223 | ||
1223 | __tree_mod_log_rewind(eb_rewin, time_seq, tm); | 1224 | __tree_mod_log_rewind(eb_rewin, time_seq, tm); |
1224 | WARN_ON(btrfs_header_nritems(eb_rewin) > | 1225 | WARN_ON(btrfs_header_nritems(eb_rewin) > |
1225 | BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root)); | 1226 | BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root)); |
1226 | 1227 | ||
1227 | return eb_rewin; | 1228 | return eb_rewin; |
1228 | } | 1229 | } |
@@ -1441,7 +1442,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) | |||
1441 | */ | 1442 | */ |
1442 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | 1443 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, |
1443 | struct btrfs_root *root, struct extent_buffer *parent, | 1444 | struct btrfs_root *root, struct extent_buffer *parent, |
1444 | int start_slot, int cache_only, u64 *last_ret, | 1445 | int start_slot, u64 *last_ret, |
1445 | struct btrfs_key *progress) | 1446 | struct btrfs_key *progress) |
1446 | { | 1447 | { |
1447 | struct extent_buffer *cur; | 1448 | struct extent_buffer *cur; |
@@ -1461,8 +1462,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
1461 | struct btrfs_disk_key disk_key; | 1462 | struct btrfs_disk_key disk_key; |
1462 | 1463 | ||
1463 | parent_level = btrfs_header_level(parent); | 1464 | parent_level = btrfs_header_level(parent); |
1464 | if (cache_only && parent_level != 1) | ||
1465 | return 0; | ||
1466 | 1465 | ||
1467 | WARN_ON(trans->transaction != root->fs_info->running_transaction); | 1466 | WARN_ON(trans->transaction != root->fs_info->running_transaction); |
1468 | WARN_ON(trans->transid != root->fs_info->generation); | 1467 | WARN_ON(trans->transid != root->fs_info->generation); |
@@ -1508,10 +1507,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
1508 | else | 1507 | else |
1509 | uptodate = 0; | 1508 | uptodate = 0; |
1510 | if (!cur || !uptodate) { | 1509 | if (!cur || !uptodate) { |
1511 | if (cache_only) { | ||
1512 | free_extent_buffer(cur); | ||
1513 | continue; | ||
1514 | } | ||
1515 | if (!cur) { | 1510 | if (!cur) { |
1516 | cur = read_tree_block(root, blocknr, | 1511 | cur = read_tree_block(root, blocknr, |
1517 | blocksize, gen); | 1512 | blocksize, gen); |
@@ -4825,8 +4820,8 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4825 | 4820 | ||
4826 | /* | 4821 | /* |
4827 | * A helper function to walk down the tree starting at min_key, and looking | 4822 | * A helper function to walk down the tree starting at min_key, and looking |
4828 | * for nodes or leaves that are either in cache or have a minimum | 4823 | * for nodes or leaves that have a minimum transaction id. |
4829 | * transaction id. This is used by the btree defrag code, and tree logging | 4824 | * This is used by the btree defrag code, and tree logging |
4830 | * | 4825 | * |
4831 | * This does not cow, but it does stuff the starting key it finds back | 4826 | * This does not cow, but it does stuff the starting key it finds back |
4832 | * into min_key, so you can call btrfs_search_slot with cow=1 on the | 4827 | * into min_key, so you can call btrfs_search_slot with cow=1 on the |
@@ -4847,7 +4842,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path) | |||
4847 | */ | 4842 | */ |
4848 | int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | 4843 | int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, |
4849 | struct btrfs_key *max_key, | 4844 | struct btrfs_key *max_key, |
4850 | struct btrfs_path *path, int cache_only, | 4845 | struct btrfs_path *path, |
4851 | u64 min_trans) | 4846 | u64 min_trans) |
4852 | { | 4847 | { |
4853 | struct extent_buffer *cur; | 4848 | struct extent_buffer *cur; |
@@ -4887,15 +4882,12 @@ again: | |||
4887 | if (sret && slot > 0) | 4882 | if (sret && slot > 0) |
4888 | slot--; | 4883 | slot--; |
4889 | /* | 4884 | /* |
4890 | * check this node pointer against the cache_only and | 4885 | * check this node pointer against the min_trans parameter. |
4891 | * min_trans parameters. If it isn't in cache or is too | 4886 | * If it is too old, skip to the next one. |
4892 | * old, skip to the next one. | ||
4893 | */ | 4887 | */ |
4894 | while (slot < nritems) { | 4888 | while (slot < nritems) { |
4895 | u64 blockptr; | 4889 | u64 blockptr; |
4896 | u64 gen; | 4890 | u64 gen; |
4897 | struct extent_buffer *tmp; | ||
4898 | struct btrfs_disk_key disk_key; | ||
4899 | 4891 | ||
4900 | blockptr = btrfs_node_blockptr(cur, slot); | 4892 | blockptr = btrfs_node_blockptr(cur, slot); |
4901 | gen = btrfs_node_ptr_generation(cur, slot); | 4893 | gen = btrfs_node_ptr_generation(cur, slot); |
@@ -4903,27 +4895,7 @@ again: | |||
4903 | slot++; | 4895 | slot++; |
4904 | continue; | 4896 | continue; |
4905 | } | 4897 | } |
4906 | if (!cache_only) | 4898 | break; |
4907 | break; | ||
4908 | |||
4909 | if (max_key) { | ||
4910 | btrfs_node_key(cur, &disk_key, slot); | ||
4911 | if (comp_keys(&disk_key, max_key) >= 0) { | ||
4912 | ret = 1; | ||
4913 | goto out; | ||
4914 | } | ||
4915 | } | ||
4916 | |||
4917 | tmp = btrfs_find_tree_block(root, blockptr, | ||
4918 | btrfs_level_size(root, level - 1)); | ||
4919 | |||
4920 | if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) { | ||
4921 | free_extent_buffer(tmp); | ||
4922 | break; | ||
4923 | } | ||
4924 | if (tmp) | ||
4925 | free_extent_buffer(tmp); | ||
4926 | slot++; | ||
4927 | } | 4899 | } |
4928 | find_next_key: | 4900 | find_next_key: |
4929 | /* | 4901 | /* |
@@ -4934,7 +4906,7 @@ find_next_key: | |||
4934 | path->slots[level] = slot; | 4906 | path->slots[level] = slot; |
4935 | btrfs_set_path_blocking(path); | 4907 | btrfs_set_path_blocking(path); |
4936 | sret = btrfs_find_next_key(root, path, min_key, level, | 4908 | sret = btrfs_find_next_key(root, path, min_key, level, |
4937 | cache_only, min_trans); | 4909 | min_trans); |
4938 | if (sret == 0) { | 4910 | if (sret == 0) { |
4939 | btrfs_release_path(path); | 4911 | btrfs_release_path(path); |
4940 | goto again; | 4912 | goto again; |
@@ -5399,8 +5371,7 @@ out: | |||
5399 | /* | 5371 | /* |
5400 | * this is similar to btrfs_next_leaf, but does not try to preserve | 5372 | * this is similar to btrfs_next_leaf, but does not try to preserve |
5401 | * and fixup the path. It looks for and returns the next key in the | 5373 | * and fixup the path. It looks for and returns the next key in the |
5402 | * tree based on the current path and the cache_only and min_trans | 5374 | * tree based on the current path and the min_trans parameters. |
5403 | * parameters. | ||
5404 | * | 5375 | * |
5405 | * 0 is returned if another key is found, < 0 if there are any errors | 5376 | * 0 is returned if another key is found, < 0 if there are any errors |
5406 | * and 1 is returned if there are no higher keys in the tree | 5377 | * and 1 is returned if there are no higher keys in the tree |
@@ -5409,8 +5380,7 @@ out: | |||
5409 | * calling this function. | 5380 | * calling this function. |
5410 | */ | 5381 | */ |
5411 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, | 5382 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, |
5412 | struct btrfs_key *key, int level, | 5383 | struct btrfs_key *key, int level, u64 min_trans) |
5413 | int cache_only, u64 min_trans) | ||
5414 | { | 5384 | { |
5415 | int slot; | 5385 | int slot; |
5416 | struct extent_buffer *c; | 5386 | struct extent_buffer *c; |
@@ -5461,22 +5431,8 @@ next: | |||
5461 | if (level == 0) | 5431 | if (level == 0) |
5462 | btrfs_item_key_to_cpu(c, key, slot); | 5432 | btrfs_item_key_to_cpu(c, key, slot); |
5463 | else { | 5433 | else { |
5464 | u64 blockptr = btrfs_node_blockptr(c, slot); | ||
5465 | u64 gen = btrfs_node_ptr_generation(c, slot); | 5434 | u64 gen = btrfs_node_ptr_generation(c, slot); |
5466 | 5435 | ||
5467 | if (cache_only) { | ||
5468 | struct extent_buffer *cur; | ||
5469 | cur = btrfs_find_tree_block(root, blockptr, | ||
5470 | btrfs_level_size(root, level - 1)); | ||
5471 | if (!cur || | ||
5472 | btrfs_buffer_uptodate(cur, gen, 1) <= 0) { | ||
5473 | slot++; | ||
5474 | if (cur) | ||
5475 | free_extent_buffer(cur); | ||
5476 | goto next; | ||
5477 | } | ||
5478 | free_extent_buffer(cur); | ||
5479 | } | ||
5480 | if (gen < min_trans) { | 5436 | if (gen < min_trans) { |
5481 | slot++; | 5437 | slot++; |
5482 | goto next; | 5438 | goto next; |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 547b7b05727f..1679051f4d39 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -31,10 +31,10 @@ | |||
31 | #include <trace/events/btrfs.h> | 31 | #include <trace/events/btrfs.h> |
32 | #include <asm/kmap_types.h> | 32 | #include <asm/kmap_types.h> |
33 | #include <linux/pagemap.h> | 33 | #include <linux/pagemap.h> |
34 | #include <linux/btrfs.h> | ||
34 | #include "extent_io.h" | 35 | #include "extent_io.h" |
35 | #include "extent_map.h" | 36 | #include "extent_map.h" |
36 | #include "async-thread.h" | 37 | #include "async-thread.h" |
37 | #include "ioctl.h" | ||
38 | 38 | ||
39 | struct btrfs_trans_handle; | 39 | struct btrfs_trans_handle; |
40 | struct btrfs_transaction; | 40 | struct btrfs_transaction; |
@@ -46,7 +46,7 @@ extern struct kmem_cache *btrfs_path_cachep; | |||
46 | extern struct kmem_cache *btrfs_free_space_cachep; | 46 | extern struct kmem_cache *btrfs_free_space_cachep; |
47 | struct btrfs_ordered_sum; | 47 | struct btrfs_ordered_sum; |
48 | 48 | ||
49 | #define BTRFS_MAGIC "_BHRfS_M" | 49 | #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */ |
50 | 50 | ||
51 | #define BTRFS_MAX_MIRRORS 3 | 51 | #define BTRFS_MAX_MIRRORS 3 |
52 | 52 | ||
@@ -191,6 +191,8 @@ static int btrfs_csum_sizes[] = { 4, 0 }; | |||
191 | /* ioprio of readahead is set to idle */ | 191 | /* ioprio of readahead is set to idle */ |
192 | #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) | 192 | #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0)) |
193 | 193 | ||
194 | #define BTRFS_DIRTY_METADATA_THRESH (32 * 1024 * 1024) | ||
195 | |||
194 | /* | 196 | /* |
195 | * The key defines the order in the tree, and so it also defines (optimal) | 197 | * The key defines the order in the tree, and so it also defines (optimal) |
196 | * block layout. | 198 | * block layout. |
@@ -336,7 +338,9 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
336 | /* | 338 | /* |
337 | * File system states | 339 | * File system states |
338 | */ | 340 | */ |
341 | #define BTRFS_FS_STATE_ERROR 0 | ||
339 | 342 | ||
343 | /* Super block flags */ | ||
340 | /* Errors detected */ | 344 | /* Errors detected */ |
341 | #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) | 345 | #define BTRFS_SUPER_FLAG_ERROR (1ULL << 2) |
342 | 346 | ||
@@ -953,7 +957,15 @@ struct btrfs_dev_replace_item { | |||
953 | #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) | 957 | #define BTRFS_BLOCK_GROUP_DUP (1ULL << 5) |
954 | #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) | 958 | #define BTRFS_BLOCK_GROUP_RAID10 (1ULL << 6) |
955 | #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE | 959 | #define BTRFS_BLOCK_GROUP_RESERVED BTRFS_AVAIL_ALLOC_BIT_SINGLE |
956 | #define BTRFS_NR_RAID_TYPES 5 | 960 | |
961 | enum btrfs_raid_types { | ||
962 | BTRFS_RAID_RAID10, | ||
963 | BTRFS_RAID_RAID1, | ||
964 | BTRFS_RAID_DUP, | ||
965 | BTRFS_RAID_RAID0, | ||
966 | BTRFS_RAID_SINGLE, | ||
967 | BTRFS_NR_RAID_TYPES | ||
968 | }; | ||
957 | 969 | ||
958 | #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ | 970 | #define BTRFS_BLOCK_GROUP_TYPE_MASK (BTRFS_BLOCK_GROUP_DATA | \ |
959 | BTRFS_BLOCK_GROUP_SYSTEM | \ | 971 | BTRFS_BLOCK_GROUP_SYSTEM | \ |
@@ -1225,6 +1237,11 @@ struct seq_list { | |||
1225 | u64 seq; | 1237 | u64 seq; |
1226 | }; | 1238 | }; |
1227 | 1239 | ||
1240 | enum btrfs_orphan_cleanup_state { | ||
1241 | ORPHAN_CLEANUP_STARTED = 1, | ||
1242 | ORPHAN_CLEANUP_DONE = 2, | ||
1243 | }; | ||
1244 | |||
1228 | /* fs_info */ | 1245 | /* fs_info */ |
1229 | struct reloc_control; | 1246 | struct reloc_control; |
1230 | struct btrfs_device; | 1247 | struct btrfs_device; |
@@ -1250,6 +1267,7 @@ struct btrfs_fs_info { | |||
1250 | 1267 | ||
1251 | /* block group cache stuff */ | 1268 | /* block group cache stuff */ |
1252 | spinlock_t block_group_cache_lock; | 1269 | spinlock_t block_group_cache_lock; |
1270 | u64 first_logical_byte; | ||
1253 | struct rb_root block_group_cache_tree; | 1271 | struct rb_root block_group_cache_tree; |
1254 | 1272 | ||
1255 | /* keep track of unallocated space */ | 1273 | /* keep track of unallocated space */ |
@@ -1288,7 +1306,23 @@ struct btrfs_fs_info { | |||
1288 | u64 last_trans_log_full_commit; | 1306 | u64 last_trans_log_full_commit; |
1289 | unsigned long mount_opt; | 1307 | unsigned long mount_opt; |
1290 | unsigned long compress_type:4; | 1308 | unsigned long compress_type:4; |
1309 | /* | ||
1310 | * It is a suggestive number, the read side is safe even if it gets a | |
1311 | * wrong number because we will write out the data into a regular | ||
1312 | * extent. The write side(mount/remount) is under ->s_umount lock, | ||
1313 | * so it is also safe. | ||
1314 | */ | ||
1291 | u64 max_inline; | 1315 | u64 max_inline; |
1316 | /* | ||
1317 | * Protected by ->chunk_mutex and sb->s_umount. | ||
1318 | * | ||
1319 | * The reason that we use two locks to protect it is because only | |
1320 | * remount and mount operations can change it and these two operations | ||
1321 | * are under sb->s_umount, but the read side (chunk allocation) can not | ||
1322 | * acquire sb->s_umount or the deadlock would happen. So we use two | ||
1323 | * locks to protect it. On the write side, we must acquire two locks, | ||
1324 | * and on the read side, we just need to acquire one of them. | |
1325 | */ | ||
1292 | u64 alloc_start; | 1326 | u64 alloc_start; |
1293 | struct btrfs_transaction *running_transaction; | 1327 | struct btrfs_transaction *running_transaction; |
1294 | wait_queue_head_t transaction_throttle; | 1328 | wait_queue_head_t transaction_throttle; |
@@ -1365,6 +1399,7 @@ struct btrfs_fs_info { | |||
1365 | */ | 1399 | */ |
1366 | struct list_head ordered_extents; | 1400 | struct list_head ordered_extents; |
1367 | 1401 | ||
1402 | spinlock_t delalloc_lock; | ||
1368 | /* | 1403 | /* |
1369 | * all of the inodes that have delalloc bytes. It is possible for | 1404 | * all of the inodes that have delalloc bytes. It is possible for |
1370 | * this list to be empty even when there is still dirty data=ordered | 1405 | * this list to be empty even when there is still dirty data=ordered |
@@ -1373,13 +1408,6 @@ struct btrfs_fs_info { | |||
1373 | struct list_head delalloc_inodes; | 1408 | struct list_head delalloc_inodes; |
1374 | 1409 | ||
1375 | /* | 1410 | /* |
1376 | * special rename and truncate targets that must be on disk before | ||
1377 | * we're allowed to commit. This is basically the ext3 style | ||
1378 | * data=ordered list. | ||
1379 | */ | ||
1380 | struct list_head ordered_operations; | ||
1381 | |||
1382 | /* | ||
1383 | * there is a pool of worker threads for checksumming during writes | 1411 | * there is a pool of worker threads for checksumming during writes |
1384 | * and a pool for checksumming after reads. This is because readers | 1412 | * and a pool for checksumming after reads. This is because readers |
1385 | * can run with FS locks held, and the writers may be waiting for | 1413 | * can run with FS locks held, and the writers may be waiting for |
@@ -1423,10 +1451,12 @@ struct btrfs_fs_info { | |||
1423 | 1451 | ||
1424 | u64 total_pinned; | 1452 | u64 total_pinned; |
1425 | 1453 | ||
1426 | /* protected by the delalloc lock, used to keep from writing | 1454 | /* used to keep from writing metadata until there is a nice batch */ |
1427 | * metadata until there is a nice batch | 1455 | struct percpu_counter dirty_metadata_bytes; |
1428 | */ | 1456 | struct percpu_counter delalloc_bytes; |
1429 | u64 dirty_metadata_bytes; | 1457 | s32 dirty_metadata_batch; |
1458 | s32 delalloc_batch; | ||
1459 | |||
1430 | struct list_head dirty_cowonly_roots; | 1460 | struct list_head dirty_cowonly_roots; |
1431 | 1461 | ||
1432 | struct btrfs_fs_devices *fs_devices; | 1462 | struct btrfs_fs_devices *fs_devices; |
@@ -1442,9 +1472,6 @@ struct btrfs_fs_info { | |||
1442 | 1472 | ||
1443 | struct reloc_control *reloc_ctl; | 1473 | struct reloc_control *reloc_ctl; |
1444 | 1474 | ||
1445 | spinlock_t delalloc_lock; | ||
1446 | u64 delalloc_bytes; | ||
1447 | |||
1448 | /* data_alloc_cluster is only used in ssd mode */ | 1475 | /* data_alloc_cluster is only used in ssd mode */ |
1449 | struct btrfs_free_cluster data_alloc_cluster; | 1476 | struct btrfs_free_cluster data_alloc_cluster; |
1450 | 1477 | ||
@@ -1456,6 +1483,8 @@ struct btrfs_fs_info { | |||
1456 | struct rb_root defrag_inodes; | 1483 | struct rb_root defrag_inodes; |
1457 | atomic_t defrag_running; | 1484 | atomic_t defrag_running; |
1458 | 1485 | ||
1486 | /* Used to protect avail_{data, metadata, system}_alloc_bits */ | ||
1487 | seqlock_t profiles_lock; | ||
1459 | /* | 1488 | /* |
1460 | * these three are in extended format (availability of single | 1489 | * these three are in extended format (availability of single |
1461 | * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other | 1490 | * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other |
@@ -1520,7 +1549,7 @@ struct btrfs_fs_info { | |||
1520 | u64 qgroup_seq; | 1549 | u64 qgroup_seq; |
1521 | 1550 | ||
1522 | /* filesystem state */ | 1551 | /* filesystem state */ |
1523 | u64 fs_state; | 1552 | unsigned long fs_state; |
1524 | 1553 | ||
1525 | struct btrfs_delayed_root *delayed_root; | 1554 | struct btrfs_delayed_root *delayed_root; |
1526 | 1555 | ||
@@ -1623,6 +1652,9 @@ struct btrfs_root { | |||
1623 | 1652 | ||
1624 | struct list_head root_list; | 1653 | struct list_head root_list; |
1625 | 1654 | ||
1655 | spinlock_t log_extents_lock[2]; | ||
1656 | struct list_head logged_list[2]; | ||
1657 | |||
1626 | spinlock_t orphan_lock; | 1658 | spinlock_t orphan_lock; |
1627 | atomic_t orphan_inodes; | 1659 | atomic_t orphan_inodes; |
1628 | struct btrfs_block_rsv *orphan_block_rsv; | 1660 | struct btrfs_block_rsv *orphan_block_rsv; |
@@ -2936,8 +2968,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | |||
2936 | u64 num_bytes, u64 *refs, u64 *flags); | 2968 | u64 num_bytes, u64 *refs, u64 *flags); |
2937 | int btrfs_pin_extent(struct btrfs_root *root, | 2969 | int btrfs_pin_extent(struct btrfs_root *root, |
2938 | u64 bytenr, u64 num, int reserved); | 2970 | u64 bytenr, u64 num, int reserved); |
2939 | int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, | 2971 | int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, |
2940 | struct btrfs_root *root, | ||
2941 | u64 bytenr, u64 num_bytes); | 2972 | u64 bytenr, u64 num_bytes); |
2942 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 2973 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
2943 | struct btrfs_root *root, | 2974 | struct btrfs_root *root, |
@@ -3092,10 +3123,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root); | |||
3092 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); | 3123 | struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root); |
3093 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, | 3124 | int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path, |
3094 | struct btrfs_key *key, int lowest_level, | 3125 | struct btrfs_key *key, int lowest_level, |
3095 | int cache_only, u64 min_trans); | 3126 | u64 min_trans); |
3096 | int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | 3127 | int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, |
3097 | struct btrfs_key *max_key, | 3128 | struct btrfs_key *max_key, |
3098 | struct btrfs_path *path, int cache_only, | 3129 | struct btrfs_path *path, |
3099 | u64 min_trans); | 3130 | u64 min_trans); |
3100 | enum btrfs_compare_tree_result { | 3131 | enum btrfs_compare_tree_result { |
3101 | BTRFS_COMPARE_TREE_NEW, | 3132 | BTRFS_COMPARE_TREE_NEW, |
@@ -3148,7 +3179,7 @@ int btrfs_search_slot_for_read(struct btrfs_root *root, | |||
3148 | int find_higher, int return_any); | 3179 | int find_higher, int return_any); |
3149 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, | 3180 | int btrfs_realloc_node(struct btrfs_trans_handle *trans, |
3150 | struct btrfs_root *root, struct extent_buffer *parent, | 3181 | struct btrfs_root *root, struct extent_buffer *parent, |
3151 | int start_slot, int cache_only, u64 *last_ret, | 3182 | int start_slot, u64 *last_ret, |
3152 | struct btrfs_key *progress); | 3183 | struct btrfs_key *progress); |
3153 | void btrfs_release_path(struct btrfs_path *p); | 3184 | void btrfs_release_path(struct btrfs_path *p); |
3154 | struct btrfs_path *btrfs_alloc_path(void); | 3185 | struct btrfs_path *btrfs_alloc_path(void); |
@@ -3543,7 +3574,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode, | |||
3543 | 3574 | ||
3544 | /* tree-defrag.c */ | 3575 | /* tree-defrag.c */ |
3545 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | 3576 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, |
3546 | struct btrfs_root *root, int cache_only); | 3577 | struct btrfs_root *root); |
3547 | 3578 | ||
3548 | /* sysfs.c */ | 3579 | /* sysfs.c */ |
3549 | int btrfs_init_sysfs(void); | 3580 | int btrfs_init_sysfs(void); |
@@ -3620,11 +3651,14 @@ __printf(5, 6) | |||
3620 | void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, | 3651 | void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, |
3621 | unsigned int line, int errno, const char *fmt, ...); | 3652 | unsigned int line, int errno, const char *fmt, ...); |
3622 | 3653 | ||
3654 | /* | ||
3655 | * If BTRFS_MOUNT_PANIC_ON_FATAL_ERROR is in mount_opt, __btrfs_panic | ||
3656 | * will panic(). Otherwise we BUG() here. | ||
3657 | */ | ||
3623 | #define btrfs_panic(fs_info, errno, fmt, args...) \ | 3658 | #define btrfs_panic(fs_info, errno, fmt, args...) \ |
3624 | do { \ | 3659 | do { \ |
3625 | struct btrfs_fs_info *_i = (fs_info); \ | 3660 | __btrfs_panic(fs_info, __func__, __LINE__, errno, fmt, ##args); \ |
3626 | __btrfs_panic(_i, __func__, __LINE__, errno, fmt, ##args); \ | 3661 | BUG(); \ |
3627 | BUG_ON(!(_i->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)); \ | ||
3628 | } while (0) | 3662 | } while (0) |
3629 | 3663 | ||
3630 | /* acl.c */ | 3664 | /* acl.c */ |
@@ -3745,4 +3779,11 @@ static inline int is_fstree(u64 rootid) | |||
3745 | return 1; | 3779 | return 1; |
3746 | return 0; | 3780 | return 0; |
3747 | } | 3781 | } |
3782 | |||
3783 | static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info) | ||
3784 | { | ||
3785 | return signal_pending(current); | ||
3786 | } | ||
3787 | |||
3788 | |||
3748 | #endif | 3789 | #endif |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 34836036f01b..0b278b117cbe 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -875,7 +875,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, | |||
875 | struct btrfs_delayed_item *delayed_item) | 875 | struct btrfs_delayed_item *delayed_item) |
876 | { | 876 | { |
877 | struct extent_buffer *leaf; | 877 | struct extent_buffer *leaf; |
878 | struct btrfs_item *item; | ||
879 | char *ptr; | 878 | char *ptr; |
880 | int ret; | 879 | int ret; |
881 | 880 | ||
@@ -886,7 +885,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans, | |||
886 | 885 | ||
887 | leaf = path->nodes[0]; | 886 | leaf = path->nodes[0]; |
888 | 887 | ||
889 | item = btrfs_item_nr(leaf, path->slots[0]); | ||
890 | ptr = btrfs_item_ptr(leaf, path->slots[0], char); | 888 | ptr = btrfs_item_ptr(leaf, path->slots[0], char); |
891 | 889 | ||
892 | write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr, | 890 | write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr, |
@@ -1065,32 +1063,25 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node) | |||
1065 | } | 1063 | } |
1066 | } | 1064 | } |
1067 | 1065 | ||
1068 | static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, | 1066 | static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, |
1069 | struct btrfs_root *root, | 1067 | struct btrfs_root *root, |
1070 | struct btrfs_path *path, | 1068 | struct btrfs_path *path, |
1071 | struct btrfs_delayed_node *node) | 1069 | struct btrfs_delayed_node *node) |
1072 | { | 1070 | { |
1073 | struct btrfs_key key; | 1071 | struct btrfs_key key; |
1074 | struct btrfs_inode_item *inode_item; | 1072 | struct btrfs_inode_item *inode_item; |
1075 | struct extent_buffer *leaf; | 1073 | struct extent_buffer *leaf; |
1076 | int ret; | 1074 | int ret; |
1077 | 1075 | ||
1078 | mutex_lock(&node->mutex); | ||
1079 | if (!node->inode_dirty) { | ||
1080 | mutex_unlock(&node->mutex); | ||
1081 | return 0; | ||
1082 | } | ||
1083 | |||
1084 | key.objectid = node->inode_id; | 1076 | key.objectid = node->inode_id; |
1085 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); | 1077 | btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); |
1086 | key.offset = 0; | 1078 | key.offset = 0; |
1079 | |||
1087 | ret = btrfs_lookup_inode(trans, root, path, &key, 1); | 1080 | ret = btrfs_lookup_inode(trans, root, path, &key, 1); |
1088 | if (ret > 0) { | 1081 | if (ret > 0) { |
1089 | btrfs_release_path(path); | 1082 | btrfs_release_path(path); |
1090 | mutex_unlock(&node->mutex); | ||
1091 | return -ENOENT; | 1083 | return -ENOENT; |
1092 | } else if (ret < 0) { | 1084 | } else if (ret < 0) { |
1093 | mutex_unlock(&node->mutex); | ||
1094 | return ret; | 1085 | return ret; |
1095 | } | 1086 | } |
1096 | 1087 | ||
@@ -1105,11 +1096,47 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, | |||
1105 | 1096 | ||
1106 | btrfs_delayed_inode_release_metadata(root, node); | 1097 | btrfs_delayed_inode_release_metadata(root, node); |
1107 | btrfs_release_delayed_inode(node); | 1098 | btrfs_release_delayed_inode(node); |
1108 | mutex_unlock(&node->mutex); | ||
1109 | 1099 | ||
1110 | return 0; | 1100 | return 0; |
1111 | } | 1101 | } |
1112 | 1102 | ||
1103 | static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, | ||
1104 | struct btrfs_root *root, | ||
1105 | struct btrfs_path *path, | ||
1106 | struct btrfs_delayed_node *node) | ||
1107 | { | ||
1108 | int ret; | ||
1109 | |||
1110 | mutex_lock(&node->mutex); | ||
1111 | if (!node->inode_dirty) { | ||
1112 | mutex_unlock(&node->mutex); | ||
1113 | return 0; | ||
1114 | } | ||
1115 | |||
1116 | ret = __btrfs_update_delayed_inode(trans, root, path, node); | ||
1117 | mutex_unlock(&node->mutex); | ||
1118 | return ret; | ||
1119 | } | ||
1120 | |||
1121 | static inline int | ||
1122 | __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | ||
1123 | struct btrfs_path *path, | ||
1124 | struct btrfs_delayed_node *node) | ||
1125 | { | ||
1126 | int ret; | ||
1127 | |||
1128 | ret = btrfs_insert_delayed_items(trans, path, node->root, node); | ||
1129 | if (ret) | ||
1130 | return ret; | ||
1131 | |||
1132 | ret = btrfs_delete_delayed_items(trans, path, node->root, node); | ||
1133 | if (ret) | ||
1134 | return ret; | ||
1135 | |||
1136 | ret = btrfs_update_delayed_inode(trans, node->root, path, node); | ||
1137 | return ret; | ||
1138 | } | ||
1139 | |||
1113 | /* | 1140 | /* |
1114 | * Called when committing the transaction. | 1141 | * Called when committing the transaction. |
1115 | * Returns 0 on success. | 1142 | * Returns 0 on success. |
@@ -1119,7 +1146,6 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans, | |||
1119 | static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | 1146 | static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, |
1120 | struct btrfs_root *root, int nr) | 1147 | struct btrfs_root *root, int nr) |
1121 | { | 1148 | { |
1122 | struct btrfs_root *curr_root = root; | ||
1123 | struct btrfs_delayed_root *delayed_root; | 1149 | struct btrfs_delayed_root *delayed_root; |
1124 | struct btrfs_delayed_node *curr_node, *prev_node; | 1150 | struct btrfs_delayed_node *curr_node, *prev_node; |
1125 | struct btrfs_path *path; | 1151 | struct btrfs_path *path; |
@@ -1142,15 +1168,8 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans, | |||
1142 | 1168 | ||
1143 | curr_node = btrfs_first_delayed_node(delayed_root); | 1169 | curr_node = btrfs_first_delayed_node(delayed_root); |
1144 | while (curr_node && (!count || (count && nr--))) { | 1170 | while (curr_node && (!count || (count && nr--))) { |
1145 | curr_root = curr_node->root; | 1171 | ret = __btrfs_commit_inode_delayed_items(trans, path, |
1146 | ret = btrfs_insert_delayed_items(trans, path, curr_root, | 1172 | curr_node); |
1147 | curr_node); | ||
1148 | if (!ret) | ||
1149 | ret = btrfs_delete_delayed_items(trans, path, | ||
1150 | curr_root, curr_node); | ||
1151 | if (!ret) | ||
1152 | ret = btrfs_update_delayed_inode(trans, curr_root, | ||
1153 | path, curr_node); | ||
1154 | if (ret) { | 1173 | if (ret) { |
1155 | btrfs_release_delayed_node(curr_node); | 1174 | btrfs_release_delayed_node(curr_node); |
1156 | curr_node = NULL; | 1175 | curr_node = NULL; |
@@ -1183,51 +1202,93 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans, | |||
1183 | return __btrfs_run_delayed_items(trans, root, nr); | 1202 | return __btrfs_run_delayed_items(trans, root, nr); |
1184 | } | 1203 | } |
1185 | 1204 | ||
1186 | static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | 1205 | int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, |
1187 | struct btrfs_delayed_node *node) | 1206 | struct inode *inode) |
1188 | { | 1207 | { |
1208 | struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); | ||
1189 | struct btrfs_path *path; | 1209 | struct btrfs_path *path; |
1190 | struct btrfs_block_rsv *block_rsv; | 1210 | struct btrfs_block_rsv *block_rsv; |
1191 | int ret; | 1211 | int ret; |
1192 | 1212 | ||
1213 | if (!delayed_node) | ||
1214 | return 0; | ||
1215 | |||
1216 | mutex_lock(&delayed_node->mutex); | ||
1217 | if (!delayed_node->count) { | ||
1218 | mutex_unlock(&delayed_node->mutex); | ||
1219 | btrfs_release_delayed_node(delayed_node); | ||
1220 | return 0; | ||
1221 | } | ||
1222 | mutex_unlock(&delayed_node->mutex); | ||
1223 | |||
1193 | path = btrfs_alloc_path(); | 1224 | path = btrfs_alloc_path(); |
1194 | if (!path) | 1225 | if (!path) |
1195 | return -ENOMEM; | 1226 | return -ENOMEM; |
1196 | path->leave_spinning = 1; | 1227 | path->leave_spinning = 1; |
1197 | 1228 | ||
1198 | block_rsv = trans->block_rsv; | 1229 | block_rsv = trans->block_rsv; |
1199 | trans->block_rsv = &node->root->fs_info->delayed_block_rsv; | 1230 | trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv; |
1200 | 1231 | ||
1201 | ret = btrfs_insert_delayed_items(trans, path, node->root, node); | 1232 | ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node); |
1202 | if (!ret) | ||
1203 | ret = btrfs_delete_delayed_items(trans, path, node->root, node); | ||
1204 | if (!ret) | ||
1205 | ret = btrfs_update_delayed_inode(trans, node->root, path, node); | ||
1206 | btrfs_free_path(path); | ||
1207 | 1233 | ||
1234 | btrfs_release_delayed_node(delayed_node); | ||
1235 | btrfs_free_path(path); | ||
1208 | trans->block_rsv = block_rsv; | 1236 | trans->block_rsv = block_rsv; |
1237 | |||
1209 | return ret; | 1238 | return ret; |
1210 | } | 1239 | } |
1211 | 1240 | ||
1212 | int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | 1241 | int btrfs_commit_inode_delayed_inode(struct inode *inode) |
1213 | struct inode *inode) | ||
1214 | { | 1242 | { |
1243 | struct btrfs_trans_handle *trans; | ||
1215 | struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); | 1244 | struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode); |
1245 | struct btrfs_path *path; | ||
1246 | struct btrfs_block_rsv *block_rsv; | ||
1216 | int ret; | 1247 | int ret; |
1217 | 1248 | ||
1218 | if (!delayed_node) | 1249 | if (!delayed_node) |
1219 | return 0; | 1250 | return 0; |
1220 | 1251 | ||
1221 | mutex_lock(&delayed_node->mutex); | 1252 | mutex_lock(&delayed_node->mutex); |
1222 | if (!delayed_node->count) { | 1253 | if (!delayed_node->inode_dirty) { |
1223 | mutex_unlock(&delayed_node->mutex); | 1254 | mutex_unlock(&delayed_node->mutex); |
1224 | btrfs_release_delayed_node(delayed_node); | 1255 | btrfs_release_delayed_node(delayed_node); |
1225 | return 0; | 1256 | return 0; |
1226 | } | 1257 | } |
1227 | mutex_unlock(&delayed_node->mutex); | 1258 | mutex_unlock(&delayed_node->mutex); |
1228 | 1259 | ||
1229 | ret = __btrfs_commit_inode_delayed_items(trans, delayed_node); | 1260 | trans = btrfs_join_transaction(delayed_node->root); |
1261 | if (IS_ERR(trans)) { | ||
1262 | ret = PTR_ERR(trans); | ||
1263 | goto out; | ||
1264 | } | ||
1265 | |||
1266 | path = btrfs_alloc_path(); | ||
1267 | if (!path) { | ||
1268 | ret = -ENOMEM; | ||
1269 | goto trans_out; | ||
1270 | } | ||
1271 | path->leave_spinning = 1; | ||
1272 | |||
1273 | block_rsv = trans->block_rsv; | ||
1274 | trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv; | ||
1275 | |||
1276 | mutex_lock(&delayed_node->mutex); | ||
1277 | if (delayed_node->inode_dirty) | ||
1278 | ret = __btrfs_update_delayed_inode(trans, delayed_node->root, | ||
1279 | path, delayed_node); | ||
1280 | else | ||
1281 | ret = 0; | ||
1282 | mutex_unlock(&delayed_node->mutex); | ||
1283 | |||
1284 | btrfs_free_path(path); | ||
1285 | trans->block_rsv = block_rsv; | ||
1286 | trans_out: | ||
1287 | btrfs_end_transaction(trans, delayed_node->root); | ||
1288 | btrfs_btree_balance_dirty(delayed_node->root); | ||
1289 | out: | ||
1230 | btrfs_release_delayed_node(delayed_node); | 1290 | btrfs_release_delayed_node(delayed_node); |
1291 | |||
1231 | return ret; | 1292 | return ret; |
1232 | } | 1293 | } |
1233 | 1294 | ||
@@ -1258,7 +1319,6 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
1258 | struct btrfs_root *root; | 1319 | struct btrfs_root *root; |
1259 | struct btrfs_block_rsv *block_rsv; | 1320 | struct btrfs_block_rsv *block_rsv; |
1260 | int need_requeue = 0; | 1321 | int need_requeue = 0; |
1261 | int ret; | ||
1262 | 1322 | ||
1263 | async_node = container_of(work, struct btrfs_async_delayed_node, work); | 1323 | async_node = container_of(work, struct btrfs_async_delayed_node, work); |
1264 | 1324 | ||
@@ -1277,14 +1337,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work) | |||
1277 | block_rsv = trans->block_rsv; | 1337 | block_rsv = trans->block_rsv; |
1278 | trans->block_rsv = &root->fs_info->delayed_block_rsv; | 1338 | trans->block_rsv = &root->fs_info->delayed_block_rsv; |
1279 | 1339 | ||
1280 | ret = btrfs_insert_delayed_items(trans, path, root, delayed_node); | 1340 | __btrfs_commit_inode_delayed_items(trans, path, delayed_node); |
1281 | if (!ret) | ||
1282 | ret = btrfs_delete_delayed_items(trans, path, root, | ||
1283 | delayed_node); | ||
1284 | |||
1285 | if (!ret) | ||
1286 | btrfs_update_delayed_inode(trans, root, path, delayed_node); | ||
1287 | |||
1288 | /* | 1341 | /* |
1289 | * Maybe new delayed items have been inserted, so we need requeue | 1342 | * Maybe new delayed items have been inserted, so we need requeue |
1290 | * the work. Besides that, we must dequeue the empty delayed nodes | 1343 | * the work. Besides that, we must dequeue the empty delayed nodes |
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 4f808e1baeed..78b6ad0fc669 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h | |||
@@ -117,6 +117,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans, | |||
117 | /* Used for evicting the inode. */ | 117 | /* Used for evicting the inode. */ |
118 | void btrfs_remove_delayed_node(struct inode *inode); | 118 | void btrfs_remove_delayed_node(struct inode *inode); |
119 | void btrfs_kill_delayed_inode_items(struct inode *inode); | 119 | void btrfs_kill_delayed_inode_items(struct inode *inode); |
120 | int btrfs_commit_inode_delayed_inode(struct inode *inode); | ||
120 | 121 | ||
121 | 122 | ||
122 | int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, | 123 | int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index ae9411773397..b7a0641ead77 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -23,6 +23,10 @@ | |||
23 | #include "delayed-ref.h" | 23 | #include "delayed-ref.h" |
24 | #include "transaction.h" | 24 | #include "transaction.h" |
25 | 25 | ||
26 | struct kmem_cache *btrfs_delayed_ref_head_cachep; | ||
27 | struct kmem_cache *btrfs_delayed_tree_ref_cachep; | ||
28 | struct kmem_cache *btrfs_delayed_data_ref_cachep; | ||
29 | struct kmem_cache *btrfs_delayed_extent_op_cachep; | ||
26 | /* | 30 | /* |
27 | * delayed back reference update tracking. For subvolume trees | 31 | * delayed back reference update tracking. For subvolume trees |
28 | * we queue up extent allocations and backref maintenance for | 32 | * we queue up extent allocations and backref maintenance for |
@@ -422,6 +426,14 @@ again: | |||
422 | return 1; | 426 | return 1; |
423 | } | 427 | } |
424 | 428 | ||
429 | void btrfs_release_ref_cluster(struct list_head *cluster) | ||
430 | { | ||
431 | struct list_head *pos, *q; | ||
432 | |||
433 | list_for_each_safe(pos, q, cluster) | ||
434 | list_del_init(pos); | ||
435 | } | ||
436 | |||
425 | /* | 437 | /* |
426 | * helper function to update an extent delayed ref in the | 438 | * helper function to update an extent delayed ref in the |
427 | * rbtree. existing and update must both have the same | 439 | * rbtree. existing and update must both have the same |
@@ -511,7 +523,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
511 | ref->extent_op->flags_to_set; | 523 | ref->extent_op->flags_to_set; |
512 | existing_ref->extent_op->update_flags = 1; | 524 | existing_ref->extent_op->update_flags = 1; |
513 | } | 525 | } |
514 | kfree(ref->extent_op); | 526 | btrfs_free_delayed_extent_op(ref->extent_op); |
515 | } | 527 | } |
516 | } | 528 | } |
517 | /* | 529 | /* |
@@ -592,7 +604,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, | |||
592 | * we've updated the existing ref, free the newly | 604 | * we've updated the existing ref, free the newly |
593 | * allocated ref | 605 | * allocated ref |
594 | */ | 606 | */ |
595 | kfree(head_ref); | 607 | kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref); |
596 | } else { | 608 | } else { |
597 | delayed_refs->num_heads++; | 609 | delayed_refs->num_heads++; |
598 | delayed_refs->num_heads_ready++; | 610 | delayed_refs->num_heads_ready++; |
@@ -653,7 +665,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
653 | * we've updated the existing ref, free the newly | 665 | * we've updated the existing ref, free the newly |
654 | * allocated ref | 666 | * allocated ref |
655 | */ | 667 | */ |
656 | kfree(full_ref); | 668 | kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref); |
657 | } else { | 669 | } else { |
658 | delayed_refs->num_entries++; | 670 | delayed_refs->num_entries++; |
659 | trans->delayed_ref_updates++; | 671 | trans->delayed_ref_updates++; |
@@ -714,7 +726,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
714 | * we've updated the existing ref, free the newly | 726 | * we've updated the existing ref, free the newly |
715 | * allocated ref | 727 | * allocated ref |
716 | */ | 728 | */ |
717 | kfree(full_ref); | 729 | kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref); |
718 | } else { | 730 | } else { |
719 | delayed_refs->num_entries++; | 731 | delayed_refs->num_entries++; |
720 | trans->delayed_ref_updates++; | 732 | trans->delayed_ref_updates++; |
@@ -738,13 +750,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info, | |||
738 | struct btrfs_delayed_ref_root *delayed_refs; | 750 | struct btrfs_delayed_ref_root *delayed_refs; |
739 | 751 | ||
740 | BUG_ON(extent_op && extent_op->is_data); | 752 | BUG_ON(extent_op && extent_op->is_data); |
741 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | 753 | ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS); |
742 | if (!ref) | 754 | if (!ref) |
743 | return -ENOMEM; | 755 | return -ENOMEM; |
744 | 756 | ||
745 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | 757 | head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); |
746 | if (!head_ref) { | 758 | if (!head_ref) { |
747 | kfree(ref); | 759 | kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); |
748 | return -ENOMEM; | 760 | return -ENOMEM; |
749 | } | 761 | } |
750 | 762 | ||
@@ -786,13 +798,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info, | |||
786 | struct btrfs_delayed_ref_root *delayed_refs; | 798 | struct btrfs_delayed_ref_root *delayed_refs; |
787 | 799 | ||
788 | BUG_ON(extent_op && !extent_op->is_data); | 800 | BUG_ON(extent_op && !extent_op->is_data); |
789 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | 801 | ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS); |
790 | if (!ref) | 802 | if (!ref) |
791 | return -ENOMEM; | 803 | return -ENOMEM; |
792 | 804 | ||
793 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | 805 | head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); |
794 | if (!head_ref) { | 806 | if (!head_ref) { |
795 | kfree(ref); | 807 | kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); |
796 | return -ENOMEM; | 808 | return -ENOMEM; |
797 | } | 809 | } |
798 | 810 | ||
@@ -826,7 +838,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info, | |||
826 | struct btrfs_delayed_ref_head *head_ref; | 838 | struct btrfs_delayed_ref_head *head_ref; |
827 | struct btrfs_delayed_ref_root *delayed_refs; | 839 | struct btrfs_delayed_ref_root *delayed_refs; |
828 | 840 | ||
829 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | 841 | head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS); |
830 | if (!head_ref) | 842 | if (!head_ref) |
831 | return -ENOMEM; | 843 | return -ENOMEM; |
832 | 844 | ||
@@ -860,3 +872,51 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
860 | return btrfs_delayed_node_to_head(ref); | 872 | return btrfs_delayed_node_to_head(ref); |
861 | return NULL; | 873 | return NULL; |
862 | } | 874 | } |
875 | |||
876 | void btrfs_delayed_ref_exit(void) | ||
877 | { | ||
878 | if (btrfs_delayed_ref_head_cachep) | ||
879 | kmem_cache_destroy(btrfs_delayed_ref_head_cachep); | ||
880 | if (btrfs_delayed_tree_ref_cachep) | ||
881 | kmem_cache_destroy(btrfs_delayed_tree_ref_cachep); | ||
882 | if (btrfs_delayed_data_ref_cachep) | ||
883 | kmem_cache_destroy(btrfs_delayed_data_ref_cachep); | ||
884 | if (btrfs_delayed_extent_op_cachep) | ||
885 | kmem_cache_destroy(btrfs_delayed_extent_op_cachep); | ||
886 | } | ||
887 | |||
888 | int btrfs_delayed_ref_init(void) | ||
889 | { | ||
890 | btrfs_delayed_ref_head_cachep = kmem_cache_create( | ||
891 | "btrfs_delayed_ref_head", | ||
892 | sizeof(struct btrfs_delayed_ref_head), 0, | ||
893 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
894 | if (!btrfs_delayed_ref_head_cachep) | ||
895 | goto fail; | ||
896 | |||
897 | btrfs_delayed_tree_ref_cachep = kmem_cache_create( | ||
898 | "btrfs_delayed_tree_ref", | ||
899 | sizeof(struct btrfs_delayed_tree_ref), 0, | ||
900 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
901 | if (!btrfs_delayed_tree_ref_cachep) | ||
902 | goto fail; | ||
903 | |||
904 | btrfs_delayed_data_ref_cachep = kmem_cache_create( | ||
905 | "btrfs_delayed_data_ref", | ||
906 | sizeof(struct btrfs_delayed_data_ref), 0, | ||
907 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
908 | if (!btrfs_delayed_data_ref_cachep) | ||
909 | goto fail; | ||
910 | |||
911 | btrfs_delayed_extent_op_cachep = kmem_cache_create( | ||
912 | "btrfs_delayed_extent_op", | ||
913 | sizeof(struct btrfs_delayed_extent_op), 0, | ||
914 | SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL); | ||
915 | if (!btrfs_delayed_extent_op_cachep) | ||
916 | goto fail; | ||
917 | |||
918 | return 0; | ||
919 | fail: | ||
920 | btrfs_delayed_ref_exit(); | ||
921 | return -ENOMEM; | ||
922 | } | ||
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index c9d703693df0..7939149f8f27 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -141,12 +141,47 @@ struct btrfs_delayed_ref_root { | |||
141 | u64 run_delayed_start; | 141 | u64 run_delayed_start; |
142 | }; | 142 | }; |
143 | 143 | ||
144 | extern struct kmem_cache *btrfs_delayed_ref_head_cachep; | ||
145 | extern struct kmem_cache *btrfs_delayed_tree_ref_cachep; | ||
146 | extern struct kmem_cache *btrfs_delayed_data_ref_cachep; | ||
147 | extern struct kmem_cache *btrfs_delayed_extent_op_cachep; | ||
148 | |||
149 | int btrfs_delayed_ref_init(void); | ||
150 | void btrfs_delayed_ref_exit(void); | ||
151 | |||
152 | static inline struct btrfs_delayed_extent_op * | ||
153 | btrfs_alloc_delayed_extent_op(void) | ||
154 | { | ||
155 | return kmem_cache_alloc(btrfs_delayed_extent_op_cachep, GFP_NOFS); | ||
156 | } | ||
157 | |||
158 | static inline void | ||
159 | btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op) | ||
160 | { | ||
161 | if (op) | ||
162 | kmem_cache_free(btrfs_delayed_extent_op_cachep, op); | ||
163 | } | ||
164 | |||
144 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) | 165 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) |
145 | { | 166 | { |
146 | WARN_ON(atomic_read(&ref->refs) == 0); | 167 | WARN_ON(atomic_read(&ref->refs) == 0); |
147 | if (atomic_dec_and_test(&ref->refs)) { | 168 | if (atomic_dec_and_test(&ref->refs)) { |
148 | WARN_ON(ref->in_tree); | 169 | WARN_ON(ref->in_tree); |
149 | kfree(ref); | 170 | switch (ref->type) { |
171 | case BTRFS_TREE_BLOCK_REF_KEY: | ||
172 | case BTRFS_SHARED_BLOCK_REF_KEY: | ||
173 | kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref); | ||
174 | break; | ||
175 | case BTRFS_EXTENT_DATA_REF_KEY: | ||
176 | case BTRFS_SHARED_DATA_REF_KEY: | ||
177 | kmem_cache_free(btrfs_delayed_data_ref_cachep, ref); | ||
178 | break; | ||
179 | case 0: | ||
180 | kmem_cache_free(btrfs_delayed_ref_head_cachep, ref); | ||
181 | break; | ||
182 | default: | ||
183 | BUG(); | ||
184 | } | ||
150 | } | 185 | } |
151 | } | 186 | } |
152 | 187 | ||
@@ -176,8 +211,14 @@ struct btrfs_delayed_ref_head * | |||
176 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | 211 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); |
177 | int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | 212 | int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, |
178 | struct btrfs_delayed_ref_head *head); | 213 | struct btrfs_delayed_ref_head *head); |
214 | static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head) | ||
215 | { | ||
216 | mutex_unlock(&head->mutex); | ||
217 | } | ||
218 | |||
179 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | 219 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, |
180 | struct list_head *cluster, u64 search_start); | 220 | struct list_head *cluster, u64 search_start); |
221 | void btrfs_release_ref_cluster(struct list_head *cluster); | ||
181 | 222 | ||
182 | int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, | 223 | int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, |
183 | struct btrfs_delayed_ref_root *delayed_refs, | 224 | struct btrfs_delayed_ref_root *delayed_refs, |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 66dbc8dbddf7..7ba7b3900cb8 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
@@ -465,7 +465,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
465 | * flush all outstanding I/O and inode extent mappings before the | 465 | * flush all outstanding I/O and inode extent mappings before the |
466 | * copy operation is declared as being finished | 466 | * copy operation is declared as being finished |
467 | */ | 467 | */ |
468 | btrfs_start_delalloc_inodes(root, 0); | 468 | ret = btrfs_start_delalloc_inodes(root, 0); |
469 | if (ret) { | ||
470 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | ||
471 | return ret; | ||
472 | } | ||
469 | btrfs_wait_ordered_extents(root, 0); | 473 | btrfs_wait_ordered_extents(root, 0); |
470 | 474 | ||
471 | trans = btrfs_start_transaction(root, 0); | 475 | trans = btrfs_start_transaction(root, 0); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index a8f652dc940b..779b401cd952 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -56,7 +56,8 @@ static void end_workqueue_fn(struct btrfs_work *work); | |||
56 | static void free_fs_root(struct btrfs_root *root); | 56 | static void free_fs_root(struct btrfs_root *root); |
57 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 57 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
58 | int read_only); | 58 | int read_only); |
59 | static void btrfs_destroy_ordered_operations(struct btrfs_root *root); | 59 | static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, |
60 | struct btrfs_root *root); | ||
60 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root); | 61 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root); |
61 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | 62 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, |
62 | struct btrfs_root *root); | 63 | struct btrfs_root *root); |
@@ -420,7 +421,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
420 | static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | 421 | static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) |
421 | { | 422 | { |
422 | struct extent_io_tree *tree; | 423 | struct extent_io_tree *tree; |
423 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 424 | u64 start = page_offset(page); |
424 | u64 found_start; | 425 | u64 found_start; |
425 | struct extent_buffer *eb; | 426 | struct extent_buffer *eb; |
426 | 427 | ||
@@ -946,18 +947,20 @@ static int btree_writepages(struct address_space *mapping, | |||
946 | struct writeback_control *wbc) | 947 | struct writeback_control *wbc) |
947 | { | 948 | { |
948 | struct extent_io_tree *tree; | 949 | struct extent_io_tree *tree; |
950 | struct btrfs_fs_info *fs_info; | ||
951 | int ret; | ||
952 | |||
949 | tree = &BTRFS_I(mapping->host)->io_tree; | 953 | tree = &BTRFS_I(mapping->host)->io_tree; |
950 | if (wbc->sync_mode == WB_SYNC_NONE) { | 954 | if (wbc->sync_mode == WB_SYNC_NONE) { |
951 | struct btrfs_root *root = BTRFS_I(mapping->host)->root; | ||
952 | u64 num_dirty; | ||
953 | unsigned long thresh = 32 * 1024 * 1024; | ||
954 | 955 | ||
955 | if (wbc->for_kupdate) | 956 | if (wbc->for_kupdate) |
956 | return 0; | 957 | return 0; |
957 | 958 | ||
959 | fs_info = BTRFS_I(mapping->host)->root->fs_info; | ||
958 | /* this is a bit racy, but that's ok */ | 960 | /* this is a bit racy, but that's ok */ |
959 | num_dirty = root->fs_info->dirty_metadata_bytes; | 961 | ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes, |
960 | if (num_dirty < thresh) | 962 | BTRFS_DIRTY_METADATA_THRESH); |
963 | if (ret < 0) | ||
961 | return 0; | 964 | return 0; |
962 | } | 965 | } |
963 | return btree_write_cache_pages(mapping, wbc); | 966 | return btree_write_cache_pages(mapping, wbc); |
@@ -1125,24 +1128,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
1125 | void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1128 | void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1126 | struct extent_buffer *buf) | 1129 | struct extent_buffer *buf) |
1127 | { | 1130 | { |
1131 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1132 | |||
1128 | if (btrfs_header_generation(buf) == | 1133 | if (btrfs_header_generation(buf) == |
1129 | root->fs_info->running_transaction->transid) { | 1134 | fs_info->running_transaction->transid) { |
1130 | btrfs_assert_tree_locked(buf); | 1135 | btrfs_assert_tree_locked(buf); |
1131 | 1136 | ||
1132 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { | 1137 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { |
1133 | spin_lock(&root->fs_info->delalloc_lock); | 1138 | __percpu_counter_add(&fs_info->dirty_metadata_bytes, |
1134 | if (root->fs_info->dirty_metadata_bytes >= buf->len) | 1139 | -buf->len, |
1135 | root->fs_info->dirty_metadata_bytes -= buf->len; | 1140 | fs_info->dirty_metadata_batch); |
1136 | else { | ||
1137 | spin_unlock(&root->fs_info->delalloc_lock); | ||
1138 | btrfs_panic(root->fs_info, -EOVERFLOW, | ||
1139 | "Can't clear %lu bytes from " | ||
1140 | " dirty_mdatadata_bytes (%llu)", | ||
1141 | buf->len, | ||
1142 | root->fs_info->dirty_metadata_bytes); | ||
1143 | } | ||
1144 | spin_unlock(&root->fs_info->delalloc_lock); | ||
1145 | |||
1146 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ | 1141 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ |
1147 | btrfs_set_lock_blocking(buf); | 1142 | btrfs_set_lock_blocking(buf); |
1148 | clear_extent_buffer_dirty(buf); | 1143 | clear_extent_buffer_dirty(buf); |
@@ -1178,9 +1173,13 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1178 | 1173 | ||
1179 | INIT_LIST_HEAD(&root->dirty_list); | 1174 | INIT_LIST_HEAD(&root->dirty_list); |
1180 | INIT_LIST_HEAD(&root->root_list); | 1175 | INIT_LIST_HEAD(&root->root_list); |
1176 | INIT_LIST_HEAD(&root->logged_list[0]); | ||
1177 | INIT_LIST_HEAD(&root->logged_list[1]); | ||
1181 | spin_lock_init(&root->orphan_lock); | 1178 | spin_lock_init(&root->orphan_lock); |
1182 | spin_lock_init(&root->inode_lock); | 1179 | spin_lock_init(&root->inode_lock); |
1183 | spin_lock_init(&root->accounting_lock); | 1180 | spin_lock_init(&root->accounting_lock); |
1181 | spin_lock_init(&root->log_extents_lock[0]); | ||
1182 | spin_lock_init(&root->log_extents_lock[1]); | ||
1184 | mutex_init(&root->objectid_mutex); | 1183 | mutex_init(&root->objectid_mutex); |
1185 | mutex_init(&root->log_mutex); | 1184 | mutex_init(&root->log_mutex); |
1186 | init_waitqueue_head(&root->log_writer_wait); | 1185 | init_waitqueue_head(&root->log_writer_wait); |
@@ -2004,10 +2003,24 @@ int open_ctree(struct super_block *sb, | |||
2004 | goto fail_srcu; | 2003 | goto fail_srcu; |
2005 | } | 2004 | } |
2006 | 2005 | ||
2006 | ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0); | ||
2007 | if (ret) { | ||
2008 | err = ret; | ||
2009 | goto fail_bdi; | ||
2010 | } | ||
2011 | fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * | ||
2012 | (1 + ilog2(nr_cpu_ids)); | ||
2013 | |||
2014 | ret = percpu_counter_init(&fs_info->delalloc_bytes, 0); | ||
2015 | if (ret) { | ||
2016 | err = ret; | ||
2017 | goto fail_dirty_metadata_bytes; | ||
2018 | } | ||
2019 | |||
2007 | fs_info->btree_inode = new_inode(sb); | 2020 | fs_info->btree_inode = new_inode(sb); |
2008 | if (!fs_info->btree_inode) { | 2021 | if (!fs_info->btree_inode) { |
2009 | err = -ENOMEM; | 2022 | err = -ENOMEM; |
2010 | goto fail_bdi; | 2023 | goto fail_delalloc_bytes; |
2011 | } | 2024 | } |
2012 | 2025 | ||
2013 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); | 2026 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
@@ -2017,7 +2030,6 @@ int open_ctree(struct super_block *sb, | |||
2017 | INIT_LIST_HEAD(&fs_info->dead_roots); | 2030 | INIT_LIST_HEAD(&fs_info->dead_roots); |
2018 | INIT_LIST_HEAD(&fs_info->delayed_iputs); | 2031 | INIT_LIST_HEAD(&fs_info->delayed_iputs); |
2019 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 2032 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
2020 | INIT_LIST_HEAD(&fs_info->ordered_operations); | ||
2021 | INIT_LIST_HEAD(&fs_info->caching_block_groups); | 2033 | INIT_LIST_HEAD(&fs_info->caching_block_groups); |
2022 | spin_lock_init(&fs_info->delalloc_lock); | 2034 | spin_lock_init(&fs_info->delalloc_lock); |
2023 | spin_lock_init(&fs_info->trans_lock); | 2035 | spin_lock_init(&fs_info->trans_lock); |
@@ -2028,6 +2040,7 @@ int open_ctree(struct super_block *sb, | |||
2028 | spin_lock_init(&fs_info->tree_mod_seq_lock); | 2040 | spin_lock_init(&fs_info->tree_mod_seq_lock); |
2029 | rwlock_init(&fs_info->tree_mod_log_lock); | 2041 | rwlock_init(&fs_info->tree_mod_log_lock); |
2030 | mutex_init(&fs_info->reloc_mutex); | 2042 | mutex_init(&fs_info->reloc_mutex); |
2043 | seqlock_init(&fs_info->profiles_lock); | ||
2031 | 2044 | ||
2032 | init_completion(&fs_info->kobj_unregister); | 2045 | init_completion(&fs_info->kobj_unregister); |
2033 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 2046 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
@@ -2126,6 +2139,7 @@ int open_ctree(struct super_block *sb, | |||
2126 | 2139 | ||
2127 | spin_lock_init(&fs_info->block_group_cache_lock); | 2140 | spin_lock_init(&fs_info->block_group_cache_lock); |
2128 | fs_info->block_group_cache_tree = RB_ROOT; | 2141 | fs_info->block_group_cache_tree = RB_ROOT; |
2142 | fs_info->first_logical_byte = (u64)-1; | ||
2129 | 2143 | ||
2130 | extent_io_tree_init(&fs_info->freed_extents[0], | 2144 | extent_io_tree_init(&fs_info->freed_extents[0], |
2131 | fs_info->btree_inode->i_mapping); | 2145 | fs_info->btree_inode->i_mapping); |
@@ -2187,7 +2201,8 @@ int open_ctree(struct super_block *sb, | |||
2187 | goto fail_alloc; | 2201 | goto fail_alloc; |
2188 | 2202 | ||
2189 | /* check FS state, whether FS is broken. */ | 2203 | /* check FS state, whether FS is broken. */ |
2190 | fs_info->fs_state |= btrfs_super_flags(disk_super); | 2204 | if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR) |
2205 | set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); | ||
2191 | 2206 | ||
2192 | ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | 2207 | ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); |
2193 | if (ret) { | 2208 | if (ret) { |
@@ -2261,6 +2276,8 @@ int open_ctree(struct super_block *sb, | |||
2261 | leafsize = btrfs_super_leafsize(disk_super); | 2276 | leafsize = btrfs_super_leafsize(disk_super); |
2262 | sectorsize = btrfs_super_sectorsize(disk_super); | 2277 | sectorsize = btrfs_super_sectorsize(disk_super); |
2263 | stripesize = btrfs_super_stripesize(disk_super); | 2278 | stripesize = btrfs_super_stripesize(disk_super); |
2279 | fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids)); | ||
2280 | fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids)); | ||
2264 | 2281 | ||
2265 | /* | 2282 | /* |
2266 | * mixed block groups end up with duplicate but slightly offset | 2283 | * mixed block groups end up with duplicate but slightly offset |
@@ -2390,8 +2407,7 @@ int open_ctree(struct super_block *sb, | |||
2390 | sb->s_blocksize = sectorsize; | 2407 | sb->s_blocksize = sectorsize; |
2391 | sb->s_blocksize_bits = blksize_bits(sectorsize); | 2408 | sb->s_blocksize_bits = blksize_bits(sectorsize); |
2392 | 2409 | ||
2393 | if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, | 2410 | if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) { |
2394 | sizeof(disk_super->magic))) { | ||
2395 | printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); | 2411 | printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); |
2396 | goto fail_sb_buffer; | 2412 | goto fail_sb_buffer; |
2397 | } | 2413 | } |
@@ -2694,13 +2710,13 @@ fail_cleaner: | |||
2694 | * kthreads | 2710 | * kthreads |
2695 | */ | 2711 | */ |
2696 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); | 2712 | filemap_write_and_wait(fs_info->btree_inode->i_mapping); |
2697 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | ||
2698 | 2713 | ||
2699 | fail_block_groups: | 2714 | fail_block_groups: |
2700 | btrfs_free_block_groups(fs_info); | 2715 | btrfs_free_block_groups(fs_info); |
2701 | 2716 | ||
2702 | fail_tree_roots: | 2717 | fail_tree_roots: |
2703 | free_root_pointers(fs_info, 1); | 2718 | free_root_pointers(fs_info, 1); |
2719 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | ||
2704 | 2720 | ||
2705 | fail_sb_buffer: | 2721 | fail_sb_buffer: |
2706 | btrfs_stop_workers(&fs_info->generic_worker); | 2722 | btrfs_stop_workers(&fs_info->generic_worker); |
@@ -2721,8 +2737,11 @@ fail_alloc: | |||
2721 | fail_iput: | 2737 | fail_iput: |
2722 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2738 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
2723 | 2739 | ||
2724 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | ||
2725 | iput(fs_info->btree_inode); | 2740 | iput(fs_info->btree_inode); |
2741 | fail_delalloc_bytes: | ||
2742 | percpu_counter_destroy(&fs_info->delalloc_bytes); | ||
2743 | fail_dirty_metadata_bytes: | ||
2744 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); | ||
2726 | fail_bdi: | 2745 | fail_bdi: |
2727 | bdi_destroy(&fs_info->bdi); | 2746 | bdi_destroy(&fs_info->bdi); |
2728 | fail_srcu: | 2747 | fail_srcu: |
@@ -2795,8 +2814,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev) | |||
2795 | 2814 | ||
2796 | super = (struct btrfs_super_block *)bh->b_data; | 2815 | super = (struct btrfs_super_block *)bh->b_data; |
2797 | if (btrfs_super_bytenr(super) != bytenr || | 2816 | if (btrfs_super_bytenr(super) != bytenr || |
2798 | strncmp((char *)(&super->magic), BTRFS_MAGIC, | 2817 | super->magic != cpu_to_le64(BTRFS_MAGIC)) { |
2799 | sizeof(super->magic))) { | ||
2800 | brelse(bh); | 2818 | brelse(bh); |
2801 | continue; | 2819 | continue; |
2802 | } | 2820 | } |
@@ -3339,7 +3357,7 @@ int close_ctree(struct btrfs_root *root) | |||
3339 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 3357 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
3340 | } | 3358 | } |
3341 | 3359 | ||
3342 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 3360 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) |
3343 | btrfs_error_commit_super(root); | 3361 | btrfs_error_commit_super(root); |
3344 | 3362 | ||
3345 | btrfs_put_block_group_cache(fs_info); | 3363 | btrfs_put_block_group_cache(fs_info); |
@@ -3352,9 +3370,9 @@ int close_ctree(struct btrfs_root *root) | |||
3352 | 3370 | ||
3353 | btrfs_free_qgroup_config(root->fs_info); | 3371 | btrfs_free_qgroup_config(root->fs_info); |
3354 | 3372 | ||
3355 | if (fs_info->delalloc_bytes) { | 3373 | if (percpu_counter_sum(&fs_info->delalloc_bytes)) { |
3356 | printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", | 3374 | printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n", |
3357 | (unsigned long long)fs_info->delalloc_bytes); | 3375 | percpu_counter_sum(&fs_info->delalloc_bytes)); |
3358 | } | 3376 | } |
3359 | 3377 | ||
3360 | free_extent_buffer(fs_info->extent_root->node); | 3378 | free_extent_buffer(fs_info->extent_root->node); |
@@ -3401,6 +3419,8 @@ int close_ctree(struct btrfs_root *root) | |||
3401 | btrfs_close_devices(fs_info->fs_devices); | 3419 | btrfs_close_devices(fs_info->fs_devices); |
3402 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 3420 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
3403 | 3421 | ||
3422 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); | ||
3423 | percpu_counter_destroy(&fs_info->delalloc_bytes); | ||
3404 | bdi_destroy(&fs_info->bdi); | 3424 | bdi_destroy(&fs_info->bdi); |
3405 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 3425 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
3406 | 3426 | ||
@@ -3443,11 +3463,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
3443 | (unsigned long long)transid, | 3463 | (unsigned long long)transid, |
3444 | (unsigned long long)root->fs_info->generation); | 3464 | (unsigned long long)root->fs_info->generation); |
3445 | was_dirty = set_extent_buffer_dirty(buf); | 3465 | was_dirty = set_extent_buffer_dirty(buf); |
3446 | if (!was_dirty) { | 3466 | if (!was_dirty) |
3447 | spin_lock(&root->fs_info->delalloc_lock); | 3467 | __percpu_counter_add(&root->fs_info->dirty_metadata_bytes, |
3448 | root->fs_info->dirty_metadata_bytes += buf->len; | 3468 | buf->len, |
3449 | spin_unlock(&root->fs_info->delalloc_lock); | 3469 | root->fs_info->dirty_metadata_batch); |
3450 | } | ||
3451 | } | 3470 | } |
3452 | 3471 | ||
3453 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, | 3472 | static void __btrfs_btree_balance_dirty(struct btrfs_root *root, |
@@ -3457,8 +3476,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root, | |||
3457 | * looks as though older kernels can get into trouble with | 3476 | * looks as though older kernels can get into trouble with |
3458 | * this code, they end up stuck in balance_dirty_pages forever | 3477 | * this code, they end up stuck in balance_dirty_pages forever |
3459 | */ | 3478 | */ |
3460 | u64 num_dirty; | 3479 | int ret; |
3461 | unsigned long thresh = 32 * 1024 * 1024; | ||
3462 | 3480 | ||
3463 | if (current->flags & PF_MEMALLOC) | 3481 | if (current->flags & PF_MEMALLOC) |
3464 | return; | 3482 | return; |
@@ -3466,9 +3484,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root, | |||
3466 | if (flush_delayed) | 3484 | if (flush_delayed) |
3467 | btrfs_balance_delayed_items(root); | 3485 | btrfs_balance_delayed_items(root); |
3468 | 3486 | ||
3469 | num_dirty = root->fs_info->dirty_metadata_bytes; | 3487 | ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes, |
3470 | 3488 | BTRFS_DIRTY_METADATA_THRESH); | |
3471 | if (num_dirty > thresh) { | 3489 | if (ret > 0) { |
3472 | balance_dirty_pages_ratelimited( | 3490 | balance_dirty_pages_ratelimited( |
3473 | root->fs_info->btree_inode->i_mapping); | 3491 | root->fs_info->btree_inode->i_mapping); |
3474 | } | 3492 | } |
@@ -3518,7 +3536,8 @@ void btrfs_error_commit_super(struct btrfs_root *root) | |||
3518 | btrfs_cleanup_transaction(root); | 3536 | btrfs_cleanup_transaction(root); |
3519 | } | 3537 | } |
3520 | 3538 | ||
3521 | static void btrfs_destroy_ordered_operations(struct btrfs_root *root) | 3539 | static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, |
3540 | struct btrfs_root *root) | ||
3522 | { | 3541 | { |
3523 | struct btrfs_inode *btrfs_inode; | 3542 | struct btrfs_inode *btrfs_inode; |
3524 | struct list_head splice; | 3543 | struct list_head splice; |
@@ -3528,7 +3547,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root) | |||
3528 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 3547 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
3529 | spin_lock(&root->fs_info->ordered_extent_lock); | 3548 | spin_lock(&root->fs_info->ordered_extent_lock); |
3530 | 3549 | ||
3531 | list_splice_init(&root->fs_info->ordered_operations, &splice); | 3550 | list_splice_init(&t->ordered_operations, &splice); |
3532 | while (!list_empty(&splice)) { | 3551 | while (!list_empty(&splice)) { |
3533 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | 3552 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, |
3534 | ordered_operations); | 3553 | ordered_operations); |
@@ -3544,35 +3563,16 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root) | |||
3544 | 3563 | ||
3545 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root) | 3564 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root) |
3546 | { | 3565 | { |
3547 | struct list_head splice; | ||
3548 | struct btrfs_ordered_extent *ordered; | 3566 | struct btrfs_ordered_extent *ordered; |
3549 | struct inode *inode; | ||
3550 | |||
3551 | INIT_LIST_HEAD(&splice); | ||
3552 | 3567 | ||
3553 | spin_lock(&root->fs_info->ordered_extent_lock); | 3568 | spin_lock(&root->fs_info->ordered_extent_lock); |
3554 | 3569 | /* | |
3555 | list_splice_init(&root->fs_info->ordered_extents, &splice); | 3570 | * This will just short circuit the ordered completion stuff which will |
3556 | while (!list_empty(&splice)) { | 3571 | * make sure the ordered extent gets properly cleaned up. |
3557 | ordered = list_entry(splice.next, struct btrfs_ordered_extent, | 3572 | */ |
3558 | root_extent_list); | 3573 | list_for_each_entry(ordered, &root->fs_info->ordered_extents, |
3559 | 3574 | root_extent_list) | |
3560 | list_del_init(&ordered->root_extent_list); | 3575 | set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); |
3561 | atomic_inc(&ordered->refs); | ||
3562 | |||
3563 | /* the inode may be getting freed (in sys_unlink path). */ | ||
3564 | inode = igrab(ordered->inode); | ||
3565 | |||
3566 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
3567 | if (inode) | ||
3568 | iput(inode); | ||
3569 | |||
3570 | atomic_set(&ordered->refs, 1); | ||
3571 | btrfs_put_ordered_extent(ordered); | ||
3572 | |||
3573 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
3574 | } | ||
3575 | |||
3576 | spin_unlock(&root->fs_info->ordered_extent_lock); | 3576 | spin_unlock(&root->fs_info->ordered_extent_lock); |
3577 | } | 3577 | } |
3578 | 3578 | ||
@@ -3594,11 +3594,11 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3594 | } | 3594 | } |
3595 | 3595 | ||
3596 | while ((node = rb_first(&delayed_refs->root)) != NULL) { | 3596 | while ((node = rb_first(&delayed_refs->root)) != NULL) { |
3597 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | 3597 | struct btrfs_delayed_ref_head *head = NULL; |
3598 | 3598 | ||
3599 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
3599 | atomic_set(&ref->refs, 1); | 3600 | atomic_set(&ref->refs, 1); |
3600 | if (btrfs_delayed_ref_is_head(ref)) { | 3601 | if (btrfs_delayed_ref_is_head(ref)) { |
3601 | struct btrfs_delayed_ref_head *head; | ||
3602 | 3602 | ||
3603 | head = btrfs_delayed_node_to_head(ref); | 3603 | head = btrfs_delayed_node_to_head(ref); |
3604 | if (!mutex_trylock(&head->mutex)) { | 3604 | if (!mutex_trylock(&head->mutex)) { |
@@ -3614,16 +3614,18 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3614 | continue; | 3614 | continue; |
3615 | } | 3615 | } |
3616 | 3616 | ||
3617 | kfree(head->extent_op); | 3617 | btrfs_free_delayed_extent_op(head->extent_op); |
3618 | delayed_refs->num_heads--; | 3618 | delayed_refs->num_heads--; |
3619 | if (list_empty(&head->cluster)) | 3619 | if (list_empty(&head->cluster)) |
3620 | delayed_refs->num_heads_ready--; | 3620 | delayed_refs->num_heads_ready--; |
3621 | list_del_init(&head->cluster); | 3621 | list_del_init(&head->cluster); |
3622 | } | 3622 | } |
3623 | |||
3623 | ref->in_tree = 0; | 3624 | ref->in_tree = 0; |
3624 | rb_erase(&ref->rb_node, &delayed_refs->root); | 3625 | rb_erase(&ref->rb_node, &delayed_refs->root); |
3625 | delayed_refs->num_entries--; | 3626 | delayed_refs->num_entries--; |
3626 | 3627 | if (head) | |
3628 | mutex_unlock(&head->mutex); | ||
3627 | spin_unlock(&delayed_refs->lock); | 3629 | spin_unlock(&delayed_refs->lock); |
3628 | btrfs_put_delayed_ref(ref); | 3630 | btrfs_put_delayed_ref(ref); |
3629 | 3631 | ||
@@ -3671,6 +3673,8 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | |||
3671 | delalloc_inodes); | 3673 | delalloc_inodes); |
3672 | 3674 | ||
3673 | list_del_init(&btrfs_inode->delalloc_inodes); | 3675 | list_del_init(&btrfs_inode->delalloc_inodes); |
3676 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
3677 | &btrfs_inode->runtime_flags); | ||
3674 | 3678 | ||
3675 | btrfs_invalidate_inodes(btrfs_inode->root); | 3679 | btrfs_invalidate_inodes(btrfs_inode->root); |
3676 | } | 3680 | } |
@@ -3823,10 +3827,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3823 | 3827 | ||
3824 | while (!list_empty(&list)) { | 3828 | while (!list_empty(&list)) { |
3825 | t = list_entry(list.next, struct btrfs_transaction, list); | 3829 | t = list_entry(list.next, struct btrfs_transaction, list); |
3826 | if (!t) | ||
3827 | break; | ||
3828 | 3830 | ||
3829 | btrfs_destroy_ordered_operations(root); | 3831 | btrfs_destroy_ordered_operations(t, root); |
3830 | 3832 | ||
3831 | btrfs_destroy_ordered_extents(root); | 3833 | btrfs_destroy_ordered_extents(root); |
3832 | 3834 | ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 5a3327b8f90d..5cd44e239595 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -72,8 +72,7 @@ enum { | |||
72 | RESERVE_ALLOC_NO_ACCOUNT = 2, | 72 | RESERVE_ALLOC_NO_ACCOUNT = 2, |
73 | }; | 73 | }; |
74 | 74 | ||
75 | static int update_block_group(struct btrfs_trans_handle *trans, | 75 | static int update_block_group(struct btrfs_root *root, |
76 | struct btrfs_root *root, | ||
77 | u64 bytenr, u64 num_bytes, int alloc); | 76 | u64 bytenr, u64 num_bytes, int alloc); |
78 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 77 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
79 | struct btrfs_root *root, | 78 | struct btrfs_root *root, |
@@ -103,6 +102,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
103 | int dump_block_groups); | 102 | int dump_block_groups); |
104 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, | 103 | static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, |
105 | u64 num_bytes, int reserve); | 104 | u64 num_bytes, int reserve); |
105 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | ||
106 | u64 num_bytes); | ||
106 | 107 | ||
107 | static noinline int | 108 | static noinline int |
108 | block_group_cache_done(struct btrfs_block_group_cache *cache) | 109 | block_group_cache_done(struct btrfs_block_group_cache *cache) |
@@ -162,6 +163,10 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, | |||
162 | rb_link_node(&block_group->cache_node, parent, p); | 163 | rb_link_node(&block_group->cache_node, parent, p); |
163 | rb_insert_color(&block_group->cache_node, | 164 | rb_insert_color(&block_group->cache_node, |
164 | &info->block_group_cache_tree); | 165 | &info->block_group_cache_tree); |
166 | |||
167 | if (info->first_logical_byte > block_group->key.objectid) | ||
168 | info->first_logical_byte = block_group->key.objectid; | ||
169 | |||
165 | spin_unlock(&info->block_group_cache_lock); | 170 | spin_unlock(&info->block_group_cache_lock); |
166 | 171 | ||
167 | return 0; | 172 | return 0; |
@@ -203,8 +208,11 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, | |||
203 | break; | 208 | break; |
204 | } | 209 | } |
205 | } | 210 | } |
206 | if (ret) | 211 | if (ret) { |
207 | btrfs_get_block_group(ret); | 212 | btrfs_get_block_group(ret); |
213 | if (bytenr == 0 && info->first_logical_byte > ret->key.objectid) | ||
214 | info->first_logical_byte = ret->key.objectid; | ||
215 | } | ||
208 | spin_unlock(&info->block_group_cache_lock); | 216 | spin_unlock(&info->block_group_cache_lock); |
209 | 217 | ||
210 | return ret; | 218 | return ret; |
@@ -468,8 +476,6 @@ out: | |||
468 | } | 476 | } |
469 | 477 | ||
470 | static int cache_block_group(struct btrfs_block_group_cache *cache, | 478 | static int cache_block_group(struct btrfs_block_group_cache *cache, |
471 | struct btrfs_trans_handle *trans, | ||
472 | struct btrfs_root *root, | ||
473 | int load_cache_only) | 479 | int load_cache_only) |
474 | { | 480 | { |
475 | DEFINE_WAIT(wait); | 481 | DEFINE_WAIT(wait); |
@@ -527,12 +533,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
527 | cache->cached = BTRFS_CACHE_FAST; | 533 | cache->cached = BTRFS_CACHE_FAST; |
528 | spin_unlock(&cache->lock); | 534 | spin_unlock(&cache->lock); |
529 | 535 | ||
530 | /* | ||
531 | * We can't do the read from on-disk cache during a commit since we need | ||
532 | * to have the normal tree locking. Also if we are currently trying to | ||
533 | * allocate blocks for the tree root we can't do the fast caching since | ||
534 | * we likely hold important locks. | ||
535 | */ | ||
536 | if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) { | 536 | if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) { |
537 | ret = load_free_space_cache(fs_info, cache); | 537 | ret = load_free_space_cache(fs_info, cache); |
538 | 538 | ||
@@ -2143,7 +2143,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
2143 | node->num_bytes); | 2143 | node->num_bytes); |
2144 | } | 2144 | } |
2145 | } | 2145 | } |
2146 | mutex_unlock(&head->mutex); | ||
2147 | return ret; | 2146 | return ret; |
2148 | } | 2147 | } |
2149 | 2148 | ||
@@ -2258,7 +2257,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2258 | * process of being added. Don't run this ref yet. | 2257 | * process of being added. Don't run this ref yet. |
2259 | */ | 2258 | */ |
2260 | list_del_init(&locked_ref->cluster); | 2259 | list_del_init(&locked_ref->cluster); |
2261 | mutex_unlock(&locked_ref->mutex); | 2260 | btrfs_delayed_ref_unlock(locked_ref); |
2262 | locked_ref = NULL; | 2261 | locked_ref = NULL; |
2263 | delayed_refs->num_heads_ready++; | 2262 | delayed_refs->num_heads_ready++; |
2264 | spin_unlock(&delayed_refs->lock); | 2263 | spin_unlock(&delayed_refs->lock); |
@@ -2285,7 +2284,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2285 | ref = &locked_ref->node; | 2284 | ref = &locked_ref->node; |
2286 | 2285 | ||
2287 | if (extent_op && must_insert_reserved) { | 2286 | if (extent_op && must_insert_reserved) { |
2288 | kfree(extent_op); | 2287 | btrfs_free_delayed_extent_op(extent_op); |
2289 | extent_op = NULL; | 2288 | extent_op = NULL; |
2290 | } | 2289 | } |
2291 | 2290 | ||
@@ -2294,28 +2293,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2294 | 2293 | ||
2295 | ret = run_delayed_extent_op(trans, root, | 2294 | ret = run_delayed_extent_op(trans, root, |
2296 | ref, extent_op); | 2295 | ref, extent_op); |
2297 | kfree(extent_op); | 2296 | btrfs_free_delayed_extent_op(extent_op); |
2298 | 2297 | ||
2299 | if (ret) { | 2298 | if (ret) { |
2300 | list_del_init(&locked_ref->cluster); | 2299 | printk(KERN_DEBUG |
2301 | mutex_unlock(&locked_ref->mutex); | 2300 | "btrfs: run_delayed_extent_op " |
2302 | 2301 | "returned %d\n", ret); | |
2303 | printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); | ||
2304 | spin_lock(&delayed_refs->lock); | 2302 | spin_lock(&delayed_refs->lock); |
2303 | btrfs_delayed_ref_unlock(locked_ref); | ||
2305 | return ret; | 2304 | return ret; |
2306 | } | 2305 | } |
2307 | 2306 | ||
2308 | goto next; | 2307 | goto next; |
2309 | } | 2308 | } |
2310 | |||
2311 | list_del_init(&locked_ref->cluster); | ||
2312 | locked_ref = NULL; | ||
2313 | } | 2309 | } |
2314 | 2310 | ||
2315 | ref->in_tree = 0; | 2311 | ref->in_tree = 0; |
2316 | rb_erase(&ref->rb_node, &delayed_refs->root); | 2312 | rb_erase(&ref->rb_node, &delayed_refs->root); |
2317 | delayed_refs->num_entries--; | 2313 | delayed_refs->num_entries--; |
2318 | if (locked_ref) { | 2314 | if (!btrfs_delayed_ref_is_head(ref)) { |
2319 | /* | 2315 | /* |
2320 | * when we play the delayed ref, also correct the | 2316 | * when we play the delayed ref, also correct the |
2321 | * ref_mod on head | 2317 | * ref_mod on head |
@@ -2337,20 +2333,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | |||
2337 | ret = run_one_delayed_ref(trans, root, ref, extent_op, | 2333 | ret = run_one_delayed_ref(trans, root, ref, extent_op, |
2338 | must_insert_reserved); | 2334 | must_insert_reserved); |
2339 | 2335 | ||
2340 | btrfs_put_delayed_ref(ref); | 2336 | btrfs_free_delayed_extent_op(extent_op); |
2341 | kfree(extent_op); | ||
2342 | count++; | ||
2343 | |||
2344 | if (ret) { | 2337 | if (ret) { |
2345 | if (locked_ref) { | 2338 | btrfs_delayed_ref_unlock(locked_ref); |
2346 | list_del_init(&locked_ref->cluster); | 2339 | btrfs_put_delayed_ref(ref); |
2347 | mutex_unlock(&locked_ref->mutex); | 2340 | printk(KERN_DEBUG |
2348 | } | 2341 | "btrfs: run_one_delayed_ref returned %d\n", ret); |
2349 | printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); | ||
2350 | spin_lock(&delayed_refs->lock); | 2342 | spin_lock(&delayed_refs->lock); |
2351 | return ret; | 2343 | return ret; |
2352 | } | 2344 | } |
2353 | 2345 | ||
2346 | /* | ||
2347 | * If this node is a head, that means all the refs in this head | ||
2348 | * have been dealt with, and we will pick the next head to deal | ||
2349 | * with, so we must unlock the head and drop it from the cluster | ||
2350 | * list before we release it. | ||
2351 | */ | ||
2352 | if (btrfs_delayed_ref_is_head(ref)) { | ||
2353 | list_del_init(&locked_ref->cluster); | ||
2354 | btrfs_delayed_ref_unlock(locked_ref); | ||
2355 | locked_ref = NULL; | ||
2356 | } | ||
2357 | btrfs_put_delayed_ref(ref); | ||
2358 | count++; | ||
2354 | next: | 2359 | next: |
2355 | cond_resched(); | 2360 | cond_resched(); |
2356 | spin_lock(&delayed_refs->lock); | 2361 | spin_lock(&delayed_refs->lock); |
@@ -2500,6 +2505,7 @@ again: | |||
2500 | 2505 | ||
2501 | ret = run_clustered_refs(trans, root, &cluster); | 2506 | ret = run_clustered_refs(trans, root, &cluster); |
2502 | if (ret < 0) { | 2507 | if (ret < 0) { |
2508 | btrfs_release_ref_cluster(&cluster); | ||
2503 | spin_unlock(&delayed_refs->lock); | 2509 | spin_unlock(&delayed_refs->lock); |
2504 | btrfs_abort_transaction(trans, root, ret); | 2510 | btrfs_abort_transaction(trans, root, ret); |
2505 | return ret; | 2511 | return ret; |
@@ -2586,7 +2592,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, | |||
2586 | struct btrfs_delayed_extent_op *extent_op; | 2592 | struct btrfs_delayed_extent_op *extent_op; |
2587 | int ret; | 2593 | int ret; |
2588 | 2594 | ||
2589 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 2595 | extent_op = btrfs_alloc_delayed_extent_op(); |
2590 | if (!extent_op) | 2596 | if (!extent_op) |
2591 | return -ENOMEM; | 2597 | return -ENOMEM; |
2592 | 2598 | ||
@@ -2598,7 +2604,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, | |||
2598 | ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, | 2604 | ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, |
2599 | num_bytes, extent_op); | 2605 | num_bytes, extent_op); |
2600 | if (ret) | 2606 | if (ret) |
2601 | kfree(extent_op); | 2607 | btrfs_free_delayed_extent_op(extent_op); |
2602 | return ret; | 2608 | return ret; |
2603 | } | 2609 | } |
2604 | 2610 | ||
@@ -3223,12 +3229,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
3223 | u64 extra_flags = chunk_to_extended(flags) & | 3229 | u64 extra_flags = chunk_to_extended(flags) & |
3224 | BTRFS_EXTENDED_PROFILE_MASK; | 3230 | BTRFS_EXTENDED_PROFILE_MASK; |
3225 | 3231 | ||
3232 | write_seqlock(&fs_info->profiles_lock); | ||
3226 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 3233 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
3227 | fs_info->avail_data_alloc_bits |= extra_flags; | 3234 | fs_info->avail_data_alloc_bits |= extra_flags; |
3228 | if (flags & BTRFS_BLOCK_GROUP_METADATA) | 3235 | if (flags & BTRFS_BLOCK_GROUP_METADATA) |
3229 | fs_info->avail_metadata_alloc_bits |= extra_flags; | 3236 | fs_info->avail_metadata_alloc_bits |= extra_flags; |
3230 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | 3237 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
3231 | fs_info->avail_system_alloc_bits |= extra_flags; | 3238 | fs_info->avail_system_alloc_bits |= extra_flags; |
3239 | write_sequnlock(&fs_info->profiles_lock); | ||
3232 | } | 3240 | } |
3233 | 3241 | ||
3234 | /* | 3242 | /* |
@@ -3320,12 +3328,18 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
3320 | 3328 | ||
3321 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) | 3329 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) |
3322 | { | 3330 | { |
3323 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 3331 | unsigned seq; |
3324 | flags |= root->fs_info->avail_data_alloc_bits; | 3332 | |
3325 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | 3333 | do { |
3326 | flags |= root->fs_info->avail_system_alloc_bits; | 3334 | seq = read_seqbegin(&root->fs_info->profiles_lock); |
3327 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) | 3335 | |
3328 | flags |= root->fs_info->avail_metadata_alloc_bits; | 3336 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
3337 | flags |= root->fs_info->avail_data_alloc_bits; | ||
3338 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | ||
3339 | flags |= root->fs_info->avail_system_alloc_bits; | ||
3340 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) | ||
3341 | flags |= root->fs_info->avail_metadata_alloc_bits; | ||
3342 | } while (read_seqretry(&root->fs_info->profiles_lock, seq)); | ||
3329 | 3343 | ||
3330 | return btrfs_reduce_alloc_profile(root, flags); | 3344 | return btrfs_reduce_alloc_profile(root, flags); |
3331 | } | 3345 | } |
@@ -3564,6 +3578,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3564 | int wait_for_alloc = 0; | 3578 | int wait_for_alloc = 0; |
3565 | int ret = 0; | 3579 | int ret = 0; |
3566 | 3580 | ||
3581 | /* Don't re-enter if we're already allocating a chunk */ | ||
3582 | if (trans->allocating_chunk) | ||
3583 | return -ENOSPC; | ||
3584 | |||
3567 | space_info = __find_space_info(extent_root->fs_info, flags); | 3585 | space_info = __find_space_info(extent_root->fs_info, flags); |
3568 | if (!space_info) { | 3586 | if (!space_info) { |
3569 | ret = update_space_info(extent_root->fs_info, flags, | 3587 | ret = update_space_info(extent_root->fs_info, flags, |
@@ -3606,6 +3624,8 @@ again: | |||
3606 | goto again; | 3624 | goto again; |
3607 | } | 3625 | } |
3608 | 3626 | ||
3627 | trans->allocating_chunk = true; | ||
3628 | |||
3609 | /* | 3629 | /* |
3610 | * If we have mixed data/metadata chunks we want to make sure we keep | 3630 | * If we have mixed data/metadata chunks we want to make sure we keep |
3611 | * allocating mixed chunks instead of individual chunks. | 3631 | * allocating mixed chunks instead of individual chunks. |
@@ -3632,6 +3652,7 @@ again: | |||
3632 | check_system_chunk(trans, extent_root, flags); | 3652 | check_system_chunk(trans, extent_root, flags); |
3633 | 3653 | ||
3634 | ret = btrfs_alloc_chunk(trans, extent_root, flags); | 3654 | ret = btrfs_alloc_chunk(trans, extent_root, flags); |
3655 | trans->allocating_chunk = false; | ||
3635 | if (ret < 0 && ret != -ENOSPC) | 3656 | if (ret < 0 && ret != -ENOSPC) |
3636 | goto out; | 3657 | goto out; |
3637 | 3658 | ||
@@ -3653,13 +3674,31 @@ static int can_overcommit(struct btrfs_root *root, | |||
3653 | struct btrfs_space_info *space_info, u64 bytes, | 3674 | struct btrfs_space_info *space_info, u64 bytes, |
3654 | enum btrfs_reserve_flush_enum flush) | 3675 | enum btrfs_reserve_flush_enum flush) |
3655 | { | 3676 | { |
3677 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
3656 | u64 profile = btrfs_get_alloc_profile(root, 0); | 3678 | u64 profile = btrfs_get_alloc_profile(root, 0); |
3679 | u64 rsv_size = 0; | ||
3657 | u64 avail; | 3680 | u64 avail; |
3658 | u64 used; | 3681 | u64 used; |
3682 | u64 to_add; | ||
3659 | 3683 | ||
3660 | used = space_info->bytes_used + space_info->bytes_reserved + | 3684 | used = space_info->bytes_used + space_info->bytes_reserved + |
3661 | space_info->bytes_pinned + space_info->bytes_readonly + | 3685 | space_info->bytes_pinned + space_info->bytes_readonly; |
3662 | space_info->bytes_may_use; | 3686 | |
3687 | spin_lock(&global_rsv->lock); | ||
3688 | rsv_size = global_rsv->size; | ||
3689 | spin_unlock(&global_rsv->lock); | ||
3690 | |||
3691 | /* | ||
3692 | * We only want to allow over committing if we have lots of actual space | ||
3693 | * free, but if we don't have enough space to handle the global reserve | ||
3694 | * space then we could end up having a real enospc problem when trying | ||
3695 | * to allocate a chunk or some other such important allocation. | ||
3696 | */ | ||
3697 | rsv_size <<= 1; | ||
3698 | if (used + rsv_size >= space_info->total_bytes) | ||
3699 | return 0; | ||
3700 | |||
3701 | used += space_info->bytes_may_use; | ||
3663 | 3702 | ||
3664 | spin_lock(&root->fs_info->free_chunk_lock); | 3703 | spin_lock(&root->fs_info->free_chunk_lock); |
3665 | avail = root->fs_info->free_chunk_space; | 3704 | avail = root->fs_info->free_chunk_space; |
@@ -3674,27 +3713,38 @@ static int can_overcommit(struct btrfs_root *root, | |||
3674 | BTRFS_BLOCK_GROUP_RAID10)) | 3713 | BTRFS_BLOCK_GROUP_RAID10)) |
3675 | avail >>= 1; | 3714 | avail >>= 1; |
3676 | 3715 | ||
3716 | to_add = space_info->total_bytes; | ||
3717 | |||
3677 | /* | 3718 | /* |
3678 | * If we aren't flushing all things, let us overcommit up to | 3719 | * If we aren't flushing all things, let us overcommit up to |
3679 | * 1/2 of the space. If we can flush, don't let us overcommit | 3720 | * 1/2 of the space. If we can flush, don't let us overcommit |
3680 | * too much, let it overcommit up to 1/8 of the space. | 3721 | * too much, let it overcommit up to 1/8 of the space. |
3681 | */ | 3722 | */ |
3682 | if (flush == BTRFS_RESERVE_FLUSH_ALL) | 3723 | if (flush == BTRFS_RESERVE_FLUSH_ALL) |
3683 | avail >>= 3; | 3724 | to_add >>= 3; |
3684 | else | 3725 | else |
3685 | avail >>= 1; | 3726 | to_add >>= 1; |
3686 | 3727 | ||
3687 | if (used + bytes < space_info->total_bytes + avail) | 3728 | /* |
3729 | * Limit the overcommit to the amount of free space we could possibly | ||
3730 | * allocate for chunks. | ||
3731 | */ | ||
3732 | to_add = min(avail, to_add); | ||
3733 | |||
3734 | if (used + bytes < space_info->total_bytes + to_add) | ||
3688 | return 1; | 3735 | return 1; |
3689 | return 0; | 3736 | return 0; |
3690 | } | 3737 | } |
3691 | 3738 | ||
3692 | static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb, | 3739 | static inline int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb, |
3693 | unsigned long nr_pages, | 3740 | unsigned long nr_pages, |
3694 | enum wb_reason reason) | 3741 | enum wb_reason reason) |
3695 | { | 3742 | { |
3696 | if (!writeback_in_progress(sb->s_bdi) && | 3743 | /* the flusher is dealing with the dirty inodes now. */ |
3697 | down_read_trylock(&sb->s_umount)) { | 3744 | if (writeback_in_progress(sb->s_bdi)) |
3745 | return 1; | ||
3746 | |||
3747 | if (down_read_trylock(&sb->s_umount)) { | ||
3698 | writeback_inodes_sb_nr(sb, nr_pages, reason); | 3748 | writeback_inodes_sb_nr(sb, nr_pages, reason); |
3699 | up_read(&sb->s_umount); | 3749 | up_read(&sb->s_umount); |
3700 | return 1; | 3750 | return 1; |
@@ -3703,6 +3753,28 @@ static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb, | |||
3703 | return 0; | 3753 | return 0; |
3704 | } | 3754 | } |
3705 | 3755 | ||
3756 | void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | ||
3757 | unsigned long nr_pages) | ||
3758 | { | ||
3759 | struct super_block *sb = root->fs_info->sb; | ||
3760 | int started; | ||
3761 | |||
3762 | /* If we cannot start writeback, just sync all the delalloc files. */ | ||
3763 | started = writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages, | ||
3764 | WB_REASON_FS_FREE_SPACE); | ||
3765 | if (!started) { | ||
3766 | /* | ||
3767 | * We needn't worry about the filesystem going from r/w to r/o even | ||
3768 | * though we don't acquire the ->s_umount mutex, because the filesystem | ||
3769 | * should guarantee that the delalloc inodes list is empty after | ||
3770 | * the filesystem is read-only (all dirty pages are written to | ||
3771 | * the disk). | ||
3772 | */ | ||
3773 | btrfs_start_delalloc_inodes(root, 0); | ||
3774 | btrfs_wait_ordered_extents(root, 0); | ||
3775 | } | ||
3776 | } | ||
3777 | |||
3706 | /* | 3778 | /* |
3707 | * shrink metadata reservation for delalloc | 3779 | * shrink metadata reservation for delalloc |
3708 | */ | 3780 | */ |
@@ -3724,7 +3796,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3724 | space_info = block_rsv->space_info; | 3796 | space_info = block_rsv->space_info; |
3725 | 3797 | ||
3726 | smp_mb(); | 3798 | smp_mb(); |
3727 | delalloc_bytes = root->fs_info->delalloc_bytes; | 3799 | delalloc_bytes = percpu_counter_sum_positive( |
3800 | &root->fs_info->delalloc_bytes); | ||
3728 | if (delalloc_bytes == 0) { | 3801 | if (delalloc_bytes == 0) { |
3729 | if (trans) | 3802 | if (trans) |
3730 | return; | 3803 | return; |
@@ -3735,10 +3808,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3735 | while (delalloc_bytes && loops < 3) { | 3808 | while (delalloc_bytes && loops < 3) { |
3736 | max_reclaim = min(delalloc_bytes, to_reclaim); | 3809 | max_reclaim = min(delalloc_bytes, to_reclaim); |
3737 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; | 3810 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; |
3738 | writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb, | 3811 | btrfs_writeback_inodes_sb_nr(root, nr_pages); |
3739 | nr_pages, | ||
3740 | WB_REASON_FS_FREE_SPACE); | ||
3741 | |||
3742 | /* | 3812 | /* |
3743 | * We need to wait for the async pages to actually start before | 3813 | * We need to wait for the async pages to actually start before |
3744 | * we do anything. | 3814 | * we do anything. |
@@ -3766,7 +3836,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
3766 | break; | 3836 | break; |
3767 | } | 3837 | } |
3768 | smp_mb(); | 3838 | smp_mb(); |
3769 | delalloc_bytes = root->fs_info->delalloc_bytes; | 3839 | delalloc_bytes = percpu_counter_sum_positive( |
3840 | &root->fs_info->delalloc_bytes); | ||
3770 | } | 3841 | } |
3771 | } | 3842 | } |
3772 | 3843 | ||
@@ -4030,6 +4101,15 @@ again: | |||
4030 | goto again; | 4101 | goto again; |
4031 | 4102 | ||
4032 | out: | 4103 | out: |
4104 | if (ret == -ENOSPC && | ||
4105 | unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) { | ||
4106 | struct btrfs_block_rsv *global_rsv = | ||
4107 | &root->fs_info->global_block_rsv; | ||
4108 | |||
4109 | if (block_rsv != global_rsv && | ||
4110 | !block_rsv_use_bytes(global_rsv, orig_bytes)) | ||
4111 | ret = 0; | ||
4112 | } | ||
4033 | if (flushing) { | 4113 | if (flushing) { |
4034 | spin_lock(&space_info->lock); | 4114 | spin_lock(&space_info->lock); |
4035 | space_info->flush = 0; | 4115 | space_info->flush = 0; |
@@ -4668,7 +4748,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | |||
4668 | spin_lock(&BTRFS_I(inode)->lock); | 4748 | spin_lock(&BTRFS_I(inode)->lock); |
4669 | dropped = drop_outstanding_extent(inode); | 4749 | dropped = drop_outstanding_extent(inode); |
4670 | 4750 | ||
4671 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | 4751 | if (num_bytes) |
4752 | to_free = calc_csum_metadata_size(inode, num_bytes, 0); | ||
4672 | spin_unlock(&BTRFS_I(inode)->lock); | 4753 | spin_unlock(&BTRFS_I(inode)->lock); |
4673 | if (dropped > 0) | 4754 | if (dropped > 0) |
4674 | to_free += btrfs_calc_trans_metadata_size(root, dropped); | 4755 | to_free += btrfs_calc_trans_metadata_size(root, dropped); |
@@ -4735,8 +4816,7 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) | |||
4735 | btrfs_free_reserved_data_space(inode, num_bytes); | 4816 | btrfs_free_reserved_data_space(inode, num_bytes); |
4736 | } | 4817 | } |
4737 | 4818 | ||
4738 | static int update_block_group(struct btrfs_trans_handle *trans, | 4819 | static int update_block_group(struct btrfs_root *root, |
4739 | struct btrfs_root *root, | ||
4740 | u64 bytenr, u64 num_bytes, int alloc) | 4820 | u64 bytenr, u64 num_bytes, int alloc) |
4741 | { | 4821 | { |
4742 | struct btrfs_block_group_cache *cache = NULL; | 4822 | struct btrfs_block_group_cache *cache = NULL; |
@@ -4773,7 +4853,7 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
4773 | * space back to the block group, otherwise we will leak space. | 4853 | * space back to the block group, otherwise we will leak space. |
4774 | */ | 4854 | */ |
4775 | if (!alloc && cache->cached == BTRFS_CACHE_NO) | 4855 | if (!alloc && cache->cached == BTRFS_CACHE_NO) |
4776 | cache_block_group(cache, trans, NULL, 1); | 4856 | cache_block_group(cache, 1); |
4777 | 4857 | ||
4778 | byte_in_group = bytenr - cache->key.objectid; | 4858 | byte_in_group = bytenr - cache->key.objectid; |
4779 | WARN_ON(byte_in_group > cache->key.offset); | 4859 | WARN_ON(byte_in_group > cache->key.offset); |
@@ -4823,6 +4903,13 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
4823 | struct btrfs_block_group_cache *cache; | 4903 | struct btrfs_block_group_cache *cache; |
4824 | u64 bytenr; | 4904 | u64 bytenr; |
4825 | 4905 | ||
4906 | spin_lock(&root->fs_info->block_group_cache_lock); | ||
4907 | bytenr = root->fs_info->first_logical_byte; | ||
4908 | spin_unlock(&root->fs_info->block_group_cache_lock); | ||
4909 | |||
4910 | if (bytenr < (u64)-1) | ||
4911 | return bytenr; | ||
4912 | |||
4826 | cache = btrfs_lookup_first_block_group(root->fs_info, search_start); | 4913 | cache = btrfs_lookup_first_block_group(root->fs_info, search_start); |
4827 | if (!cache) | 4914 | if (!cache) |
4828 | return 0; | 4915 | return 0; |
@@ -4873,8 +4960,7 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
4873 | /* | 4960 | /* |
4874 | * this function must be called within transaction | 4961 | * this function must be called within transaction |
4875 | */ | 4962 | */ |
4876 | int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, | 4963 | int btrfs_pin_extent_for_log_replay(struct btrfs_root *root, |
4877 | struct btrfs_root *root, | ||
4878 | u64 bytenr, u64 num_bytes) | 4964 | u64 bytenr, u64 num_bytes) |
4879 | { | 4965 | { |
4880 | struct btrfs_block_group_cache *cache; | 4966 | struct btrfs_block_group_cache *cache; |
@@ -4888,7 +4974,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, | |||
4888 | * to one because the slow code to read in the free extents does check | 4974 | * to one because the slow code to read in the free extents does check |
4889 | * the pinned extents. | 4975 | * the pinned extents. |
4890 | */ | 4976 | */ |
4891 | cache_block_group(cache, trans, root, 1); | 4977 | cache_block_group(cache, 1); |
4892 | 4978 | ||
4893 | pin_down_extent(root, cache, bytenr, num_bytes, 0); | 4979 | pin_down_extent(root, cache, bytenr, num_bytes, 0); |
4894 | 4980 | ||
@@ -5285,7 +5371,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
5285 | } | 5371 | } |
5286 | } | 5372 | } |
5287 | 5373 | ||
5288 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); | 5374 | ret = update_block_group(root, bytenr, num_bytes, 0); |
5289 | if (ret) { | 5375 | if (ret) { |
5290 | btrfs_abort_transaction(trans, extent_root, ret); | 5376 | btrfs_abort_transaction(trans, extent_root, ret); |
5291 | goto out; | 5377 | goto out; |
@@ -5330,7 +5416,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
5330 | if (head->extent_op) { | 5416 | if (head->extent_op) { |
5331 | if (!head->must_insert_reserved) | 5417 | if (!head->must_insert_reserved) |
5332 | goto out; | 5418 | goto out; |
5333 | kfree(head->extent_op); | 5419 | btrfs_free_delayed_extent_op(head->extent_op); |
5334 | head->extent_op = NULL; | 5420 | head->extent_op = NULL; |
5335 | } | 5421 | } |
5336 | 5422 | ||
@@ -5476,7 +5562,6 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, | |||
5476 | u64 num_bytes) | 5562 | u64 num_bytes) |
5477 | { | 5563 | { |
5478 | struct btrfs_caching_control *caching_ctl; | 5564 | struct btrfs_caching_control *caching_ctl; |
5479 | DEFINE_WAIT(wait); | ||
5480 | 5565 | ||
5481 | caching_ctl = get_caching_control(cache); | 5566 | caching_ctl = get_caching_control(cache); |
5482 | if (!caching_ctl) | 5567 | if (!caching_ctl) |
@@ -5493,7 +5578,6 @@ static noinline int | |||
5493 | wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | 5578 | wait_block_group_cache_done(struct btrfs_block_group_cache *cache) |
5494 | { | 5579 | { |
5495 | struct btrfs_caching_control *caching_ctl; | 5580 | struct btrfs_caching_control *caching_ctl; |
5496 | DEFINE_WAIT(wait); | ||
5497 | 5581 | ||
5498 | caching_ctl = get_caching_control(cache); | 5582 | caching_ctl = get_caching_control(cache); |
5499 | if (!caching_ctl) | 5583 | if (!caching_ctl) |
@@ -5507,20 +5591,16 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
5507 | 5591 | ||
5508 | int __get_raid_index(u64 flags) | 5592 | int __get_raid_index(u64 flags) |
5509 | { | 5593 | { |
5510 | int index; | ||
5511 | |||
5512 | if (flags & BTRFS_BLOCK_GROUP_RAID10) | 5594 | if (flags & BTRFS_BLOCK_GROUP_RAID10) |
5513 | index = 0; | 5595 | return BTRFS_RAID_RAID10; |
5514 | else if (flags & BTRFS_BLOCK_GROUP_RAID1) | 5596 | else if (flags & BTRFS_BLOCK_GROUP_RAID1) |
5515 | index = 1; | 5597 | return BTRFS_RAID_RAID1; |
5516 | else if (flags & BTRFS_BLOCK_GROUP_DUP) | 5598 | else if (flags & BTRFS_BLOCK_GROUP_DUP) |
5517 | index = 2; | 5599 | return BTRFS_RAID_DUP; |
5518 | else if (flags & BTRFS_BLOCK_GROUP_RAID0) | 5600 | else if (flags & BTRFS_BLOCK_GROUP_RAID0) |
5519 | index = 3; | 5601 | return BTRFS_RAID_RAID0; |
5520 | else | 5602 | else |
5521 | index = 4; | 5603 | return BTRFS_RAID_SINGLE; |
5522 | |||
5523 | return index; | ||
5524 | } | 5604 | } |
5525 | 5605 | ||
5526 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | 5606 | static int get_block_group_index(struct btrfs_block_group_cache *cache) |
@@ -5678,8 +5758,7 @@ have_block_group: | |||
5678 | cached = block_group_cache_done(block_group); | 5758 | cached = block_group_cache_done(block_group); |
5679 | if (unlikely(!cached)) { | 5759 | if (unlikely(!cached)) { |
5680 | found_uncached_bg = true; | 5760 | found_uncached_bg = true; |
5681 | ret = cache_block_group(block_group, trans, | 5761 | ret = cache_block_group(block_group, 0); |
5682 | orig_root, 0); | ||
5683 | BUG_ON(ret < 0); | 5762 | BUG_ON(ret < 0); |
5684 | ret = 0; | 5763 | ret = 0; |
5685 | } | 5764 | } |
@@ -6108,7 +6187,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
6108 | btrfs_mark_buffer_dirty(path->nodes[0]); | 6187 | btrfs_mark_buffer_dirty(path->nodes[0]); |
6109 | btrfs_free_path(path); | 6188 | btrfs_free_path(path); |
6110 | 6189 | ||
6111 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); | 6190 | ret = update_block_group(root, ins->objectid, ins->offset, 1); |
6112 | if (ret) { /* -ENOENT, logic error */ | 6191 | if (ret) { /* -ENOENT, logic error */ |
6113 | printk(KERN_ERR "btrfs update block group failed for %llu " | 6192 | printk(KERN_ERR "btrfs update block group failed for %llu " |
6114 | "%llu\n", (unsigned long long)ins->objectid, | 6193 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -6172,7 +6251,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
6172 | btrfs_mark_buffer_dirty(leaf); | 6251 | btrfs_mark_buffer_dirty(leaf); |
6173 | btrfs_free_path(path); | 6252 | btrfs_free_path(path); |
6174 | 6253 | ||
6175 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); | 6254 | ret = update_block_group(root, ins->objectid, ins->offset, 1); |
6176 | if (ret) { /* -ENOENT, logic error */ | 6255 | if (ret) { /* -ENOENT, logic error */ |
6177 | printk(KERN_ERR "btrfs update block group failed for %llu " | 6256 | printk(KERN_ERR "btrfs update block group failed for %llu " |
6178 | "%llu\n", (unsigned long long)ins->objectid, | 6257 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -6215,7 +6294,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
6215 | u64 num_bytes = ins->offset; | 6294 | u64 num_bytes = ins->offset; |
6216 | 6295 | ||
6217 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); | 6296 | block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); |
6218 | cache_block_group(block_group, trans, NULL, 0); | 6297 | cache_block_group(block_group, 0); |
6219 | caching_ctl = get_caching_control(block_group); | 6298 | caching_ctl = get_caching_control(block_group); |
6220 | 6299 | ||
6221 | if (!caching_ctl) { | 6300 | if (!caching_ctl) { |
@@ -6329,12 +6408,14 @@ use_block_rsv(struct btrfs_trans_handle *trans, | |||
6329 | if (!ret) | 6408 | if (!ret) |
6330 | return block_rsv; | 6409 | return block_rsv; |
6331 | if (ret && !block_rsv->failfast) { | 6410 | if (ret && !block_rsv->failfast) { |
6332 | static DEFINE_RATELIMIT_STATE(_rs, | 6411 | if (btrfs_test_opt(root, ENOSPC_DEBUG)) { |
6333 | DEFAULT_RATELIMIT_INTERVAL, | 6412 | static DEFINE_RATELIMIT_STATE(_rs, |
6334 | /*DEFAULT_RATELIMIT_BURST*/ 2); | 6413 | DEFAULT_RATELIMIT_INTERVAL * 10, |
6335 | if (__ratelimit(&_rs)) | 6414 | /*DEFAULT_RATELIMIT_BURST*/ 1); |
6336 | WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n", | 6415 | if (__ratelimit(&_rs)) |
6337 | ret); | 6416 | WARN(1, KERN_DEBUG |
6417 | "btrfs: block rsv returned %d\n", ret); | ||
6418 | } | ||
6338 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, | 6419 | ret = reserve_metadata_bytes(root, block_rsv, blocksize, |
6339 | BTRFS_RESERVE_NO_FLUSH); | 6420 | BTRFS_RESERVE_NO_FLUSH); |
6340 | if (!ret) { | 6421 | if (!ret) { |
@@ -6400,7 +6481,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
6400 | 6481 | ||
6401 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 6482 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
6402 | struct btrfs_delayed_extent_op *extent_op; | 6483 | struct btrfs_delayed_extent_op *extent_op; |
6403 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 6484 | extent_op = btrfs_alloc_delayed_extent_op(); |
6404 | BUG_ON(!extent_op); /* -ENOMEM */ | 6485 | BUG_ON(!extent_op); /* -ENOMEM */ |
6405 | if (key) | 6486 | if (key) |
6406 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | 6487 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); |
@@ -7481,16 +7562,16 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) | |||
7481 | index = get_block_group_index(block_group); | 7562 | index = get_block_group_index(block_group); |
7482 | } | 7563 | } |
7483 | 7564 | ||
7484 | if (index == 0) { | 7565 | if (index == BTRFS_RAID_RAID10) { |
7485 | dev_min = 4; | 7566 | dev_min = 4; |
7486 | /* Divide by 2 */ | 7567 | /* Divide by 2 */ |
7487 | min_free >>= 1; | 7568 | min_free >>= 1; |
7488 | } else if (index == 1) { | 7569 | } else if (index == BTRFS_RAID_RAID1) { |
7489 | dev_min = 2; | 7570 | dev_min = 2; |
7490 | } else if (index == 2) { | 7571 | } else if (index == BTRFS_RAID_DUP) { |
7491 | /* Multiply by 2 */ | 7572 | /* Multiply by 2 */ |
7492 | min_free <<= 1; | 7573 | min_free <<= 1; |
7493 | } else if (index == 3) { | 7574 | } else if (index == BTRFS_RAID_RAID0) { |
7494 | dev_min = fs_devices->rw_devices; | 7575 | dev_min = fs_devices->rw_devices; |
7495 | do_div(min_free, dev_min); | 7576 | do_div(min_free, dev_min); |
7496 | } | 7577 | } |
@@ -7651,11 +7732,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7651 | space_info = list_entry(info->space_info.next, | 7732 | space_info = list_entry(info->space_info.next, |
7652 | struct btrfs_space_info, | 7733 | struct btrfs_space_info, |
7653 | list); | 7734 | list); |
7654 | if (space_info->bytes_pinned > 0 || | 7735 | if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) { |
7655 | space_info->bytes_reserved > 0 || | 7736 | if (space_info->bytes_pinned > 0 || |
7656 | space_info->bytes_may_use > 0) { | 7737 | space_info->bytes_reserved > 0 || |
7657 | WARN_ON(1); | 7738 | space_info->bytes_may_use > 0) { |
7658 | dump_space_info(space_info, 0, 0); | 7739 | WARN_ON(1); |
7740 | dump_space_info(space_info, 0, 0); | ||
7741 | } | ||
7659 | } | 7742 | } |
7660 | list_del(&space_info->list); | 7743 | list_del(&space_info->list); |
7661 | kfree(space_info); | 7744 | kfree(space_info); |
@@ -7932,12 +8015,14 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
7932 | u64 extra_flags = chunk_to_extended(flags) & | 8015 | u64 extra_flags = chunk_to_extended(flags) & |
7933 | BTRFS_EXTENDED_PROFILE_MASK; | 8016 | BTRFS_EXTENDED_PROFILE_MASK; |
7934 | 8017 | ||
8018 | write_seqlock(&fs_info->profiles_lock); | ||
7935 | if (flags & BTRFS_BLOCK_GROUP_DATA) | 8019 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
7936 | fs_info->avail_data_alloc_bits &= ~extra_flags; | 8020 | fs_info->avail_data_alloc_bits &= ~extra_flags; |
7937 | if (flags & BTRFS_BLOCK_GROUP_METADATA) | 8021 | if (flags & BTRFS_BLOCK_GROUP_METADATA) |
7938 | fs_info->avail_metadata_alloc_bits &= ~extra_flags; | 8022 | fs_info->avail_metadata_alloc_bits &= ~extra_flags; |
7939 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) | 8023 | if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
7940 | fs_info->avail_system_alloc_bits &= ~extra_flags; | 8024 | fs_info->avail_system_alloc_bits &= ~extra_flags; |
8025 | write_sequnlock(&fs_info->profiles_lock); | ||
7941 | } | 8026 | } |
7942 | 8027 | ||
7943 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 8028 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
@@ -8036,6 +8121,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | |||
8036 | spin_lock(&root->fs_info->block_group_cache_lock); | 8121 | spin_lock(&root->fs_info->block_group_cache_lock); |
8037 | rb_erase(&block_group->cache_node, | 8122 | rb_erase(&block_group->cache_node, |
8038 | &root->fs_info->block_group_cache_tree); | 8123 | &root->fs_info->block_group_cache_tree); |
8124 | |||
8125 | if (root->fs_info->first_logical_byte == block_group->key.objectid) | ||
8126 | root->fs_info->first_logical_byte = (u64)-1; | ||
8039 | spin_unlock(&root->fs_info->block_group_cache_lock); | 8127 | spin_unlock(&root->fs_info->block_group_cache_lock); |
8040 | 8128 | ||
8041 | down_write(&block_group->space_info->groups_sem); | 8129 | down_write(&block_group->space_info->groups_sem); |
@@ -8158,7 +8246,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
8158 | 8246 | ||
8159 | if (end - start >= range->minlen) { | 8247 | if (end - start >= range->minlen) { |
8160 | if (!block_group_cache_done(cache)) { | 8248 | if (!block_group_cache_done(cache)) { |
8161 | ret = cache_block_group(cache, NULL, root, 0); | 8249 | ret = cache_block_group(cache, 0); |
8162 | if (!ret) | 8250 | if (!ret) |
8163 | wait_block_group_cache_done(cache); | 8251 | wait_block_group_cache_done(cache); |
8164 | } | 8252 | } |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 1b319df29eee..5c00d6aeae75 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -1834,7 +1834,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
1834 | */ | 1834 | */ |
1835 | static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) | 1835 | static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) |
1836 | { | 1836 | { |
1837 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1837 | u64 start = page_offset(page); |
1838 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1838 | u64 end = start + PAGE_CACHE_SIZE - 1; |
1839 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) | 1839 | if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) |
1840 | SetPageUptodate(page); | 1840 | SetPageUptodate(page); |
@@ -1846,7 +1846,7 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) | |||
1846 | */ | 1846 | */ |
1847 | static void check_page_locked(struct extent_io_tree *tree, struct page *page) | 1847 | static void check_page_locked(struct extent_io_tree *tree, struct page *page) |
1848 | { | 1848 | { |
1849 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 1849 | u64 start = page_offset(page); |
1850 | u64 end = start + PAGE_CACHE_SIZE - 1; | 1850 | u64 end = start + PAGE_CACHE_SIZE - 1; |
1851 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) | 1851 | if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) |
1852 | unlock_page(page); | 1852 | unlock_page(page); |
@@ -1960,7 +1960,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start, | |||
1960 | return -EIO; | 1960 | return -EIO; |
1961 | } | 1961 | } |
1962 | bio->bi_bdev = dev->bdev; | 1962 | bio->bi_bdev = dev->bdev; |
1963 | bio_add_page(bio, page, length, start-page_offset(page)); | 1963 | bio_add_page(bio, page, length, start - page_offset(page)); |
1964 | btrfsic_submit_bio(WRITE_SYNC, bio); | 1964 | btrfsic_submit_bio(WRITE_SYNC, bio); |
1965 | wait_for_completion(&compl); | 1965 | wait_for_completion(&compl); |
1966 | 1966 | ||
@@ -2293,8 +2293,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err) | |||
2293 | struct page *page = bvec->bv_page; | 2293 | struct page *page = bvec->bv_page; |
2294 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2294 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
2295 | 2295 | ||
2296 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | 2296 | start = page_offset(page) + bvec->bv_offset; |
2297 | bvec->bv_offset; | ||
2298 | end = start + bvec->bv_len - 1; | 2297 | end = start + bvec->bv_len - 1; |
2299 | 2298 | ||
2300 | if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) | 2299 | if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) |
@@ -2353,8 +2352,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) | |||
2353 | (long int)bio->bi_bdev); | 2352 | (long int)bio->bi_bdev); |
2354 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 2353 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
2355 | 2354 | ||
2356 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + | 2355 | start = page_offset(page) + bvec->bv_offset; |
2357 | bvec->bv_offset; | ||
2358 | end = start + bvec->bv_len - 1; | 2356 | end = start + bvec->bv_len - 1; |
2359 | 2357 | ||
2360 | if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) | 2358 | if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) |
@@ -2471,7 +2469,7 @@ static int __must_check submit_one_bio(int rw, struct bio *bio, | |||
2471 | struct extent_io_tree *tree = bio->bi_private; | 2469 | struct extent_io_tree *tree = bio->bi_private; |
2472 | u64 start; | 2470 | u64 start; |
2473 | 2471 | ||
2474 | start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; | 2472 | start = page_offset(page) + bvec->bv_offset; |
2475 | 2473 | ||
2476 | bio->bi_private = NULL; | 2474 | bio->bi_private = NULL; |
2477 | 2475 | ||
@@ -2595,7 +2593,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2595 | unsigned long *bio_flags) | 2593 | unsigned long *bio_flags) |
2596 | { | 2594 | { |
2597 | struct inode *inode = page->mapping->host; | 2595 | struct inode *inode = page->mapping->host; |
2598 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 2596 | u64 start = page_offset(page); |
2599 | u64 page_end = start + PAGE_CACHE_SIZE - 1; | 2597 | u64 page_end = start + PAGE_CACHE_SIZE - 1; |
2600 | u64 end; | 2598 | u64 end; |
2601 | u64 cur = start; | 2599 | u64 cur = start; |
@@ -2648,6 +2646,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2648 | } | 2646 | } |
2649 | } | 2647 | } |
2650 | while (cur <= end) { | 2648 | while (cur <= end) { |
2649 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; | ||
2650 | |||
2651 | if (cur >= last_byte) { | 2651 | if (cur >= last_byte) { |
2652 | char *userpage; | 2652 | char *userpage; |
2653 | struct extent_state *cached = NULL; | 2653 | struct extent_state *cached = NULL; |
@@ -2735,26 +2735,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2735 | continue; | 2735 | continue; |
2736 | } | 2736 | } |
2737 | 2737 | ||
2738 | ret = 0; | 2738 | pnr -= page->index; |
2739 | if (tree->ops && tree->ops->readpage_io_hook) { | 2739 | ret = submit_extent_page(READ, tree, page, |
2740 | ret = tree->ops->readpage_io_hook(page, cur, | ||
2741 | cur + iosize - 1); | ||
2742 | } | ||
2743 | if (!ret) { | ||
2744 | unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1; | ||
2745 | pnr -= page->index; | ||
2746 | ret = submit_extent_page(READ, tree, page, | ||
2747 | sector, disk_io_size, pg_offset, | 2740 | sector, disk_io_size, pg_offset, |
2748 | bdev, bio, pnr, | 2741 | bdev, bio, pnr, |
2749 | end_bio_extent_readpage, mirror_num, | 2742 | end_bio_extent_readpage, mirror_num, |
2750 | *bio_flags, | 2743 | *bio_flags, |
2751 | this_bio_flag); | 2744 | this_bio_flag); |
2752 | if (!ret) { | 2745 | if (!ret) { |
2753 | nr++; | 2746 | nr++; |
2754 | *bio_flags = this_bio_flag; | 2747 | *bio_flags = this_bio_flag; |
2755 | } | 2748 | } else { |
2756 | } | ||
2757 | if (ret) { | ||
2758 | SetPageError(page); | 2749 | SetPageError(page); |
2759 | unlock_extent(tree, cur, cur + iosize - 1); | 2750 | unlock_extent(tree, cur, cur + iosize - 1); |
2760 | } | 2751 | } |
@@ -2806,7 +2797,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
2806 | struct inode *inode = page->mapping->host; | 2797 | struct inode *inode = page->mapping->host; |
2807 | struct extent_page_data *epd = data; | 2798 | struct extent_page_data *epd = data; |
2808 | struct extent_io_tree *tree = epd->tree; | 2799 | struct extent_io_tree *tree = epd->tree; |
2809 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 2800 | u64 start = page_offset(page); |
2810 | u64 delalloc_start; | 2801 | u64 delalloc_start; |
2811 | u64 page_end = start + PAGE_CACHE_SIZE - 1; | 2802 | u64 page_end = start + PAGE_CACHE_SIZE - 1; |
2812 | u64 end; | 2803 | u64 end; |
@@ -3124,12 +3115,9 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb, | |||
3124 | set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); | 3115 | set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); |
3125 | spin_unlock(&eb->refs_lock); | 3116 | spin_unlock(&eb->refs_lock); |
3126 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 3117 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
3127 | spin_lock(&fs_info->delalloc_lock); | 3118 | __percpu_counter_add(&fs_info->dirty_metadata_bytes, |
3128 | if (fs_info->dirty_metadata_bytes >= eb->len) | 3119 | -eb->len, |
3129 | fs_info->dirty_metadata_bytes -= eb->len; | 3120 | fs_info->dirty_metadata_batch); |
3130 | else | ||
3131 | WARN_ON(1); | ||
3132 | spin_unlock(&fs_info->delalloc_lock); | ||
3133 | ret = 1; | 3121 | ret = 1; |
3134 | } else { | 3122 | } else { |
3135 | spin_unlock(&eb->refs_lock); | 3123 | spin_unlock(&eb->refs_lock); |
@@ -3446,15 +3434,9 @@ retry: | |||
3446 | * swizzled back from swapper_space to tmpfs file | 3434 | * swizzled back from swapper_space to tmpfs file |
3447 | * mapping | 3435 | * mapping |
3448 | */ | 3436 | */ |
3449 | if (tree->ops && | 3437 | if (!trylock_page(page)) { |
3450 | tree->ops->write_cache_pages_lock_hook) { | 3438 | flush_fn(data); |
3451 | tree->ops->write_cache_pages_lock_hook(page, | 3439 | lock_page(page); |
3452 | data, flush_fn); | ||
3453 | } else { | ||
3454 | if (!trylock_page(page)) { | ||
3455 | flush_fn(data); | ||
3456 | lock_page(page); | ||
3457 | } | ||
3458 | } | 3440 | } |
3459 | 3441 | ||
3460 | if (unlikely(page->mapping != mapping)) { | 3442 | if (unlikely(page->mapping != mapping)) { |
@@ -3674,7 +3656,7 @@ int extent_invalidatepage(struct extent_io_tree *tree, | |||
3674 | struct page *page, unsigned long offset) | 3656 | struct page *page, unsigned long offset) |
3675 | { | 3657 | { |
3676 | struct extent_state *cached_state = NULL; | 3658 | struct extent_state *cached_state = NULL; |
3677 | u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); | 3659 | u64 start = page_offset(page); |
3678 | u64 end = start + PAGE_CACHE_SIZE - 1; | 3660 | u64 end = start + PAGE_CACHE_SIZE - 1; |
3679 | size_t blocksize = page->mapping->host->i_sb->s_blocksize; | 3661 | size_t blocksize = page->mapping->host->i_sb->s_blocksize; |
3680 | 3662 | ||
@@ -3700,7 +3682,7 @@ int try_release_extent_state(struct extent_map_tree *map, | |||
3700 | struct extent_io_tree *tree, struct page *page, | 3682 | struct extent_io_tree *tree, struct page *page, |
3701 | gfp_t mask) | 3683 | gfp_t mask) |
3702 | { | 3684 | { |
3703 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 3685 | u64 start = page_offset(page); |
3704 | u64 end = start + PAGE_CACHE_SIZE - 1; | 3686 | u64 end = start + PAGE_CACHE_SIZE - 1; |
3705 | int ret = 1; | 3687 | int ret = 1; |
3706 | 3688 | ||
@@ -3739,7 +3721,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, | |||
3739 | gfp_t mask) | 3721 | gfp_t mask) |
3740 | { | 3722 | { |
3741 | struct extent_map *em; | 3723 | struct extent_map *em; |
3742 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 3724 | u64 start = page_offset(page); |
3743 | u64 end = start + PAGE_CACHE_SIZE - 1; | 3725 | u64 end = start + PAGE_CACHE_SIZE - 1; |
3744 | 3726 | ||
3745 | if ((mask & __GFP_WAIT) && | 3727 | if ((mask & __GFP_WAIT) && |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2eacfabd3263..ff182322d112 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -75,7 +75,6 @@ struct extent_io_ops { | |||
75 | int (*merge_bio_hook)(struct page *page, unsigned long offset, | 75 | int (*merge_bio_hook)(struct page *page, unsigned long offset, |
76 | size_t size, struct bio *bio, | 76 | size_t size, struct bio *bio, |
77 | unsigned long bio_flags); | 77 | unsigned long bio_flags); |
78 | int (*readpage_io_hook)(struct page *page, u64 start, u64 end); | ||
79 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); | 78 | int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); |
80 | int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, | 79 | int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, |
81 | struct extent_state *state, int mirror); | 80 | struct extent_state *state, int mirror); |
@@ -90,8 +89,6 @@ struct extent_io_ops { | |||
90 | struct extent_state *other); | 89 | struct extent_state *other); |
91 | void (*split_extent_hook)(struct inode *inode, | 90 | void (*split_extent_hook)(struct inode *inode, |
92 | struct extent_state *orig, u64 split); | 91 | struct extent_state *orig, u64 split); |
93 | int (*write_cache_pages_lock_hook)(struct page *page, void *data, | ||
94 | void (*flush_fn)(void *)); | ||
95 | }; | 92 | }; |
96 | 93 | ||
97 | struct extent_io_tree { | 94 | struct extent_io_tree { |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 94aa53b38721..ec160202be3e 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -684,6 +684,24 @@ out: | |||
684 | return ret; | 684 | return ret; |
685 | } | 685 | } |
686 | 686 | ||
687 | static u64 btrfs_sector_sum_left(struct btrfs_ordered_sum *sums, | ||
688 | struct btrfs_sector_sum *sector_sum, | ||
689 | u64 total_bytes, u64 sectorsize) | ||
690 | { | ||
691 | u64 tmp = sectorsize; | ||
692 | u64 next_sector = sector_sum->bytenr; | ||
693 | struct btrfs_sector_sum *next = sector_sum + 1; | ||
694 | |||
695 | while ((tmp + total_bytes) < sums->len) { | ||
696 | if (next_sector + sectorsize != next->bytenr) | ||
697 | break; | ||
698 | tmp += sectorsize; | ||
699 | next_sector = next->bytenr; | ||
700 | next++; | ||
701 | } | ||
702 | return tmp; | ||
703 | } | ||
704 | |||
687 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, | 705 | int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, |
688 | struct btrfs_root *root, | 706 | struct btrfs_root *root, |
689 | struct btrfs_ordered_sum *sums) | 707 | struct btrfs_ordered_sum *sums) |
@@ -789,20 +807,32 @@ again: | |||
789 | goto insert; | 807 | goto insert; |
790 | } | 808 | } |
791 | 809 | ||
792 | if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / | 810 | if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) / |
793 | csum_size) { | 811 | csum_size) { |
794 | u32 diff = (csum_offset + 1) * csum_size; | 812 | int extend_nr; |
813 | u64 tmp; | ||
814 | u32 diff; | ||
815 | u32 free_space; | ||
795 | 816 | ||
796 | /* | 817 | if (btrfs_leaf_free_space(root, leaf) < |
797 | * is the item big enough already? we dropped our lock | 818 | sizeof(struct btrfs_item) + csum_size * 2) |
798 | * before and need to recheck | 819 | goto insert; |
799 | */ | 820 | |
800 | if (diff < btrfs_item_size_nr(leaf, path->slots[0])) | 821 | free_space = btrfs_leaf_free_space(root, leaf) - |
801 | goto csum; | 822 | sizeof(struct btrfs_item) - csum_size; |
823 | tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes, | ||
824 | root->sectorsize); | ||
825 | tmp >>= root->fs_info->sb->s_blocksize_bits; | ||
826 | WARN_ON(tmp < 1); | ||
827 | |||
828 | extend_nr = max_t(int, 1, (int)tmp); | ||
829 | diff = (csum_offset + extend_nr) * csum_size; | ||
830 | diff = min(diff, MAX_CSUM_ITEMS(root, csum_size) * csum_size); | ||
802 | 831 | ||
803 | diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); | 832 | diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); |
804 | if (diff != csum_size) | 833 | diff = min(free_space, diff); |
805 | goto insert; | 834 | diff /= csum_size; |
835 | diff *= csum_size; | ||
806 | 836 | ||
807 | btrfs_extend_item(trans, root, path, diff); | 837 | btrfs_extend_item(trans, root, path, diff); |
808 | goto csum; | 838 | goto csum; |
@@ -812,19 +842,14 @@ insert: | |||
812 | btrfs_release_path(path); | 842 | btrfs_release_path(path); |
813 | csum_offset = 0; | 843 | csum_offset = 0; |
814 | if (found_next) { | 844 | if (found_next) { |
815 | u64 tmp = total_bytes + root->sectorsize; | 845 | u64 tmp; |
816 | u64 next_sector = sector_sum->bytenr; | ||
817 | struct btrfs_sector_sum *next = sector_sum + 1; | ||
818 | 846 | ||
819 | while (tmp < sums->len) { | 847 | tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes, |
820 | if (next_sector + root->sectorsize != next->bytenr) | 848 | root->sectorsize); |
821 | break; | ||
822 | tmp += root->sectorsize; | ||
823 | next_sector = next->bytenr; | ||
824 | next++; | ||
825 | } | ||
826 | tmp = min(tmp, next_offset - file_key.offset); | ||
827 | tmp >>= root->fs_info->sb->s_blocksize_bits; | 849 | tmp >>= root->fs_info->sb->s_blocksize_bits; |
850 | tmp = min(tmp, (next_offset - file_key.offset) >> | ||
851 | root->fs_info->sb->s_blocksize_bits); | ||
852 | |||
828 | tmp = max((u64)1, tmp); | 853 | tmp = max((u64)1, tmp); |
829 | tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size)); | 854 | tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size)); |
830 | ins_size = csum_size * tmp; | 855 | ins_size = csum_size * tmp; |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index aeb84469d2c4..9f67e623206d 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -30,11 +30,11 @@ | |||
30 | #include <linux/statfs.h> | 30 | #include <linux/statfs.h> |
31 | #include <linux/compat.h> | 31 | #include <linux/compat.h> |
32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
33 | #include <linux/btrfs.h> | ||
33 | #include "ctree.h" | 34 | #include "ctree.h" |
34 | #include "disk-io.h" | 35 | #include "disk-io.h" |
35 | #include "transaction.h" | 36 | #include "transaction.h" |
36 | #include "btrfs_inode.h" | 37 | #include "btrfs_inode.h" |
37 | #include "ioctl.h" | ||
38 | #include "print-tree.h" | 38 | #include "print-tree.h" |
39 | #include "tree-log.h" | 39 | #include "tree-log.h" |
40 | #include "locking.h" | 40 | #include "locking.h" |
@@ -1544,7 +1544,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1544 | * although we have opened a file as writable, we have | 1544 | * although we have opened a file as writable, we have |
1545 | * to stop this write operation to ensure FS consistency. | 1545 | * to stop this write operation to ensure FS consistency. |
1546 | */ | 1546 | */ |
1547 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 1547 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) { |
1548 | mutex_unlock(&inode->i_mutex); | 1548 | mutex_unlock(&inode->i_mutex); |
1549 | err = -EROFS; | 1549 | err = -EROFS; |
1550 | goto out; | 1550 | goto out; |
@@ -1627,7 +1627,20 @@ int btrfs_release_file(struct inode *inode, struct file *filp) | |||
1627 | */ | 1627 | */ |
1628 | if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, | 1628 | if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, |
1629 | &BTRFS_I(inode)->runtime_flags)) { | 1629 | &BTRFS_I(inode)->runtime_flags)) { |
1630 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); | 1630 | struct btrfs_trans_handle *trans; |
1631 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1632 | |||
1633 | /* | ||
1634 | * We need to block on a committing transaction to keep us from | ||
1635 | * throwing an ordered operation on to the list and causing | ||
1636 | * something like sync to deadlock trying to flush out this | ||
1637 | * inode. | ||
1638 | */ | ||
1639 | trans = btrfs_start_transaction(root, 0); | ||
1640 | if (IS_ERR(trans)) | ||
1641 | return PTR_ERR(trans); | ||
1642 | btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode); | ||
1643 | btrfs_end_transaction(trans, root); | ||
1631 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | 1644 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) |
1632 | filemap_flush(inode->i_mapping); | 1645 | filemap_flush(inode->i_mapping); |
1633 | } | 1646 | } |
@@ -1654,16 +1667,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1654 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1667 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1655 | int ret = 0; | 1668 | int ret = 0; |
1656 | struct btrfs_trans_handle *trans; | 1669 | struct btrfs_trans_handle *trans; |
1670 | bool full_sync = 0; | ||
1657 | 1671 | ||
1658 | trace_btrfs_sync_file(file, datasync); | 1672 | trace_btrfs_sync_file(file, datasync); |
1659 | 1673 | ||
1660 | /* | 1674 | /* |
1661 | * We write the dirty pages in the range and wait until they complete | 1675 | * We write the dirty pages in the range and wait until they complete |
1662 | * out of the ->i_mutex. If so, we can flush the dirty pages by | 1676 | * out of the ->i_mutex. If so, we can flush the dirty pages by |
1663 | * multi-task, and make the performance up. | 1677 | * multi-task, and make the performance up. See |
1678 | * btrfs_wait_ordered_range for an explanation of the ASYNC check. | ||
1664 | */ | 1679 | */ |
1665 | atomic_inc(&BTRFS_I(inode)->sync_writers); | 1680 | atomic_inc(&BTRFS_I(inode)->sync_writers); |
1666 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 1681 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); |
1682 | if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
1683 | &BTRFS_I(inode)->runtime_flags)) | ||
1684 | ret = filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
1667 | atomic_dec(&BTRFS_I(inode)->sync_writers); | 1685 | atomic_dec(&BTRFS_I(inode)->sync_writers); |
1668 | if (ret) | 1686 | if (ret) |
1669 | return ret; | 1687 | return ret; |
@@ -1675,7 +1693,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1675 | * range being left. | 1693 | * range being left. |
1676 | */ | 1694 | */ |
1677 | atomic_inc(&root->log_batch); | 1695 | atomic_inc(&root->log_batch); |
1678 | btrfs_wait_ordered_range(inode, start, end - start + 1); | 1696 | full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, |
1697 | &BTRFS_I(inode)->runtime_flags); | ||
1698 | if (full_sync) | ||
1699 | btrfs_wait_ordered_range(inode, start, end - start + 1); | ||
1679 | atomic_inc(&root->log_batch); | 1700 | atomic_inc(&root->log_batch); |
1680 | 1701 | ||
1681 | /* | 1702 | /* |
@@ -1742,13 +1763,25 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1742 | 1763 | ||
1743 | if (ret != BTRFS_NO_LOG_SYNC) { | 1764 | if (ret != BTRFS_NO_LOG_SYNC) { |
1744 | if (ret > 0) { | 1765 | if (ret > 0) { |
1766 | /* | ||
1767 | * If we didn't already wait for ordered extents we need | ||
1768 | * to do that now. | ||
1769 | */ | ||
1770 | if (!full_sync) | ||
1771 | btrfs_wait_ordered_range(inode, start, | ||
1772 | end - start + 1); | ||
1745 | ret = btrfs_commit_transaction(trans, root); | 1773 | ret = btrfs_commit_transaction(trans, root); |
1746 | } else { | 1774 | } else { |
1747 | ret = btrfs_sync_log(trans, root); | 1775 | ret = btrfs_sync_log(trans, root); |
1748 | if (ret == 0) | 1776 | if (ret == 0) { |
1749 | ret = btrfs_end_transaction(trans, root); | 1777 | ret = btrfs_end_transaction(trans, root); |
1750 | else | 1778 | } else { |
1779 | if (!full_sync) | ||
1780 | btrfs_wait_ordered_range(inode, start, | ||
1781 | end - | ||
1782 | start + 1); | ||
1751 | ret = btrfs_commit_transaction(trans, root); | 1783 | ret = btrfs_commit_transaction(trans, root); |
1784 | } | ||
1752 | } | 1785 | } |
1753 | } else { | 1786 | } else { |
1754 | ret = btrfs_end_transaction(trans, root); | 1787 | ret = btrfs_end_transaction(trans, root); |
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 0be7a8742a43..c8090f18c217 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c | |||
@@ -1356,6 +1356,8 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) | |||
1356 | u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; | 1356 | u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; |
1357 | int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); | 1357 | int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); |
1358 | 1358 | ||
1359 | max_bitmaps = max(max_bitmaps, 1); | ||
1360 | |||
1359 | BUG_ON(ctl->total_bitmaps > max_bitmaps); | 1361 | BUG_ON(ctl->total_bitmaps > max_bitmaps); |
1360 | 1362 | ||
1361 | /* | 1363 | /* |
@@ -1636,10 +1638,14 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, | |||
1636 | } | 1638 | } |
1637 | 1639 | ||
1638 | /* | 1640 | /* |
1639 | * some block groups are so tiny they can't be enveloped by a bitmap, so | 1641 | * The original block groups from mkfs can be really small, like 8 |
1640 | * don't even bother to create a bitmap for this | 1642 | * megabytes, so don't bother with a bitmap for those entries. However |
1643 | * some block groups can be smaller than what a bitmap would cover but | ||
1644 | * are still large enough that they could overflow the 32k memory limit, | ||
1645 | * so still allow those block groups to have a bitmap | ||
1646 | * entry. | ||
1641 | */ | 1647 | */ |
1642 | if (BITS_PER_BITMAP * ctl->unit > block_group->key.offset) | 1648 | if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset) |
1643 | return false; | 1649 | return false; |
1644 | 1650 | ||
1645 | return true; | 1651 | return true; |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cc93b23ca352..1aa98be54ce0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -39,12 +39,12 @@ | |||
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #include <linux/ratelimit.h> | 40 | #include <linux/ratelimit.h> |
41 | #include <linux/mount.h> | 41 | #include <linux/mount.h> |
42 | #include <linux/btrfs.h> | ||
42 | #include "compat.h" | 43 | #include "compat.h" |
43 | #include "ctree.h" | 44 | #include "ctree.h" |
44 | #include "disk-io.h" | 45 | #include "disk-io.h" |
45 | #include "transaction.h" | 46 | #include "transaction.h" |
46 | #include "btrfs_inode.h" | 47 | #include "btrfs_inode.h" |
47 | #include "ioctl.h" | ||
48 | #include "print-tree.h" | 48 | #include "print-tree.h" |
49 | #include "ordered-data.h" | 49 | #include "ordered-data.h" |
50 | #include "xattr.h" | 50 | #include "xattr.h" |
@@ -608,7 +608,7 @@ static noinline int submit_compressed_extents(struct inode *inode, | |||
608 | if (list_empty(&async_cow->extents)) | 608 | if (list_empty(&async_cow->extents)) |
609 | return 0; | 609 | return 0; |
610 | 610 | ||
611 | 611 | again: | |
612 | while (!list_empty(&async_cow->extents)) { | 612 | while (!list_empty(&async_cow->extents)) { |
613 | async_extent = list_entry(async_cow->extents.next, | 613 | async_extent = list_entry(async_cow->extents.next, |
614 | struct async_extent, list); | 614 | struct async_extent, list); |
@@ -648,6 +648,8 @@ retry: | |||
648 | async_extent->ram_size - 1, | 648 | async_extent->ram_size - 1, |
649 | btrfs_get_extent, | 649 | btrfs_get_extent, |
650 | WB_SYNC_ALL); | 650 | WB_SYNC_ALL); |
651 | else if (ret) | ||
652 | unlock_page(async_cow->locked_page); | ||
651 | kfree(async_extent); | 653 | kfree(async_extent); |
652 | cond_resched(); | 654 | cond_resched(); |
653 | continue; | 655 | continue; |
@@ -672,6 +674,7 @@ retry: | |||
672 | 674 | ||
673 | if (ret) { | 675 | if (ret) { |
674 | int i; | 676 | int i; |
677 | |||
675 | for (i = 0; i < async_extent->nr_pages; i++) { | 678 | for (i = 0; i < async_extent->nr_pages; i++) { |
676 | WARN_ON(async_extent->pages[i]->mapping); | 679 | WARN_ON(async_extent->pages[i]->mapping); |
677 | page_cache_release(async_extent->pages[i]); | 680 | page_cache_release(async_extent->pages[i]); |
@@ -679,12 +682,10 @@ retry: | |||
679 | kfree(async_extent->pages); | 682 | kfree(async_extent->pages); |
680 | async_extent->nr_pages = 0; | 683 | async_extent->nr_pages = 0; |
681 | async_extent->pages = NULL; | 684 | async_extent->pages = NULL; |
682 | unlock_extent(io_tree, async_extent->start, | 685 | |
683 | async_extent->start + | ||
684 | async_extent->ram_size - 1); | ||
685 | if (ret == -ENOSPC) | 686 | if (ret == -ENOSPC) |
686 | goto retry; | 687 | goto retry; |
687 | goto out_free; /* JDM: Requeue? */ | 688 | goto out_free; |
688 | } | 689 | } |
689 | 690 | ||
690 | /* | 691 | /* |
@@ -696,10 +697,13 @@ retry: | |||
696 | async_extent->ram_size - 1, 0); | 697 | async_extent->ram_size - 1, 0); |
697 | 698 | ||
698 | em = alloc_extent_map(); | 699 | em = alloc_extent_map(); |
699 | BUG_ON(!em); /* -ENOMEM */ | 700 | if (!em) |
701 | goto out_free_reserve; | ||
700 | em->start = async_extent->start; | 702 | em->start = async_extent->start; |
701 | em->len = async_extent->ram_size; | 703 | em->len = async_extent->ram_size; |
702 | em->orig_start = em->start; | 704 | em->orig_start = em->start; |
705 | em->mod_start = em->start; | ||
706 | em->mod_len = em->len; | ||
703 | 707 | ||
704 | em->block_start = ins.objectid; | 708 | em->block_start = ins.objectid; |
705 | em->block_len = ins.offset; | 709 | em->block_len = ins.offset; |
@@ -726,6 +730,9 @@ retry: | |||
726 | async_extent->ram_size - 1, 0); | 730 | async_extent->ram_size - 1, 0); |
727 | } | 731 | } |
728 | 732 | ||
733 | if (ret) | ||
734 | goto out_free_reserve; | ||
735 | |||
729 | ret = btrfs_add_ordered_extent_compress(inode, | 736 | ret = btrfs_add_ordered_extent_compress(inode, |
730 | async_extent->start, | 737 | async_extent->start, |
731 | ins.objectid, | 738 | ins.objectid, |
@@ -733,7 +740,8 @@ retry: | |||
733 | ins.offset, | 740 | ins.offset, |
734 | BTRFS_ORDERED_COMPRESSED, | 741 | BTRFS_ORDERED_COMPRESSED, |
735 | async_extent->compress_type); | 742 | async_extent->compress_type); |
736 | BUG_ON(ret); /* -ENOMEM */ | 743 | if (ret) |
744 | goto out_free_reserve; | ||
737 | 745 | ||
738 | /* | 746 | /* |
739 | * clear dirty, set writeback and unlock the pages. | 747 | * clear dirty, set writeback and unlock the pages. |
@@ -754,18 +762,30 @@ retry: | |||
754 | ins.objectid, | 762 | ins.objectid, |
755 | ins.offset, async_extent->pages, | 763 | ins.offset, async_extent->pages, |
756 | async_extent->nr_pages); | 764 | async_extent->nr_pages); |
757 | |||
758 | BUG_ON(ret); /* -ENOMEM */ | ||
759 | alloc_hint = ins.objectid + ins.offset; | 765 | alloc_hint = ins.objectid + ins.offset; |
760 | kfree(async_extent); | 766 | kfree(async_extent); |
767 | if (ret) | ||
768 | goto out; | ||
761 | cond_resched(); | 769 | cond_resched(); |
762 | } | 770 | } |
763 | ret = 0; | 771 | ret = 0; |
764 | out: | 772 | out: |
765 | return ret; | 773 | return ret; |
774 | out_free_reserve: | ||
775 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | ||
766 | out_free: | 776 | out_free: |
777 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | ||
778 | async_extent->start, | ||
779 | async_extent->start + | ||
780 | async_extent->ram_size - 1, | ||
781 | NULL, EXTENT_CLEAR_UNLOCK_PAGE | | ||
782 | EXTENT_CLEAR_UNLOCK | | ||
783 | EXTENT_CLEAR_DELALLOC | | ||
784 | EXTENT_CLEAR_DIRTY | | ||
785 | EXTENT_SET_WRITEBACK | | ||
786 | EXTENT_END_WRITEBACK); | ||
767 | kfree(async_extent); | 787 | kfree(async_extent); |
768 | goto out; | 788 | goto again; |
769 | } | 789 | } |
770 | 790 | ||
771 | static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | 791 | static u64 get_extent_allocation_hint(struct inode *inode, u64 start, |
@@ -892,6 +912,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans, | |||
892 | em->orig_start = em->start; | 912 | em->orig_start = em->start; |
893 | ram_size = ins.offset; | 913 | ram_size = ins.offset; |
894 | em->len = ins.offset; | 914 | em->len = ins.offset; |
915 | em->mod_start = em->start; | ||
916 | em->mod_len = em->len; | ||
895 | 917 | ||
896 | em->block_start = ins.objectid; | 918 | em->block_start = ins.objectid; |
897 | em->block_len = ins.offset; | 919 | em->block_len = ins.offset; |
@@ -1338,6 +1360,8 @@ out_check: | |||
1338 | em->block_start = disk_bytenr; | 1360 | em->block_start = disk_bytenr; |
1339 | em->orig_block_len = disk_num_bytes; | 1361 | em->orig_block_len = disk_num_bytes; |
1340 | em->bdev = root->fs_info->fs_devices->latest_bdev; | 1362 | em->bdev = root->fs_info->fs_devices->latest_bdev; |
1363 | em->mod_start = em->start; | ||
1364 | em->mod_len = em->len; | ||
1341 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | 1365 | set_bit(EXTENT_FLAG_PINNED, &em->flags); |
1342 | set_bit(EXTENT_FLAG_FILLING, &em->flags); | 1366 | set_bit(EXTENT_FLAG_FILLING, &em->flags); |
1343 | em->generation = -1; | 1367 | em->generation = -1; |
@@ -1508,14 +1532,22 @@ static void btrfs_set_bit_hook(struct inode *inode, | |||
1508 | spin_unlock(&BTRFS_I(inode)->lock); | 1532 | spin_unlock(&BTRFS_I(inode)->lock); |
1509 | } | 1533 | } |
1510 | 1534 | ||
1511 | spin_lock(&root->fs_info->delalloc_lock); | 1535 | __percpu_counter_add(&root->fs_info->delalloc_bytes, len, |
1536 | root->fs_info->delalloc_batch); | ||
1537 | spin_lock(&BTRFS_I(inode)->lock); | ||
1512 | BTRFS_I(inode)->delalloc_bytes += len; | 1538 | BTRFS_I(inode)->delalloc_bytes += len; |
1513 | root->fs_info->delalloc_bytes += len; | 1539 | if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST, |
1514 | if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1540 | &BTRFS_I(inode)->runtime_flags)) { |
1515 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | 1541 | spin_lock(&root->fs_info->delalloc_lock); |
1516 | &root->fs_info->delalloc_inodes); | 1542 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1543 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | ||
1544 | &root->fs_info->delalloc_inodes); | ||
1545 | set_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
1546 | &BTRFS_I(inode)->runtime_flags); | ||
1547 | } | ||
1548 | spin_unlock(&root->fs_info->delalloc_lock); | ||
1517 | } | 1549 | } |
1518 | spin_unlock(&root->fs_info->delalloc_lock); | 1550 | spin_unlock(&BTRFS_I(inode)->lock); |
1519 | } | 1551 | } |
1520 | } | 1552 | } |
1521 | 1553 | ||
@@ -1550,15 +1582,22 @@ static void btrfs_clear_bit_hook(struct inode *inode, | |||
1550 | && do_list) | 1582 | && do_list) |
1551 | btrfs_free_reserved_data_space(inode, len); | 1583 | btrfs_free_reserved_data_space(inode, len); |
1552 | 1584 | ||
1553 | spin_lock(&root->fs_info->delalloc_lock); | 1585 | __percpu_counter_add(&root->fs_info->delalloc_bytes, -len, |
1554 | root->fs_info->delalloc_bytes -= len; | 1586 | root->fs_info->delalloc_batch); |
1587 | spin_lock(&BTRFS_I(inode)->lock); | ||
1555 | BTRFS_I(inode)->delalloc_bytes -= len; | 1588 | BTRFS_I(inode)->delalloc_bytes -= len; |
1556 | |||
1557 | if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && | 1589 | if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && |
1558 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1590 | test_bit(BTRFS_INODE_IN_DELALLOC_LIST, |
1559 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | 1591 | &BTRFS_I(inode)->runtime_flags)) { |
1592 | spin_lock(&root->fs_info->delalloc_lock); | ||
1593 | if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | ||
1594 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | ||
1595 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
1596 | &BTRFS_I(inode)->runtime_flags); | ||
1597 | } | ||
1598 | spin_unlock(&root->fs_info->delalloc_lock); | ||
1560 | } | 1599 | } |
1561 | spin_unlock(&root->fs_info->delalloc_lock); | 1600 | spin_unlock(&BTRFS_I(inode)->lock); |
1562 | } | 1601 | } |
1563 | } | 1602 | } |
1564 | 1603 | ||
@@ -2001,11 +2040,23 @@ out: | |||
2001 | if (trans) | 2040 | if (trans) |
2002 | btrfs_end_transaction(trans, root); | 2041 | btrfs_end_transaction(trans, root); |
2003 | 2042 | ||
2004 | if (ret) | 2043 | if (ret) { |
2005 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, | 2044 | clear_extent_uptodate(io_tree, ordered_extent->file_offset, |
2006 | ordered_extent->file_offset + | 2045 | ordered_extent->file_offset + |
2007 | ordered_extent->len - 1, NULL, GFP_NOFS); | 2046 | ordered_extent->len - 1, NULL, GFP_NOFS); |
2008 | 2047 | ||
2048 | /* | ||
2049 | * If the ordered extent had an IOERR or something else went | ||
2050 | * wrong we need to return the space for this ordered extent | ||
2051 | * back to the allocator. | ||
2052 | */ | ||
2053 | if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) && | ||
2054 | !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) | ||
2055 | btrfs_free_reserved_extent(root, ordered_extent->start, | ||
2056 | ordered_extent->disk_len); | ||
2057 | } | ||
2058 | |||
2059 | |||
2009 | /* | 2060 | /* |
2010 | * This needs to be done to make sure anybody waiting knows we are done | 2061 | * This needs to be done to make sure anybody waiting knows we are done |
2011 | * updating everything for this ordered extent. | 2062 | * updating everything for this ordered extent. |
@@ -2062,7 +2113,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2062 | static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 2113 | static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
2063 | struct extent_state *state, int mirror) | 2114 | struct extent_state *state, int mirror) |
2064 | { | 2115 | { |
2065 | size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); | 2116 | size_t offset = start - page_offset(page); |
2066 | struct inode *inode = page->mapping->host; | 2117 | struct inode *inode = page->mapping->host; |
2067 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 2118 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
2068 | char *kaddr; | 2119 | char *kaddr; |
@@ -2167,11 +2218,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
2167 | } | 2218 | } |
2168 | } | 2219 | } |
2169 | 2220 | ||
2170 | enum btrfs_orphan_cleanup_state { | ||
2171 | ORPHAN_CLEANUP_STARTED = 1, | ||
2172 | ORPHAN_CLEANUP_DONE = 2, | ||
2173 | }; | ||
2174 | |||
2175 | /* | 2221 | /* |
2176 | * This is called in transaction commit time. If there are no orphan | 2222 | * This is called in transaction commit time. If there are no orphan |
2177 | * files in the subvolume, it removes orphan item and frees block_rsv | 2223 | * files in the subvolume, it removes orphan item and frees block_rsv |
@@ -2469,6 +2515,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2469 | */ | 2515 | */ |
2470 | set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, | 2516 | set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, |
2471 | &BTRFS_I(inode)->runtime_flags); | 2517 | &BTRFS_I(inode)->runtime_flags); |
2518 | atomic_inc(&root->orphan_inodes); | ||
2472 | 2519 | ||
2473 | /* if we have links, this was a truncate, lets do that */ | 2520 | /* if we have links, this was a truncate, lets do that */ |
2474 | if (inode->i_nlink) { | 2521 | if (inode->i_nlink) { |
@@ -2491,6 +2538,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2491 | goto out; | 2538 | goto out; |
2492 | 2539 | ||
2493 | ret = btrfs_truncate(inode); | 2540 | ret = btrfs_truncate(inode); |
2541 | if (ret) | ||
2542 | btrfs_orphan_del(NULL, inode); | ||
2494 | } else { | 2543 | } else { |
2495 | nr_unlink++; | 2544 | nr_unlink++; |
2496 | } | 2545 | } |
@@ -2709,34 +2758,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, | |||
2709 | struct btrfs_inode_item *item, | 2758 | struct btrfs_inode_item *item, |
2710 | struct inode *inode) | 2759 | struct inode *inode) |
2711 | { | 2760 | { |
2712 | btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); | 2761 | struct btrfs_map_token token; |
2713 | btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); | 2762 | |
2714 | btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); | 2763 | btrfs_init_map_token(&token); |
2715 | btrfs_set_inode_mode(leaf, item, inode->i_mode); | ||
2716 | btrfs_set_inode_nlink(leaf, item, inode->i_nlink); | ||
2717 | 2764 | ||
2718 | btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), | 2765 | btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token); |
2719 | inode->i_atime.tv_sec); | 2766 | btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token); |
2720 | btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), | 2767 | btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size, |
2721 | inode->i_atime.tv_nsec); | 2768 | &token); |
2769 | btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token); | ||
2770 | btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token); | ||
2722 | 2771 | ||
2723 | btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), | 2772 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item), |
2724 | inode->i_mtime.tv_sec); | 2773 | inode->i_atime.tv_sec, &token); |
2725 | btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), | 2774 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item), |
2726 | inode->i_mtime.tv_nsec); | 2775 | inode->i_atime.tv_nsec, &token); |
2727 | 2776 | ||
2728 | btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), | 2777 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item), |
2729 | inode->i_ctime.tv_sec); | 2778 | inode->i_mtime.tv_sec, &token); |
2730 | btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), | 2779 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item), |
2731 | inode->i_ctime.tv_nsec); | 2780 | inode->i_mtime.tv_nsec, &token); |
2732 | 2781 | ||
2733 | btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); | 2782 | btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item), |
2734 | btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); | 2783 | inode->i_ctime.tv_sec, &token); |
2735 | btrfs_set_inode_sequence(leaf, item, inode->i_version); | 2784 | btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item), |
2736 | btrfs_set_inode_transid(leaf, item, trans->transid); | 2785 | inode->i_ctime.tv_nsec, &token); |
2737 | btrfs_set_inode_rdev(leaf, item, inode->i_rdev); | 2786 | |
2738 | btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); | 2787 | btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode), |
2739 | btrfs_set_inode_block_group(leaf, item, 0); | 2788 | &token); |
2789 | btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation, | ||
2790 | &token); | ||
2791 | btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token); | ||
2792 | btrfs_set_token_inode_transid(leaf, item, trans->transid, &token); | ||
2793 | btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token); | ||
2794 | btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token); | ||
2795 | btrfs_set_token_inode_block_group(leaf, item, 0, &token); | ||
2740 | } | 2796 | } |
2741 | 2797 | ||
2742 | /* | 2798 | /* |
@@ -3832,6 +3888,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr) | |||
3832 | 3888 | ||
3833 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ | 3889 | /* we don't support swapfiles, so vmtruncate shouldn't fail */ |
3834 | truncate_setsize(inode, newsize); | 3890 | truncate_setsize(inode, newsize); |
3891 | |||
3892 | /* Disable nonlocked read DIO to avoid the endless truncate */ | ||
3893 | btrfs_inode_block_unlocked_dio(inode); | ||
3894 | inode_dio_wait(inode); | ||
3895 | btrfs_inode_resume_unlocked_dio(inode); | ||
3896 | |||
3835 | ret = btrfs_truncate(inode); | 3897 | ret = btrfs_truncate(inode); |
3836 | if (ret && inode->i_nlink) | 3898 | if (ret && inode->i_nlink) |
3837 | btrfs_orphan_del(NULL, inode); | 3899 | btrfs_orphan_del(NULL, inode); |
@@ -3904,6 +3966,12 @@ void btrfs_evict_inode(struct inode *inode) | |||
3904 | goto no_delete; | 3966 | goto no_delete; |
3905 | } | 3967 | } |
3906 | 3968 | ||
3969 | ret = btrfs_commit_inode_delayed_inode(inode); | ||
3970 | if (ret) { | ||
3971 | btrfs_orphan_del(NULL, inode); | ||
3972 | goto no_delete; | ||
3973 | } | ||
3974 | |||
3907 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); | 3975 | rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); |
3908 | if (!rsv) { | 3976 | if (!rsv) { |
3909 | btrfs_orphan_del(NULL, inode); | 3977 | btrfs_orphan_del(NULL, inode); |
@@ -3941,7 +4009,7 @@ void btrfs_evict_inode(struct inode *inode) | |||
3941 | goto no_delete; | 4009 | goto no_delete; |
3942 | } | 4010 | } |
3943 | 4011 | ||
3944 | trans = btrfs_start_transaction_lflush(root, 1); | 4012 | trans = btrfs_join_transaction(root); |
3945 | if (IS_ERR(trans)) { | 4013 | if (IS_ERR(trans)) { |
3946 | btrfs_orphan_del(NULL, inode); | 4014 | btrfs_orphan_del(NULL, inode); |
3947 | btrfs_free_block_rsv(root, rsv); | 4015 | btrfs_free_block_rsv(root, rsv); |
@@ -3955,9 +4023,6 @@ void btrfs_evict_inode(struct inode *inode) | |||
3955 | break; | 4023 | break; |
3956 | 4024 | ||
3957 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 4025 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
3958 | ret = btrfs_update_inode(trans, root, inode); | ||
3959 | BUG_ON(ret); | ||
3960 | |||
3961 | btrfs_end_transaction(trans, root); | 4026 | btrfs_end_transaction(trans, root); |
3962 | trans = NULL; | 4027 | trans = NULL; |
3963 | btrfs_btree_balance_dirty(root); | 4028 | btrfs_btree_balance_dirty(root); |
@@ -5006,12 +5071,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
5006 | goto out_unlock; | 5071 | goto out_unlock; |
5007 | } | 5072 | } |
5008 | 5073 | ||
5009 | err = btrfs_update_inode(trans, root, inode); | ||
5010 | if (err) { | ||
5011 | drop_inode = 1; | ||
5012 | goto out_unlock; | ||
5013 | } | ||
5014 | |||
5015 | /* | 5074 | /* |
5016 | * If the active LSM wants to access the inode during | 5075 | * If the active LSM wants to access the inode during |
5017 | * d_instantiate it needs these. Smack checks to see | 5076 | * d_instantiate it needs these. Smack checks to see |
@@ -5949,6 +6008,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start, | |||
5949 | 6008 | ||
5950 | em->start = start; | 6009 | em->start = start; |
5951 | em->orig_start = orig_start; | 6010 | em->orig_start = orig_start; |
6011 | em->mod_start = start; | ||
6012 | em->mod_len = len; | ||
5952 | em->len = len; | 6013 | em->len = len; |
5953 | em->block_len = block_len; | 6014 | em->block_len = block_len; |
5954 | em->block_start = block_start; | 6015 | em->block_start = block_start; |
@@ -5990,16 +6051,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5990 | u64 len = bh_result->b_size; | 6051 | u64 len = bh_result->b_size; |
5991 | struct btrfs_trans_handle *trans; | 6052 | struct btrfs_trans_handle *trans; |
5992 | int unlock_bits = EXTENT_LOCKED; | 6053 | int unlock_bits = EXTENT_LOCKED; |
5993 | int ret; | 6054 | int ret = 0; |
5994 | 6055 | ||
5995 | if (create) { | 6056 | if (create) { |
5996 | ret = btrfs_delalloc_reserve_space(inode, len); | 6057 | spin_lock(&BTRFS_I(inode)->lock); |
5997 | if (ret) | 6058 | BTRFS_I(inode)->outstanding_extents++; |
5998 | return ret; | 6059 | spin_unlock(&BTRFS_I(inode)->lock); |
5999 | unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; | 6060 | unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; |
6000 | } else { | 6061 | } else |
6001 | len = min_t(u64, len, root->sectorsize); | 6062 | len = min_t(u64, len, root->sectorsize); |
6002 | } | ||
6003 | 6063 | ||
6004 | lockstart = start; | 6064 | lockstart = start; |
6005 | lockend = start + len - 1; | 6065 | lockend = start + len - 1; |
@@ -6011,14 +6071,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
6011 | if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) | 6071 | if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) |
6012 | return -ENOTBLK; | 6072 | return -ENOTBLK; |
6013 | 6073 | ||
6014 | if (create) { | ||
6015 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
6016 | lockend, EXTENT_DELALLOC, NULL, | ||
6017 | &cached_state, GFP_NOFS); | ||
6018 | if (ret) | ||
6019 | goto unlock_err; | ||
6020 | } | ||
6021 | |||
6022 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | 6074 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); |
6023 | if (IS_ERR(em)) { | 6075 | if (IS_ERR(em)) { |
6024 | ret = PTR_ERR(em); | 6076 | ret = PTR_ERR(em); |
@@ -6050,7 +6102,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
6050 | if (!create && (em->block_start == EXTENT_MAP_HOLE || | 6102 | if (!create && (em->block_start == EXTENT_MAP_HOLE || |
6051 | test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 6103 | test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
6052 | free_extent_map(em); | 6104 | free_extent_map(em); |
6053 | ret = 0; | ||
6054 | goto unlock_err; | 6105 | goto unlock_err; |
6055 | } | 6106 | } |
6056 | 6107 | ||
@@ -6148,6 +6199,11 @@ unlock: | |||
6148 | */ | 6199 | */ |
6149 | if (start + len > i_size_read(inode)) | 6200 | if (start + len > i_size_read(inode)) |
6150 | i_size_write(inode, start + len); | 6201 | i_size_write(inode, start + len); |
6202 | |||
6203 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
6204 | lockstart + len - 1, EXTENT_DELALLOC, NULL, | ||
6205 | &cached_state, GFP_NOFS); | ||
6206 | BUG_ON(ret); | ||
6151 | } | 6207 | } |
6152 | 6208 | ||
6153 | /* | 6209 | /* |
@@ -6156,24 +6212,9 @@ unlock: | |||
6156 | * aren't using if there is any left over space. | 6212 | * aren't using if there is any left over space. |
6157 | */ | 6213 | */ |
6158 | if (lockstart < lockend) { | 6214 | if (lockstart < lockend) { |
6159 | if (create && len < lockend - lockstart) { | 6215 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, |
6160 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | 6216 | lockend, unlock_bits, 1, 0, |
6161 | lockstart + len - 1, | 6217 | &cached_state, GFP_NOFS); |
6162 | unlock_bits | EXTENT_DEFRAG, 1, 0, | ||
6163 | &cached_state, GFP_NOFS); | ||
6164 | /* | ||
6165 | * Beside unlock, we also need to cleanup reserved space | ||
6166 | * for the left range by attaching EXTENT_DO_ACCOUNTING. | ||
6167 | */ | ||
6168 | clear_extent_bit(&BTRFS_I(inode)->io_tree, | ||
6169 | lockstart + len, lockend, | ||
6170 | unlock_bits | EXTENT_DO_ACCOUNTING | | ||
6171 | EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS); | ||
6172 | } else { | ||
6173 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
6174 | lockend, unlock_bits, 1, 0, | ||
6175 | &cached_state, GFP_NOFS); | ||
6176 | } | ||
6177 | } else { | 6218 | } else { |
6178 | free_extent_state(cached_state); | 6219 | free_extent_state(cached_state); |
6179 | } | 6220 | } |
@@ -6183,9 +6224,6 @@ unlock: | |||
6183 | return 0; | 6224 | return 0; |
6184 | 6225 | ||
6185 | unlock_err: | 6226 | unlock_err: |
6186 | if (create) | ||
6187 | unlock_bits |= EXTENT_DO_ACCOUNTING; | ||
6188 | |||
6189 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 6227 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
6190 | unlock_bits, 1, 0, &cached_state, GFP_NOFS); | 6228 | unlock_bits, 1, 0, &cached_state, GFP_NOFS); |
6191 | return ret; | 6229 | return ret; |
@@ -6623,15 +6661,63 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
6623 | { | 6661 | { |
6624 | struct file *file = iocb->ki_filp; | 6662 | struct file *file = iocb->ki_filp; |
6625 | struct inode *inode = file->f_mapping->host; | 6663 | struct inode *inode = file->f_mapping->host; |
6664 | size_t count = 0; | ||
6665 | int flags = 0; | ||
6666 | bool wakeup = true; | ||
6667 | bool relock = false; | ||
6668 | ssize_t ret; | ||
6626 | 6669 | ||
6627 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, | 6670 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, |
6628 | offset, nr_segs)) | 6671 | offset, nr_segs)) |
6629 | return 0; | 6672 | return 0; |
6630 | 6673 | ||
6631 | return __blockdev_direct_IO(rw, iocb, inode, | 6674 | atomic_inc(&inode->i_dio_count); |
6632 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, | 6675 | smp_mb__after_atomic_inc(); |
6633 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, | 6676 | |
6634 | btrfs_submit_direct, 0); | 6677 | if (rw & WRITE) { |
6678 | count = iov_length(iov, nr_segs); | ||
6679 | /* | ||
6680 | * If the write DIO is beyond the EOF, we need update | ||
6681 | * the isize, but it is protected by i_mutex. So we can | ||
6682 | * not unlock the i_mutex at this case. | ||
6683 | */ | ||
6684 | if (offset + count <= inode->i_size) { | ||
6685 | mutex_unlock(&inode->i_mutex); | ||
6686 | relock = true; | ||
6687 | } | ||
6688 | ret = btrfs_delalloc_reserve_space(inode, count); | ||
6689 | if (ret) | ||
6690 | goto out; | ||
6691 | } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK, | ||
6692 | &BTRFS_I(inode)->runtime_flags))) { | ||
6693 | inode_dio_done(inode); | ||
6694 | flags = DIO_LOCKING | DIO_SKIP_HOLES; | ||
6695 | wakeup = false; | ||
6696 | } | ||
6697 | |||
6698 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
6699 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, | ||
6700 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, | ||
6701 | btrfs_submit_direct, flags); | ||
6702 | if (rw & WRITE) { | ||
6703 | if (ret < 0 && ret != -EIOCBQUEUED) | ||
6704 | btrfs_delalloc_release_space(inode, count); | ||
6705 | else if (ret > 0 && (size_t)ret < count) { | ||
6706 | spin_lock(&BTRFS_I(inode)->lock); | ||
6707 | BTRFS_I(inode)->outstanding_extents++; | ||
6708 | spin_unlock(&BTRFS_I(inode)->lock); | ||
6709 | btrfs_delalloc_release_space(inode, | ||
6710 | count - (size_t)ret); | ||
6711 | } | ||
6712 | btrfs_delalloc_release_metadata(inode, 0); | ||
6713 | } | ||
6714 | out: | ||
6715 | if (wakeup) | ||
6716 | inode_dio_done(inode); | ||
6717 | if (relock) | ||
6718 | mutex_lock(&inode->i_mutex); | ||
6719 | |||
6720 | return ret; | ||
6635 | } | 6721 | } |
6636 | 6722 | ||
6637 | #define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) | 6723 | #define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) |
@@ -6735,8 +6821,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset) | |||
6735 | return; | 6821 | return; |
6736 | } | 6822 | } |
6737 | lock_extent_bits(tree, page_start, page_end, 0, &cached_state); | 6823 | lock_extent_bits(tree, page_start, page_end, 0, &cached_state); |
6738 | ordered = btrfs_lookup_ordered_extent(inode, | 6824 | ordered = btrfs_lookup_ordered_extent(inode, page_offset(page)); |
6739 | page_offset(page)); | ||
6740 | if (ordered) { | 6825 | if (ordered) { |
6741 | /* | 6826 | /* |
6742 | * IO on this page will never be started, so we need | 6827 | * IO on this page will never be started, so we need |
@@ -7216,8 +7301,9 @@ int btrfs_drop_inode(struct inode *inode) | |||
7216 | { | 7301 | { |
7217 | struct btrfs_root *root = BTRFS_I(inode)->root; | 7302 | struct btrfs_root *root = BTRFS_I(inode)->root; |
7218 | 7303 | ||
7304 | /* the snap/subvol tree is on deleting */ | ||
7219 | if (btrfs_root_refs(&root->root_item) == 0 && | 7305 | if (btrfs_root_refs(&root->root_item) == 0 && |
7220 | !btrfs_is_free_space_inode(inode)) | 7306 | root != root->fs_info->tree_root) |
7221 | return 1; | 7307 | return 1; |
7222 | else | 7308 | else |
7223 | return generic_drop_inode(inode); | 7309 | return generic_drop_inode(inode); |
@@ -7299,14 +7385,19 @@ fail: | |||
7299 | static int btrfs_getattr(struct vfsmount *mnt, | 7385 | static int btrfs_getattr(struct vfsmount *mnt, |
7300 | struct dentry *dentry, struct kstat *stat) | 7386 | struct dentry *dentry, struct kstat *stat) |
7301 | { | 7387 | { |
7388 | u64 delalloc_bytes; | ||
7302 | struct inode *inode = dentry->d_inode; | 7389 | struct inode *inode = dentry->d_inode; |
7303 | u32 blocksize = inode->i_sb->s_blocksize; | 7390 | u32 blocksize = inode->i_sb->s_blocksize; |
7304 | 7391 | ||
7305 | generic_fillattr(inode, stat); | 7392 | generic_fillattr(inode, stat); |
7306 | stat->dev = BTRFS_I(inode)->root->anon_dev; | 7393 | stat->dev = BTRFS_I(inode)->root->anon_dev; |
7307 | stat->blksize = PAGE_CACHE_SIZE; | 7394 | stat->blksize = PAGE_CACHE_SIZE; |
7395 | |||
7396 | spin_lock(&BTRFS_I(inode)->lock); | ||
7397 | delalloc_bytes = BTRFS_I(inode)->delalloc_bytes; | ||
7398 | spin_unlock(&BTRFS_I(inode)->lock); | ||
7308 | stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + | 7399 | stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + |
7309 | ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9; | 7400 | ALIGN(delalloc_bytes, blocksize)) >> 9; |
7310 | return 0; | 7401 | return 0; |
7311 | } | 7402 | } |
7312 | 7403 | ||
@@ -7583,7 +7674,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
7583 | 7674 | ||
7584 | INIT_LIST_HEAD(&works); | 7675 | INIT_LIST_HEAD(&works); |
7585 | INIT_LIST_HEAD(&splice); | 7676 | INIT_LIST_HEAD(&splice); |
7586 | again: | 7677 | |
7587 | spin_lock(&root->fs_info->delalloc_lock); | 7678 | spin_lock(&root->fs_info->delalloc_lock); |
7588 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); | 7679 | list_splice_init(&root->fs_info->delalloc_inodes, &splice); |
7589 | while (!list_empty(&splice)) { | 7680 | while (!list_empty(&splice)) { |
@@ -7593,8 +7684,11 @@ again: | |||
7593 | list_del_init(&binode->delalloc_inodes); | 7684 | list_del_init(&binode->delalloc_inodes); |
7594 | 7685 | ||
7595 | inode = igrab(&binode->vfs_inode); | 7686 | inode = igrab(&binode->vfs_inode); |
7596 | if (!inode) | 7687 | if (!inode) { |
7688 | clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, | ||
7689 | &binode->runtime_flags); | ||
7597 | continue; | 7690 | continue; |
7691 | } | ||
7598 | 7692 | ||
7599 | list_add_tail(&binode->delalloc_inodes, | 7693 | list_add_tail(&binode->delalloc_inodes, |
7600 | &root->fs_info->delalloc_inodes); | 7694 | &root->fs_info->delalloc_inodes); |
@@ -7619,13 +7713,6 @@ again: | |||
7619 | btrfs_wait_and_free_delalloc_work(work); | 7713 | btrfs_wait_and_free_delalloc_work(work); |
7620 | } | 7714 | } |
7621 | 7715 | ||
7622 | spin_lock(&root->fs_info->delalloc_lock); | ||
7623 | if (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
7624 | spin_unlock(&root->fs_info->delalloc_lock); | ||
7625 | goto again; | ||
7626 | } | ||
7627 | spin_unlock(&root->fs_info->delalloc_lock); | ||
7628 | |||
7629 | /* the filemap_flush will queue IO into the worker threads, but | 7716 | /* the filemap_flush will queue IO into the worker threads, but |
7630 | * we have to make sure the IO is actually started and that | 7717 | * we have to make sure the IO is actually started and that |
7631 | * ordered extents get created before we return | 7718 | * ordered extents get created before we return |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 338f2597bf7f..059546aa8fdf 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -42,12 +42,12 @@ | |||
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/blkdev.h> | 43 | #include <linux/blkdev.h> |
44 | #include <linux/uuid.h> | 44 | #include <linux/uuid.h> |
45 | #include <linux/btrfs.h> | ||
45 | #include "compat.h" | 46 | #include "compat.h" |
46 | #include "ctree.h" | 47 | #include "ctree.h" |
47 | #include "disk-io.h" | 48 | #include "disk-io.h" |
48 | #include "transaction.h" | 49 | #include "transaction.h" |
49 | #include "btrfs_inode.h" | 50 | #include "btrfs_inode.h" |
50 | #include "ioctl.h" | ||
51 | #include "print-tree.h" | 51 | #include "print-tree.h" |
52 | #include "volumes.h" | 52 | #include "volumes.h" |
53 | #include "locking.h" | 53 | #include "locking.h" |
@@ -367,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
367 | struct dentry *dentry, | 367 | struct dentry *dentry, |
368 | char *name, int namelen, | 368 | char *name, int namelen, |
369 | u64 *async_transid, | 369 | u64 *async_transid, |
370 | struct btrfs_qgroup_inherit **inherit) | 370 | struct btrfs_qgroup_inherit *inherit) |
371 | { | 371 | { |
372 | struct btrfs_trans_handle *trans; | 372 | struct btrfs_trans_handle *trans; |
373 | struct btrfs_key key; | 373 | struct btrfs_key key; |
@@ -401,8 +401,7 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
401 | if (IS_ERR(trans)) | 401 | if (IS_ERR(trans)) |
402 | return PTR_ERR(trans); | 402 | return PTR_ERR(trans); |
403 | 403 | ||
404 | ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, | 404 | ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, inherit); |
405 | inherit ? *inherit : NULL); | ||
406 | if (ret) | 405 | if (ret) |
407 | goto fail; | 406 | goto fail; |
408 | 407 | ||
@@ -533,7 +532,7 @@ fail: | |||
533 | 532 | ||
534 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 533 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, |
535 | char *name, int namelen, u64 *async_transid, | 534 | char *name, int namelen, u64 *async_transid, |
536 | bool readonly, struct btrfs_qgroup_inherit **inherit) | 535 | bool readonly, struct btrfs_qgroup_inherit *inherit) |
537 | { | 536 | { |
538 | struct inode *inode; | 537 | struct inode *inode; |
539 | struct btrfs_pending_snapshot *pending_snapshot; | 538 | struct btrfs_pending_snapshot *pending_snapshot; |
@@ -552,10 +551,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
552 | pending_snapshot->dentry = dentry; | 551 | pending_snapshot->dentry = dentry; |
553 | pending_snapshot->root = root; | 552 | pending_snapshot->root = root; |
554 | pending_snapshot->readonly = readonly; | 553 | pending_snapshot->readonly = readonly; |
555 | if (inherit) { | 554 | pending_snapshot->inherit = inherit; |
556 | pending_snapshot->inherit = *inherit; | ||
557 | *inherit = NULL; /* take responsibility to free it */ | ||
558 | } | ||
559 | 555 | ||
560 | trans = btrfs_start_transaction(root->fs_info->extent_root, 6); | 556 | trans = btrfs_start_transaction(root->fs_info->extent_root, 6); |
561 | if (IS_ERR(trans)) { | 557 | if (IS_ERR(trans)) { |
@@ -695,7 +691,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
695 | char *name, int namelen, | 691 | char *name, int namelen, |
696 | struct btrfs_root *snap_src, | 692 | struct btrfs_root *snap_src, |
697 | u64 *async_transid, bool readonly, | 693 | u64 *async_transid, bool readonly, |
698 | struct btrfs_qgroup_inherit **inherit) | 694 | struct btrfs_qgroup_inherit *inherit) |
699 | { | 695 | { |
700 | struct inode *dir = parent->dentry->d_inode; | 696 | struct inode *dir = parent->dentry->d_inode; |
701 | struct dentry *dentry; | 697 | struct dentry *dentry; |
@@ -818,7 +814,7 @@ static int find_new_extents(struct btrfs_root *root, | |||
818 | 814 | ||
819 | while(1) { | 815 | while(1) { |
820 | ret = btrfs_search_forward(root, &min_key, &max_key, | 816 | ret = btrfs_search_forward(root, &min_key, &max_key, |
821 | path, 0, newer_than); | 817 | path, newer_than); |
822 | if (ret != 0) | 818 | if (ret != 0) |
823 | goto none; | 819 | goto none; |
824 | if (min_key.objectid != ino) | 820 | if (min_key.objectid != ino) |
@@ -1206,6 +1202,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1206 | if (!(inode->i_sb->s_flags & MS_ACTIVE)) | 1202 | if (!(inode->i_sb->s_flags & MS_ACTIVE)) |
1207 | break; | 1203 | break; |
1208 | 1204 | ||
1205 | if (btrfs_defrag_cancelled(root->fs_info)) { | ||
1206 | printk(KERN_DEBUG "btrfs: defrag_file cancelled\n"); | ||
1207 | ret = -EAGAIN; | ||
1208 | break; | ||
1209 | } | ||
1210 | |||
1209 | if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, | 1211 | if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, |
1210 | extent_thresh, &last_len, &skip, | 1212 | extent_thresh, &last_len, &skip, |
1211 | &defrag_end, range->flags & | 1213 | &defrag_end, range->flags & |
@@ -1329,9 +1331,6 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
1329 | int ret = 0; | 1331 | int ret = 0; |
1330 | int mod = 0; | 1332 | int mod = 0; |
1331 | 1333 | ||
1332 | if (root->fs_info->sb->s_flags & MS_RDONLY) | ||
1333 | return -EROFS; | ||
1334 | |||
1335 | if (!capable(CAP_SYS_ADMIN)) | 1334 | if (!capable(CAP_SYS_ADMIN)) |
1336 | return -EPERM; | 1335 | return -EPERM; |
1337 | 1336 | ||
@@ -1363,6 +1362,10 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
1363 | *devstr = '\0'; | 1362 | *devstr = '\0'; |
1364 | devstr = vol_args->name; | 1363 | devstr = vol_args->name; |
1365 | devid = simple_strtoull(devstr, &end, 10); | 1364 | devid = simple_strtoull(devstr, &end, 10); |
1365 | if (!devid) { | ||
1366 | ret = -EINVAL; | ||
1367 | goto out_free; | ||
1368 | } | ||
1366 | printk(KERN_INFO "btrfs: resizing devid %llu\n", | 1369 | printk(KERN_INFO "btrfs: resizing devid %llu\n", |
1367 | (unsigned long long)devid); | 1370 | (unsigned long long)devid); |
1368 | } | 1371 | } |
@@ -1371,7 +1374,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
1371 | if (!device) { | 1374 | if (!device) { |
1372 | printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", | 1375 | printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", |
1373 | (unsigned long long)devid); | 1376 | (unsigned long long)devid); |
1374 | ret = -EINVAL; | 1377 | ret = -ENODEV; |
1375 | goto out_free; | 1378 | goto out_free; |
1376 | } | 1379 | } |
1377 | 1380 | ||
@@ -1379,7 +1382,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
1379 | printk(KERN_INFO "btrfs: resizer unable to apply on " | 1382 | printk(KERN_INFO "btrfs: resizer unable to apply on " |
1380 | "readonly device %llu\n", | 1383 | "readonly device %llu\n", |
1381 | (unsigned long long)devid); | 1384 | (unsigned long long)devid); |
1382 | ret = -EINVAL; | 1385 | ret = -EPERM; |
1383 | goto out_free; | 1386 | goto out_free; |
1384 | } | 1387 | } |
1385 | 1388 | ||
@@ -1401,7 +1404,7 @@ static noinline int btrfs_ioctl_resize(struct file *file, | |||
1401 | } | 1404 | } |
1402 | 1405 | ||
1403 | if (device->is_tgtdev_for_dev_replace) { | 1406 | if (device->is_tgtdev_for_dev_replace) { |
1404 | ret = -EINVAL; | 1407 | ret = -EPERM; |
1405 | goto out_free; | 1408 | goto out_free; |
1406 | } | 1409 | } |
1407 | 1410 | ||
@@ -1457,7 +1460,7 @@ out: | |||
1457 | static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | 1460 | static noinline int btrfs_ioctl_snap_create_transid(struct file *file, |
1458 | char *name, unsigned long fd, int subvol, | 1461 | char *name, unsigned long fd, int subvol, |
1459 | u64 *transid, bool readonly, | 1462 | u64 *transid, bool readonly, |
1460 | struct btrfs_qgroup_inherit **inherit) | 1463 | struct btrfs_qgroup_inherit *inherit) |
1461 | { | 1464 | { |
1462 | int namelen; | 1465 | int namelen; |
1463 | int ret = 0; | 1466 | int ret = 0; |
@@ -1566,7 +1569,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file, | |||
1566 | 1569 | ||
1567 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, | 1570 | ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, |
1568 | vol_args->fd, subvol, ptr, | 1571 | vol_args->fd, subvol, ptr, |
1569 | readonly, &inherit); | 1572 | readonly, inherit); |
1570 | 1573 | ||
1571 | if (ret == 0 && ptr && | 1574 | if (ret == 0 && ptr && |
1572 | copy_to_user(arg + | 1575 | copy_to_user(arg + |
@@ -1863,7 +1866,7 @@ static noinline int search_ioctl(struct inode *inode, | |||
1863 | path->keep_locks = 1; | 1866 | path->keep_locks = 1; |
1864 | 1867 | ||
1865 | while(1) { | 1868 | while(1) { |
1866 | ret = btrfs_search_forward(root, &key, &max_key, path, 0, | 1869 | ret = btrfs_search_forward(root, &key, &max_key, path, |
1867 | sk->min_transid); | 1870 | sk->min_transid); |
1868 | if (ret != 0) { | 1871 | if (ret != 0) { |
1869 | if (ret > 0) | 1872 | if (ret > 0) |
@@ -2171,6 +2174,12 @@ out_unlock: | |||
2171 | shrink_dcache_sb(root->fs_info->sb); | 2174 | shrink_dcache_sb(root->fs_info->sb); |
2172 | btrfs_invalidate_inodes(dest); | 2175 | btrfs_invalidate_inodes(dest); |
2173 | d_delete(dentry); | 2176 | d_delete(dentry); |
2177 | |||
2178 | /* the last ref */ | ||
2179 | if (dest->cache_inode) { | ||
2180 | iput(dest->cache_inode); | ||
2181 | dest->cache_inode = NULL; | ||
2182 | } | ||
2174 | } | 2183 | } |
2175 | out_dput: | 2184 | out_dput: |
2176 | dput(dentry); | 2185 | dput(dentry); |
@@ -2211,10 +2220,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
2211 | ret = -EPERM; | 2220 | ret = -EPERM; |
2212 | goto out; | 2221 | goto out; |
2213 | } | 2222 | } |
2214 | ret = btrfs_defrag_root(root, 0); | 2223 | ret = btrfs_defrag_root(root); |
2215 | if (ret) | 2224 | if (ret) |
2216 | goto out; | 2225 | goto out; |
2217 | ret = btrfs_defrag_root(root->fs_info->extent_root, 0); | 2226 | ret = btrfs_defrag_root(root->fs_info->extent_root); |
2218 | break; | 2227 | break; |
2219 | case S_IFREG: | 2228 | case S_IFREG: |
2220 | if (!(file->f_mode & FMODE_WRITE)) { | 2229 | if (!(file->f_mode & FMODE_WRITE)) { |
@@ -3111,7 +3120,7 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, | |||
3111 | u64 transid; | 3120 | u64 transid; |
3112 | int ret; | 3121 | int ret; |
3113 | 3122 | ||
3114 | trans = btrfs_attach_transaction(root); | 3123 | trans = btrfs_attach_transaction_barrier(root); |
3115 | if (IS_ERR(trans)) { | 3124 | if (IS_ERR(trans)) { |
3116 | if (PTR_ERR(trans) != -ENOENT) | 3125 | if (PTR_ERR(trans) != -ENOENT) |
3117 | return PTR_ERR(trans); | 3126 | return PTR_ERR(trans); |
@@ -3289,7 +3298,7 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) | |||
3289 | struct inode_fs_paths *ipath = NULL; | 3298 | struct inode_fs_paths *ipath = NULL; |
3290 | struct btrfs_path *path; | 3299 | struct btrfs_path *path; |
3291 | 3300 | ||
3292 | if (!capable(CAP_SYS_ADMIN)) | 3301 | if (!capable(CAP_DAC_READ_SEARCH)) |
3293 | return -EPERM; | 3302 | return -EPERM; |
3294 | 3303 | ||
3295 | path = btrfs_alloc_path(); | 3304 | path = btrfs_alloc_path(); |
@@ -3914,6 +3923,65 @@ out: | |||
3914 | return ret; | 3923 | return ret; |
3915 | } | 3924 | } |
3916 | 3925 | ||
3926 | static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) | ||
3927 | { | ||
3928 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
3929 | const char *label = root->fs_info->super_copy->label; | ||
3930 | size_t len = strnlen(label, BTRFS_LABEL_SIZE); | ||
3931 | int ret; | ||
3932 | |||
3933 | if (len == BTRFS_LABEL_SIZE) { | ||
3934 | pr_warn("btrfs: label is too long, return the first %zu bytes\n", | ||
3935 | --len); | ||
3936 | } | ||
3937 | |||
3938 | mutex_lock(&root->fs_info->volume_mutex); | ||
3939 | ret = copy_to_user(arg, label, len); | ||
3940 | mutex_unlock(&root->fs_info->volume_mutex); | ||
3941 | |||
3942 | return ret ? -EFAULT : 0; | ||
3943 | } | ||
3944 | |||
3945 | static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) | ||
3946 | { | ||
3947 | struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; | ||
3948 | struct btrfs_super_block *super_block = root->fs_info->super_copy; | ||
3949 | struct btrfs_trans_handle *trans; | ||
3950 | char label[BTRFS_LABEL_SIZE]; | ||
3951 | int ret; | ||
3952 | |||
3953 | if (!capable(CAP_SYS_ADMIN)) | ||
3954 | return -EPERM; | ||
3955 | |||
3956 | if (copy_from_user(label, arg, sizeof(label))) | ||
3957 | return -EFAULT; | ||
3958 | |||
3959 | if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { | ||
3960 | pr_err("btrfs: unable to set label with more than %d bytes\n", | ||
3961 | BTRFS_LABEL_SIZE - 1); | ||
3962 | return -EINVAL; | ||
3963 | } | ||
3964 | |||
3965 | ret = mnt_want_write_file(file); | ||
3966 | if (ret) | ||
3967 | return ret; | ||
3968 | |||
3969 | mutex_lock(&root->fs_info->volume_mutex); | ||
3970 | trans = btrfs_start_transaction(root, 0); | ||
3971 | if (IS_ERR(trans)) { | ||
3972 | ret = PTR_ERR(trans); | ||
3973 | goto out_unlock; | ||
3974 | } | ||
3975 | |||
3976 | strcpy(super_block->label, label); | ||
3977 | ret = btrfs_end_transaction(trans, root); | ||
3978 | |||
3979 | out_unlock: | ||
3980 | mutex_unlock(&root->fs_info->volume_mutex); | ||
3981 | mnt_drop_write_file(file); | ||
3982 | return ret; | ||
3983 | } | ||
3984 | |||
3917 | long btrfs_ioctl(struct file *file, unsigned int | 3985 | long btrfs_ioctl(struct file *file, unsigned int |
3918 | cmd, unsigned long arg) | 3986 | cmd, unsigned long arg) |
3919 | { | 3987 | { |
@@ -4014,6 +4082,10 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
4014 | return btrfs_ioctl_qgroup_limit(file, argp); | 4082 | return btrfs_ioctl_qgroup_limit(file, argp); |
4015 | case BTRFS_IOC_DEV_REPLACE: | 4083 | case BTRFS_IOC_DEV_REPLACE: |
4016 | return btrfs_ioctl_dev_replace(root, argp); | 4084 | return btrfs_ioctl_dev_replace(root, argp); |
4085 | case BTRFS_IOC_GET_FSLABEL: | ||
4086 | return btrfs_ioctl_get_fslabel(file, argp); | ||
4087 | case BTRFS_IOC_SET_FSLABEL: | ||
4088 | return btrfs_ioctl_set_fslabel(file, argp); | ||
4017 | } | 4089 | } |
4018 | 4090 | ||
4019 | return -ENOTTY; | 4091 | return -ENOTTY; |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 2a1762c66041..e95df435d897 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -113,11 +113,10 @@ again: | |||
113 | read_unlock(&eb->lock); | 113 | read_unlock(&eb->lock); |
114 | return; | 114 | return; |
115 | } | 115 | } |
116 | read_unlock(&eb->lock); | ||
117 | wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); | ||
118 | read_lock(&eb->lock); | ||
119 | if (atomic_read(&eb->blocking_writers)) { | 116 | if (atomic_read(&eb->blocking_writers)) { |
120 | read_unlock(&eb->lock); | 117 | read_unlock(&eb->lock); |
118 | wait_event(eb->write_lock_wq, | ||
119 | atomic_read(&eb->blocking_writers) == 0); | ||
121 | goto again; | 120 | goto again; |
122 | } | 121 | } |
123 | atomic_inc(&eb->read_locks); | 122 | atomic_inc(&eb->read_locks); |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index e5ed56729607..dc08d77b717e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -196,6 +196,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
196 | entry->file_offset = file_offset; | 196 | entry->file_offset = file_offset; |
197 | entry->start = start; | 197 | entry->start = start; |
198 | entry->len = len; | 198 | entry->len = len; |
199 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) && | ||
200 | !(type == BTRFS_ORDERED_NOCOW)) | ||
201 | entry->csum_bytes_left = disk_len; | ||
199 | entry->disk_len = disk_len; | 202 | entry->disk_len = disk_len; |
200 | entry->bytes_left = len; | 203 | entry->bytes_left = len; |
201 | entry->inode = igrab(inode); | 204 | entry->inode = igrab(inode); |
@@ -213,6 +216,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
213 | INIT_LIST_HEAD(&entry->root_extent_list); | 216 | INIT_LIST_HEAD(&entry->root_extent_list); |
214 | INIT_LIST_HEAD(&entry->work_list); | 217 | INIT_LIST_HEAD(&entry->work_list); |
215 | init_completion(&entry->completion); | 218 | init_completion(&entry->completion); |
219 | INIT_LIST_HEAD(&entry->log_list); | ||
216 | 220 | ||
217 | trace_btrfs_ordered_extent_add(inode, entry); | 221 | trace_btrfs_ordered_extent_add(inode, entry); |
218 | 222 | ||
@@ -270,6 +274,10 @@ void btrfs_add_ordered_sum(struct inode *inode, | |||
270 | tree = &BTRFS_I(inode)->ordered_tree; | 274 | tree = &BTRFS_I(inode)->ordered_tree; |
271 | spin_lock_irq(&tree->lock); | 275 | spin_lock_irq(&tree->lock); |
272 | list_add_tail(&sum->list, &entry->list); | 276 | list_add_tail(&sum->list, &entry->list); |
277 | WARN_ON(entry->csum_bytes_left < sum->len); | ||
278 | entry->csum_bytes_left -= sum->len; | ||
279 | if (entry->csum_bytes_left == 0) | ||
280 | wake_up(&entry->wait); | ||
273 | spin_unlock_irq(&tree->lock); | 281 | spin_unlock_irq(&tree->lock); |
274 | } | 282 | } |
275 | 283 | ||
@@ -405,6 +413,66 @@ out: | |||
405 | return ret == 0; | 413 | return ret == 0; |
406 | } | 414 | } |
407 | 415 | ||
416 | /* Needs to either be called under a log transaction or the log_mutex */ | ||
417 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode) | ||
418 | { | ||
419 | struct btrfs_ordered_inode_tree *tree; | ||
420 | struct btrfs_ordered_extent *ordered; | ||
421 | struct rb_node *n; | ||
422 | int index = log->log_transid % 2; | ||
423 | |||
424 | tree = &BTRFS_I(inode)->ordered_tree; | ||
425 | spin_lock_irq(&tree->lock); | ||
426 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { | ||
427 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); | ||
428 | spin_lock(&log->log_extents_lock[index]); | ||
429 | if (list_empty(&ordered->log_list)) { | ||
430 | list_add_tail(&ordered->log_list, &log->logged_list[index]); | ||
431 | atomic_inc(&ordered->refs); | ||
432 | } | ||
433 | spin_unlock(&log->log_extents_lock[index]); | ||
434 | } | ||
435 | spin_unlock_irq(&tree->lock); | ||
436 | } | ||
437 | |||
438 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) | ||
439 | { | ||
440 | struct btrfs_ordered_extent *ordered; | ||
441 | int index = transid % 2; | ||
442 | |||
443 | spin_lock_irq(&log->log_extents_lock[index]); | ||
444 | while (!list_empty(&log->logged_list[index])) { | ||
445 | ordered = list_first_entry(&log->logged_list[index], | ||
446 | struct btrfs_ordered_extent, | ||
447 | log_list); | ||
448 | list_del_init(&ordered->log_list); | ||
449 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
450 | wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE, | ||
451 | &ordered->flags)); | ||
452 | btrfs_put_ordered_extent(ordered); | ||
453 | spin_lock_irq(&log->log_extents_lock[index]); | ||
454 | } | ||
455 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
456 | } | ||
457 | |||
458 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid) | ||
459 | { | ||
460 | struct btrfs_ordered_extent *ordered; | ||
461 | int index = transid % 2; | ||
462 | |||
463 | spin_lock_irq(&log->log_extents_lock[index]); | ||
464 | while (!list_empty(&log->logged_list[index])) { | ||
465 | ordered = list_first_entry(&log->logged_list[index], | ||
466 | struct btrfs_ordered_extent, | ||
467 | log_list); | ||
468 | list_del_init(&ordered->log_list); | ||
469 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
470 | btrfs_put_ordered_extent(ordered); | ||
471 | spin_lock_irq(&log->log_extents_lock[index]); | ||
472 | } | ||
473 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
474 | } | ||
475 | |||
408 | /* | 476 | /* |
409 | * used to drop a reference on an ordered extent. This will free | 477 | * used to drop a reference on an ordered extent. This will free |
410 | * the extent if the last reference is dropped | 478 | * the extent if the last reference is dropped |
@@ -544,10 +612,12 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput) | |||
544 | * extra check to make sure the ordered operation list really is empty | 612 | * extra check to make sure the ordered operation list really is empty |
545 | * before we return | 613 | * before we return |
546 | */ | 614 | */ |
547 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) | 615 | int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, |
616 | struct btrfs_root *root, int wait) | ||
548 | { | 617 | { |
549 | struct btrfs_inode *btrfs_inode; | 618 | struct btrfs_inode *btrfs_inode; |
550 | struct inode *inode; | 619 | struct inode *inode; |
620 | struct btrfs_transaction *cur_trans = trans->transaction; | ||
551 | struct list_head splice; | 621 | struct list_head splice; |
552 | struct list_head works; | 622 | struct list_head works; |
553 | struct btrfs_delalloc_work *work, *next; | 623 | struct btrfs_delalloc_work *work, *next; |
@@ -558,14 +628,10 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) | |||
558 | 628 | ||
559 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 629 | mutex_lock(&root->fs_info->ordered_operations_mutex); |
560 | spin_lock(&root->fs_info->ordered_extent_lock); | 630 | spin_lock(&root->fs_info->ordered_extent_lock); |
561 | again: | 631 | list_splice_init(&cur_trans->ordered_operations, &splice); |
562 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
563 | |||
564 | while (!list_empty(&splice)) { | 632 | while (!list_empty(&splice)) { |
565 | |||
566 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | 633 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, |
567 | ordered_operations); | 634 | ordered_operations); |
568 | |||
569 | inode = &btrfs_inode->vfs_inode; | 635 | inode = &btrfs_inode->vfs_inode; |
570 | 636 | ||
571 | list_del_init(&btrfs_inode->ordered_operations); | 637 | list_del_init(&btrfs_inode->ordered_operations); |
@@ -574,24 +640,22 @@ again: | |||
574 | * the inode may be getting freed (in sys_unlink path). | 640 | * the inode may be getting freed (in sys_unlink path). |
575 | */ | 641 | */ |
576 | inode = igrab(inode); | 642 | inode = igrab(inode); |
577 | |||
578 | if (!wait && inode) { | ||
579 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | ||
580 | &root->fs_info->ordered_operations); | ||
581 | } | ||
582 | |||
583 | if (!inode) | 643 | if (!inode) |
584 | continue; | 644 | continue; |
645 | |||
646 | if (!wait) | ||
647 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | ||
648 | &cur_trans->ordered_operations); | ||
585 | spin_unlock(&root->fs_info->ordered_extent_lock); | 649 | spin_unlock(&root->fs_info->ordered_extent_lock); |
586 | 650 | ||
587 | work = btrfs_alloc_delalloc_work(inode, wait, 1); | 651 | work = btrfs_alloc_delalloc_work(inode, wait, 1); |
588 | if (!work) { | 652 | if (!work) { |
653 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
589 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) | 654 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) |
590 | list_add_tail(&btrfs_inode->ordered_operations, | 655 | list_add_tail(&btrfs_inode->ordered_operations, |
591 | &splice); | 656 | &splice); |
592 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
593 | list_splice_tail(&splice, | 657 | list_splice_tail(&splice, |
594 | &root->fs_info->ordered_operations); | 658 | &cur_trans->ordered_operations); |
595 | spin_unlock(&root->fs_info->ordered_extent_lock); | 659 | spin_unlock(&root->fs_info->ordered_extent_lock); |
596 | ret = -ENOMEM; | 660 | ret = -ENOMEM; |
597 | goto out; | 661 | goto out; |
@@ -603,9 +667,6 @@ again: | |||
603 | cond_resched(); | 667 | cond_resched(); |
604 | spin_lock(&root->fs_info->ordered_extent_lock); | 668 | spin_lock(&root->fs_info->ordered_extent_lock); |
605 | } | 669 | } |
606 | if (wait && !list_empty(&root->fs_info->ordered_operations)) | ||
607 | goto again; | ||
608 | |||
609 | spin_unlock(&root->fs_info->ordered_extent_lock); | 670 | spin_unlock(&root->fs_info->ordered_extent_lock); |
610 | out: | 671 | out: |
611 | list_for_each_entry_safe(work, next, &works, list) { | 672 | list_for_each_entry_safe(work, next, &works, list) { |
@@ -974,6 +1035,7 @@ out: | |||
974 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 1035 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
975 | struct btrfs_root *root, struct inode *inode) | 1036 | struct btrfs_root *root, struct inode *inode) |
976 | { | 1037 | { |
1038 | struct btrfs_transaction *cur_trans = trans->transaction; | ||
977 | u64 last_mod; | 1039 | u64 last_mod; |
978 | 1040 | ||
979 | last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); | 1041 | last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); |
@@ -988,7 +1050,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
988 | spin_lock(&root->fs_info->ordered_extent_lock); | 1050 | spin_lock(&root->fs_info->ordered_extent_lock); |
989 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { | 1051 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { |
990 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | 1052 | list_add_tail(&BTRFS_I(inode)->ordered_operations, |
991 | &root->fs_info->ordered_operations); | 1053 | &cur_trans->ordered_operations); |
992 | } | 1054 | } |
993 | spin_unlock(&root->fs_info->ordered_extent_lock); | 1055 | spin_unlock(&root->fs_info->ordered_extent_lock); |
994 | } | 1056 | } |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index f29d4bf5fbe7..8eadfe406cdd 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -79,6 +79,8 @@ struct btrfs_ordered_sum { | |||
79 | #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent | 79 | #define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent |
80 | * has done its due diligence in updating | 80 | * has done its due diligence in updating |
81 | * the isize. */ | 81 | * the isize. */ |
82 | #define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered | ||
83 | ordered extent */ | ||
82 | 84 | ||
83 | struct btrfs_ordered_extent { | 85 | struct btrfs_ordered_extent { |
84 | /* logical offset in the file */ | 86 | /* logical offset in the file */ |
@@ -96,6 +98,9 @@ struct btrfs_ordered_extent { | |||
96 | /* number of bytes that still need writing */ | 98 | /* number of bytes that still need writing */ |
97 | u64 bytes_left; | 99 | u64 bytes_left; |
98 | 100 | ||
101 | /* number of bytes that still need csumming */ | ||
102 | u64 csum_bytes_left; | ||
103 | |||
99 | /* | 104 | /* |
100 | * the end of the ordered extent which is behind it but | 105 | * the end of the ordered extent which is behind it but |
101 | * didn't update disk_i_size. Please see the comment of | 106 | * didn't update disk_i_size. Please see the comment of |
@@ -118,6 +123,9 @@ struct btrfs_ordered_extent { | |||
118 | /* list of checksums for insertion when the extent io is done */ | 123 | /* list of checksums for insertion when the extent io is done */ |
119 | struct list_head list; | 124 | struct list_head list; |
120 | 125 | ||
126 | /* If we need to wait on this to be done */ | ||
127 | struct list_head log_list; | ||
128 | |||
121 | /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ | 129 | /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ |
122 | wait_queue_head_t wait; | 130 | wait_queue_head_t wait; |
123 | 131 | ||
@@ -189,11 +197,15 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | |||
189 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 197 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
190 | struct btrfs_ordered_extent *ordered); | 198 | struct btrfs_ordered_extent *ordered); |
191 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 199 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
192 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | 200 | int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, |
201 | struct btrfs_root *root, int wait); | ||
193 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | 202 | void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, |
194 | struct btrfs_root *root, | 203 | struct btrfs_root *root, |
195 | struct inode *inode); | 204 | struct inode *inode); |
196 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); | 205 | void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); |
206 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); | ||
207 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); | ||
208 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); | ||
197 | int __init ordered_data_init(void); | 209 | int __init ordered_data_init(void); |
198 | void ordered_data_exit(void); | 210 | void ordered_data_exit(void); |
199 | #endif | 211 | #endif |
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 50d95fd190a5..920957ecb27e 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c | |||
@@ -294,6 +294,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l) | |||
294 | btrfs_dev_extent_chunk_offset(l, dev_extent), | 294 | btrfs_dev_extent_chunk_offset(l, dev_extent), |
295 | (unsigned long long) | 295 | (unsigned long long) |
296 | btrfs_dev_extent_length(l, dev_extent)); | 296 | btrfs_dev_extent_length(l, dev_extent)); |
297 | break; | ||
297 | case BTRFS_DEV_STATS_KEY: | 298 | case BTRFS_DEV_STATS_KEY: |
298 | printk(KERN_INFO "\t\tdevice stats\n"); | 299 | printk(KERN_INFO "\t\tdevice stats\n"); |
299 | break; | 300 | break; |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index a5c856234323..88ab785bbd73 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -23,13 +23,13 @@ | |||
23 | #include <linux/rbtree.h> | 23 | #include <linux/rbtree.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/workqueue.h> | 25 | #include <linux/workqueue.h> |
26 | #include <linux/btrfs.h> | ||
26 | 27 | ||
27 | #include "ctree.h" | 28 | #include "ctree.h" |
28 | #include "transaction.h" | 29 | #include "transaction.h" |
29 | #include "disk-io.h" | 30 | #include "disk-io.h" |
30 | #include "locking.h" | 31 | #include "locking.h" |
31 | #include "ulist.h" | 32 | #include "ulist.h" |
32 | #include "ioctl.h" | ||
33 | #include "backref.h" | 33 | #include "backref.h" |
34 | 34 | ||
35 | /* TODO XXX FIXME | 35 | /* TODO XXX FIXME |
@@ -847,6 +847,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, | |||
847 | int ret = 0; | 847 | int ret = 0; |
848 | 848 | ||
849 | spin_lock(&fs_info->qgroup_lock); | 849 | spin_lock(&fs_info->qgroup_lock); |
850 | if (!fs_info->quota_root) { | ||
851 | spin_unlock(&fs_info->qgroup_lock); | ||
852 | return 0; | ||
853 | } | ||
850 | fs_info->quota_enabled = 0; | 854 | fs_info->quota_enabled = 0; |
851 | fs_info->pending_quota_state = 0; | 855 | fs_info->pending_quota_state = 0; |
852 | quota_root = fs_info->quota_root; | 856 | quota_root = fs_info->quota_root; |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 300e09ac3659..ba5a3210da9a 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -3017,7 +3017,7 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
3017 | } | 3017 | } |
3018 | } | 3018 | } |
3019 | 3019 | ||
3020 | page_start = (u64)page->index << PAGE_CACHE_SHIFT; | 3020 | page_start = page_offset(page); |
3021 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 3021 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
3022 | 3022 | ||
3023 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); | 3023 | lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 67783e03d121..c78b2a3fc335 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -2708,7 +2708,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, | |||
2708 | int ret; | 2708 | int ret; |
2709 | struct btrfs_root *root = sctx->dev_root; | 2709 | struct btrfs_root *root = sctx->dev_root; |
2710 | 2710 | ||
2711 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 2711 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
2712 | return -EIO; | 2712 | return -EIO; |
2713 | 2713 | ||
2714 | gen = root->fs_info->last_trans_committed; | 2714 | gen = root->fs_info->last_trans_committed; |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 321b7fb4e441..68da757615ae 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -85,6 +85,7 @@ struct send_ctx { | |||
85 | u32 send_max_size; | 85 | u32 send_max_size; |
86 | u64 total_send_size; | 86 | u64 total_send_size; |
87 | u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; | 87 | u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; |
88 | u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */ | ||
88 | 89 | ||
89 | struct vfsmount *mnt; | 90 | struct vfsmount *mnt; |
90 | 91 | ||
@@ -3709,6 +3710,39 @@ out: | |||
3709 | return ret; | 3710 | return ret; |
3710 | } | 3711 | } |
3711 | 3712 | ||
3713 | /* | ||
3714 | * Send an update extent command to user space. | ||
3715 | */ | ||
3716 | static int send_update_extent(struct send_ctx *sctx, | ||
3717 | u64 offset, u32 len) | ||
3718 | { | ||
3719 | int ret = 0; | ||
3720 | struct fs_path *p; | ||
3721 | |||
3722 | p = fs_path_alloc(sctx); | ||
3723 | if (!p) | ||
3724 | return -ENOMEM; | ||
3725 | |||
3726 | ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT); | ||
3727 | if (ret < 0) | ||
3728 | goto out; | ||
3729 | |||
3730 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p); | ||
3731 | if (ret < 0) | ||
3732 | goto out; | ||
3733 | |||
3734 | TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p); | ||
3735 | TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset); | ||
3736 | TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len); | ||
3737 | |||
3738 | ret = send_cmd(sctx); | ||
3739 | |||
3740 | tlv_put_failure: | ||
3741 | out: | ||
3742 | fs_path_free(sctx, p); | ||
3743 | return ret; | ||
3744 | } | ||
3745 | |||
3712 | static int send_write_or_clone(struct send_ctx *sctx, | 3746 | static int send_write_or_clone(struct send_ctx *sctx, |
3713 | struct btrfs_path *path, | 3747 | struct btrfs_path *path, |
3714 | struct btrfs_key *key, | 3748 | struct btrfs_key *key, |
@@ -3744,7 +3778,11 @@ static int send_write_or_clone(struct send_ctx *sctx, | |||
3744 | goto out; | 3778 | goto out; |
3745 | } | 3779 | } |
3746 | 3780 | ||
3747 | if (!clone_root) { | 3781 | if (clone_root) { |
3782 | ret = send_clone(sctx, offset, len, clone_root); | ||
3783 | } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) { | ||
3784 | ret = send_update_extent(sctx, offset, len); | ||
3785 | } else { | ||
3748 | while (pos < len) { | 3786 | while (pos < len) { |
3749 | l = len - pos; | 3787 | l = len - pos; |
3750 | if (l > BTRFS_SEND_READ_SIZE) | 3788 | if (l > BTRFS_SEND_READ_SIZE) |
@@ -3757,10 +3795,7 @@ static int send_write_or_clone(struct send_ctx *sctx, | |||
3757 | pos += ret; | 3795 | pos += ret; |
3758 | } | 3796 | } |
3759 | ret = 0; | 3797 | ret = 0; |
3760 | } else { | ||
3761 | ret = send_clone(sctx, offset, len, clone_root); | ||
3762 | } | 3798 | } |
3763 | |||
3764 | out: | 3799 | out: |
3765 | return ret; | 3800 | return ret; |
3766 | } | 3801 | } |
@@ -4536,7 +4571,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
4536 | struct btrfs_fs_info *fs_info; | 4571 | struct btrfs_fs_info *fs_info; |
4537 | struct btrfs_ioctl_send_args *arg = NULL; | 4572 | struct btrfs_ioctl_send_args *arg = NULL; |
4538 | struct btrfs_key key; | 4573 | struct btrfs_key key; |
4539 | struct file *filp = NULL; | ||
4540 | struct send_ctx *sctx = NULL; | 4574 | struct send_ctx *sctx = NULL; |
4541 | u32 i; | 4575 | u32 i; |
4542 | u64 *clone_sources_tmp = NULL; | 4576 | u64 *clone_sources_tmp = NULL; |
@@ -4561,6 +4595,11 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
4561 | goto out; | 4595 | goto out; |
4562 | } | 4596 | } |
4563 | 4597 | ||
4598 | if (arg->flags & ~BTRFS_SEND_FLAG_NO_FILE_DATA) { | ||
4599 | ret = -EINVAL; | ||
4600 | goto out; | ||
4601 | } | ||
4602 | |||
4564 | sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); | 4603 | sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); |
4565 | if (!sctx) { | 4604 | if (!sctx) { |
4566 | ret = -ENOMEM; | 4605 | ret = -ENOMEM; |
@@ -4572,6 +4611,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
4572 | INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); | 4611 | INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); |
4573 | INIT_LIST_HEAD(&sctx->name_cache_list); | 4612 | INIT_LIST_HEAD(&sctx->name_cache_list); |
4574 | 4613 | ||
4614 | sctx->flags = arg->flags; | ||
4615 | |||
4575 | sctx->send_filp = fget(arg->send_fd); | 4616 | sctx->send_filp = fget(arg->send_fd); |
4576 | if (IS_ERR(sctx->send_filp)) { | 4617 | if (IS_ERR(sctx->send_filp)) { |
4577 | ret = PTR_ERR(sctx->send_filp); | 4618 | ret = PTR_ERR(sctx->send_filp); |
@@ -4673,8 +4714,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
4673 | goto out; | 4714 | goto out; |
4674 | 4715 | ||
4675 | out: | 4716 | out: |
4676 | if (filp) | ||
4677 | fput(filp); | ||
4678 | kfree(arg); | 4717 | kfree(arg); |
4679 | vfree(clone_sources_tmp); | 4718 | vfree(clone_sources_tmp); |
4680 | 4719 | ||
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h index 1bf4f32fd4ef..8bb18f7ccaa6 100644 --- a/fs/btrfs/send.h +++ b/fs/btrfs/send.h | |||
@@ -86,6 +86,7 @@ enum btrfs_send_cmd { | |||
86 | BTRFS_SEND_C_UTIMES, | 86 | BTRFS_SEND_C_UTIMES, |
87 | 87 | ||
88 | BTRFS_SEND_C_END, | 88 | BTRFS_SEND_C_END, |
89 | BTRFS_SEND_C_UPDATE_EXTENT, | ||
89 | __BTRFS_SEND_C_MAX, | 90 | __BTRFS_SEND_C_MAX, |
90 | }; | 91 | }; |
91 | #define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1) | 92 | #define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1) |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d8982e9601d3..db1ba9a2ed64 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -41,13 +41,13 @@ | |||
41 | #include <linux/slab.h> | 41 | #include <linux/slab.h> |
42 | #include <linux/cleancache.h> | 42 | #include <linux/cleancache.h> |
43 | #include <linux/ratelimit.h> | 43 | #include <linux/ratelimit.h> |
44 | #include <linux/btrfs.h> | ||
44 | #include "compat.h" | 45 | #include "compat.h" |
45 | #include "delayed-inode.h" | 46 | #include "delayed-inode.h" |
46 | #include "ctree.h" | 47 | #include "ctree.h" |
47 | #include "disk-io.h" | 48 | #include "disk-io.h" |
48 | #include "transaction.h" | 49 | #include "transaction.h" |
49 | #include "btrfs_inode.h" | 50 | #include "btrfs_inode.h" |
50 | #include "ioctl.h" | ||
51 | #include "print-tree.h" | 51 | #include "print-tree.h" |
52 | #include "xattr.h" | 52 | #include "xattr.h" |
53 | #include "volumes.h" | 53 | #include "volumes.h" |
@@ -63,8 +63,7 @@ | |||
63 | static const struct super_operations btrfs_super_ops; | 63 | static const struct super_operations btrfs_super_ops; |
64 | static struct file_system_type btrfs_fs_type; | 64 | static struct file_system_type btrfs_fs_type; |
65 | 65 | ||
66 | static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, | 66 | static const char *btrfs_decode_error(int errno, char nbuf[16]) |
67 | char nbuf[16]) | ||
68 | { | 67 | { |
69 | char *errstr = NULL; | 68 | char *errstr = NULL; |
70 | 69 | ||
@@ -98,7 +97,7 @@ static void __save_error_info(struct btrfs_fs_info *fs_info) | |||
98 | * today we only save the error info into ram. Long term we'll | 97 | * today we only save the error info into ram. Long term we'll |
99 | * also send it down to the disk | 98 | * also send it down to the disk |
100 | */ | 99 | */ |
101 | fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; | 100 | set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state); |
102 | } | 101 | } |
103 | 102 | ||
104 | static void save_error_info(struct btrfs_fs_info *fs_info) | 103 | static void save_error_info(struct btrfs_fs_info *fs_info) |
@@ -114,7 +113,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info) | |||
114 | if (sb->s_flags & MS_RDONLY) | 113 | if (sb->s_flags & MS_RDONLY) |
115 | return; | 114 | return; |
116 | 115 | ||
117 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 116 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { |
118 | sb->s_flags |= MS_RDONLY; | 117 | sb->s_flags |= MS_RDONLY; |
119 | printk(KERN_INFO "btrfs is forced readonly\n"); | 118 | printk(KERN_INFO "btrfs is forced readonly\n"); |
120 | /* | 119 | /* |
@@ -142,8 +141,6 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | |||
142 | struct super_block *sb = fs_info->sb; | 141 | struct super_block *sb = fs_info->sb; |
143 | char nbuf[16]; | 142 | char nbuf[16]; |
144 | const char *errstr; | 143 | const char *errstr; |
145 | va_list args; | ||
146 | va_start(args, fmt); | ||
147 | 144 | ||
148 | /* | 145 | /* |
149 | * Special case: if the error is EROFS, and we're already | 146 | * Special case: if the error is EROFS, and we're already |
@@ -152,15 +149,18 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | |||
152 | if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) | 149 | if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) |
153 | return; | 150 | return; |
154 | 151 | ||
155 | errstr = btrfs_decode_error(fs_info, errno, nbuf); | 152 | errstr = btrfs_decode_error(errno, nbuf); |
156 | if (fmt) { | 153 | if (fmt) { |
157 | struct va_format vaf = { | 154 | struct va_format vaf; |
158 | .fmt = fmt, | 155 | va_list args; |
159 | .va = &args, | 156 | |
160 | }; | 157 | va_start(args, fmt); |
158 | vaf.fmt = fmt; | ||
159 | vaf.va = &args; | ||
161 | 160 | ||
162 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n", | 161 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n", |
163 | sb->s_id, function, line, errstr, &vaf); | 162 | sb->s_id, function, line, errstr, &vaf); |
163 | va_end(args); | ||
164 | } else { | 164 | } else { |
165 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", | 165 | printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", |
166 | sb->s_id, function, line, errstr); | 166 | sb->s_id, function, line, errstr); |
@@ -171,7 +171,6 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function, | |||
171 | save_error_info(fs_info); | 171 | save_error_info(fs_info); |
172 | btrfs_handle_error(fs_info); | 172 | btrfs_handle_error(fs_info); |
173 | } | 173 | } |
174 | va_end(args); | ||
175 | } | 174 | } |
176 | 175 | ||
177 | static const char * const logtypes[] = { | 176 | static const char * const logtypes[] = { |
@@ -261,7 +260,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans, | |||
261 | char nbuf[16]; | 260 | char nbuf[16]; |
262 | const char *errstr; | 261 | const char *errstr; |
263 | 262 | ||
264 | errstr = btrfs_decode_error(root->fs_info, errno, nbuf); | 263 | errstr = btrfs_decode_error(errno, nbuf); |
265 | btrfs_printk(root->fs_info, | 264 | btrfs_printk(root->fs_info, |
266 | "%s:%d: Aborting unused transaction(%s).\n", | 265 | "%s:%d: Aborting unused transaction(%s).\n", |
267 | function, line, errstr); | 266 | function, line, errstr); |
@@ -289,8 +288,8 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function, | |||
289 | va_start(args, fmt); | 288 | va_start(args, fmt); |
290 | vaf.va = &args; | 289 | vaf.va = &args; |
291 | 290 | ||
292 | errstr = btrfs_decode_error(fs_info, errno, nbuf); | 291 | errstr = btrfs_decode_error(errno, nbuf); |
293 | if (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR) | 292 | if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR)) |
294 | panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n", | 293 | panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n", |
295 | s_id, function, line, &vaf, errstr); | 294 | s_id, function, line, &vaf, errstr); |
296 | 295 | ||
@@ -438,6 +437,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
438 | case Opt_compress_force: | 437 | case Opt_compress_force: |
439 | case Opt_compress_force_type: | 438 | case Opt_compress_force_type: |
440 | compress_force = true; | 439 | compress_force = true; |
440 | /* Fallthrough */ | ||
441 | case Opt_compress: | 441 | case Opt_compress: |
442 | case Opt_compress_type: | 442 | case Opt_compress_type: |
443 | if (token == Opt_compress || | 443 | if (token == Opt_compress || |
@@ -519,7 +519,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options) | |||
519 | case Opt_alloc_start: | 519 | case Opt_alloc_start: |
520 | num = match_strdup(&args[0]); | 520 | num = match_strdup(&args[0]); |
521 | if (num) { | 521 | if (num) { |
522 | mutex_lock(&info->chunk_mutex); | ||
522 | info->alloc_start = memparse(num, NULL); | 523 | info->alloc_start = memparse(num, NULL); |
524 | mutex_unlock(&info->chunk_mutex); | ||
523 | kfree(num); | 525 | kfree(num); |
524 | printk(KERN_INFO | 526 | printk(KERN_INFO |
525 | "btrfs: allocations start at %llu\n", | 527 | "btrfs: allocations start at %llu\n", |
@@ -876,7 +878,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
876 | 878 | ||
877 | btrfs_wait_ordered_extents(root, 0); | 879 | btrfs_wait_ordered_extents(root, 0); |
878 | 880 | ||
879 | trans = btrfs_attach_transaction(root); | 881 | trans = btrfs_attach_transaction_barrier(root); |
880 | if (IS_ERR(trans)) { | 882 | if (IS_ERR(trans)) { |
881 | /* no transaction, don't bother */ | 883 | /* no transaction, don't bother */ |
882 | if (PTR_ERR(trans) == -ENOENT) | 884 | if (PTR_ERR(trans) == -ENOENT) |
@@ -1289,7 +1291,9 @@ restore: | |||
1289 | fs_info->mount_opt = old_opts; | 1291 | fs_info->mount_opt = old_opts; |
1290 | fs_info->compress_type = old_compress_type; | 1292 | fs_info->compress_type = old_compress_type; |
1291 | fs_info->max_inline = old_max_inline; | 1293 | fs_info->max_inline = old_max_inline; |
1294 | mutex_lock(&fs_info->chunk_mutex); | ||
1292 | fs_info->alloc_start = old_alloc_start; | 1295 | fs_info->alloc_start = old_alloc_start; |
1296 | mutex_unlock(&fs_info->chunk_mutex); | ||
1293 | btrfs_resize_thread_pool(fs_info, | 1297 | btrfs_resize_thread_pool(fs_info, |
1294 | old_thread_pool_size, fs_info->thread_pool_size); | 1298 | old_thread_pool_size, fs_info->thread_pool_size); |
1295 | fs_info->metadata_ratio = old_metadata_ratio; | 1299 | fs_info->metadata_ratio = old_metadata_ratio; |
@@ -1559,7 +1563,7 @@ static int btrfs_freeze(struct super_block *sb) | |||
1559 | struct btrfs_trans_handle *trans; | 1563 | struct btrfs_trans_handle *trans; |
1560 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; | 1564 | struct btrfs_root *root = btrfs_sb(sb)->tree_root; |
1561 | 1565 | ||
1562 | trans = btrfs_attach_transaction(root); | 1566 | trans = btrfs_attach_transaction_barrier(root); |
1563 | if (IS_ERR(trans)) { | 1567 | if (IS_ERR(trans)) { |
1564 | /* no transaction, don't bother */ | 1568 | /* no transaction, don't bother */ |
1565 | if (PTR_ERR(trans) == -ENOENT) | 1569 | if (PTR_ERR(trans) == -ENOENT) |
@@ -1684,10 +1688,14 @@ static int __init init_btrfs_fs(void) | |||
1684 | if (err) | 1688 | if (err) |
1685 | goto free_delayed_inode; | 1689 | goto free_delayed_inode; |
1686 | 1690 | ||
1687 | err = btrfs_interface_init(); | 1691 | err = btrfs_delayed_ref_init(); |
1688 | if (err) | 1692 | if (err) |
1689 | goto free_auto_defrag; | 1693 | goto free_auto_defrag; |
1690 | 1694 | ||
1695 | err = btrfs_interface_init(); | ||
1696 | if (err) | ||
1697 | goto free_delayed_ref; | ||
1698 | |||
1691 | err = register_filesystem(&btrfs_fs_type); | 1699 | err = register_filesystem(&btrfs_fs_type); |
1692 | if (err) | 1700 | if (err) |
1693 | goto unregister_ioctl; | 1701 | goto unregister_ioctl; |
@@ -1699,6 +1707,8 @@ static int __init init_btrfs_fs(void) | |||
1699 | 1707 | ||
1700 | unregister_ioctl: | 1708 | unregister_ioctl: |
1701 | btrfs_interface_exit(); | 1709 | btrfs_interface_exit(); |
1710 | free_delayed_ref: | ||
1711 | btrfs_delayed_ref_exit(); | ||
1702 | free_auto_defrag: | 1712 | free_auto_defrag: |
1703 | btrfs_auto_defrag_exit(); | 1713 | btrfs_auto_defrag_exit(); |
1704 | free_delayed_inode: | 1714 | free_delayed_inode: |
@@ -1720,6 +1730,7 @@ free_compress: | |||
1720 | static void __exit exit_btrfs_fs(void) | 1730 | static void __exit exit_btrfs_fs(void) |
1721 | { | 1731 | { |
1722 | btrfs_destroy_cachep(); | 1732 | btrfs_destroy_cachep(); |
1733 | btrfs_delayed_ref_exit(); | ||
1723 | btrfs_auto_defrag_exit(); | 1734 | btrfs_auto_defrag_exit(); |
1724 | btrfs_delayed_inode_exit(); | 1735 | btrfs_delayed_inode_exit(); |
1725 | ordered_data_exit(); | 1736 | ordered_data_exit(); |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index fc03aa60b684..955204ca0447 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -40,7 +40,6 @@ void put_transaction(struct btrfs_transaction *transaction) | |||
40 | if (atomic_dec_and_test(&transaction->use_count)) { | 40 | if (atomic_dec_and_test(&transaction->use_count)) { |
41 | BUG_ON(!list_empty(&transaction->list)); | 41 | BUG_ON(!list_empty(&transaction->list)); |
42 | WARN_ON(transaction->delayed_refs.root.rb_node); | 42 | WARN_ON(transaction->delayed_refs.root.rb_node); |
43 | memset(transaction, 0, sizeof(*transaction)); | ||
44 | kmem_cache_free(btrfs_transaction_cachep, transaction); | 43 | kmem_cache_free(btrfs_transaction_cachep, transaction); |
45 | } | 44 | } |
46 | } | 45 | } |
@@ -51,6 +50,14 @@ static noinline void switch_commit_root(struct btrfs_root *root) | |||
51 | root->commit_root = btrfs_root_node(root); | 50 | root->commit_root = btrfs_root_node(root); |
52 | } | 51 | } |
53 | 52 | ||
53 | static inline int can_join_transaction(struct btrfs_transaction *trans, | ||
54 | int type) | ||
55 | { | ||
56 | return !(trans->in_commit && | ||
57 | type != TRANS_JOIN && | ||
58 | type != TRANS_JOIN_NOLOCK); | ||
59 | } | ||
60 | |||
54 | /* | 61 | /* |
55 | * either allocate a new transaction or hop into the existing one | 62 | * either allocate a new transaction or hop into the existing one |
56 | */ | 63 | */ |
@@ -62,7 +69,7 @@ static noinline int join_transaction(struct btrfs_root *root, int type) | |||
62 | spin_lock(&fs_info->trans_lock); | 69 | spin_lock(&fs_info->trans_lock); |
63 | loop: | 70 | loop: |
64 | /* The file system has been taken offline. No new transactions. */ | 71 | /* The file system has been taken offline. No new transactions. */ |
65 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 72 | if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { |
66 | spin_unlock(&fs_info->trans_lock); | 73 | spin_unlock(&fs_info->trans_lock); |
67 | return -EROFS; | 74 | return -EROFS; |
68 | } | 75 | } |
@@ -86,6 +93,10 @@ loop: | |||
86 | spin_unlock(&fs_info->trans_lock); | 93 | spin_unlock(&fs_info->trans_lock); |
87 | return cur_trans->aborted; | 94 | return cur_trans->aborted; |
88 | } | 95 | } |
96 | if (!can_join_transaction(cur_trans, type)) { | ||
97 | spin_unlock(&fs_info->trans_lock); | ||
98 | return -EBUSY; | ||
99 | } | ||
89 | atomic_inc(&cur_trans->use_count); | 100 | atomic_inc(&cur_trans->use_count); |
90 | atomic_inc(&cur_trans->num_writers); | 101 | atomic_inc(&cur_trans->num_writers); |
91 | cur_trans->num_joined++; | 102 | cur_trans->num_joined++; |
@@ -114,7 +125,7 @@ loop: | |||
114 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 125 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
115 | cur_trans = fs_info->running_transaction; | 126 | cur_trans = fs_info->running_transaction; |
116 | goto loop; | 127 | goto loop; |
117 | } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 128 | } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) { |
118 | spin_unlock(&fs_info->trans_lock); | 129 | spin_unlock(&fs_info->trans_lock); |
119 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | 130 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); |
120 | return -EROFS; | 131 | return -EROFS; |
@@ -158,6 +169,7 @@ loop: | |||
158 | spin_lock_init(&cur_trans->delayed_refs.lock); | 169 | spin_lock_init(&cur_trans->delayed_refs.lock); |
159 | 170 | ||
160 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 171 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
172 | INIT_LIST_HEAD(&cur_trans->ordered_operations); | ||
161 | list_add_tail(&cur_trans->list, &fs_info->trans_list); | 173 | list_add_tail(&cur_trans->list, &fs_info->trans_list); |
162 | extent_io_tree_init(&cur_trans->dirty_pages, | 174 | extent_io_tree_init(&cur_trans->dirty_pages, |
163 | fs_info->btree_inode->i_mapping); | 175 | fs_info->btree_inode->i_mapping); |
@@ -302,7 +314,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type, | |||
302 | int ret; | 314 | int ret; |
303 | u64 qgroup_reserved = 0; | 315 | u64 qgroup_reserved = 0; |
304 | 316 | ||
305 | if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 317 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
306 | return ERR_PTR(-EROFS); | 318 | return ERR_PTR(-EROFS); |
307 | 319 | ||
308 | if (current->journal_info) { | 320 | if (current->journal_info) { |
@@ -360,8 +372,11 @@ again: | |||
360 | 372 | ||
361 | do { | 373 | do { |
362 | ret = join_transaction(root, type); | 374 | ret = join_transaction(root, type); |
363 | if (ret == -EBUSY) | 375 | if (ret == -EBUSY) { |
364 | wait_current_trans(root); | 376 | wait_current_trans(root); |
377 | if (unlikely(type == TRANS_ATTACH)) | ||
378 | ret = -ENOENT; | ||
379 | } | ||
365 | } while (ret == -EBUSY); | 380 | } while (ret == -EBUSY); |
366 | 381 | ||
367 | if (ret < 0) { | 382 | if (ret < 0) { |
@@ -383,9 +398,10 @@ again: | |||
383 | h->block_rsv = NULL; | 398 | h->block_rsv = NULL; |
384 | h->orig_rsv = NULL; | 399 | h->orig_rsv = NULL; |
385 | h->aborted = 0; | 400 | h->aborted = 0; |
386 | h->qgroup_reserved = qgroup_reserved; | 401 | h->qgroup_reserved = 0; |
387 | h->delayed_ref_elem.seq = 0; | 402 | h->delayed_ref_elem.seq = 0; |
388 | h->type = type; | 403 | h->type = type; |
404 | h->allocating_chunk = false; | ||
389 | INIT_LIST_HEAD(&h->qgroup_ref_list); | 405 | INIT_LIST_HEAD(&h->qgroup_ref_list); |
390 | INIT_LIST_HEAD(&h->new_bgs); | 406 | INIT_LIST_HEAD(&h->new_bgs); |
391 | 407 | ||
@@ -401,6 +417,7 @@ again: | |||
401 | h->block_rsv = &root->fs_info->trans_block_rsv; | 417 | h->block_rsv = &root->fs_info->trans_block_rsv; |
402 | h->bytes_reserved = num_bytes; | 418 | h->bytes_reserved = num_bytes; |
403 | } | 419 | } |
420 | h->qgroup_reserved = qgroup_reserved; | ||
404 | 421 | ||
405 | got_it: | 422 | got_it: |
406 | btrfs_record_root_in_trans(h, root); | 423 | btrfs_record_root_in_trans(h, root); |
@@ -452,11 +469,43 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root | |||
452 | return start_transaction(root, 0, TRANS_USERSPACE, 0); | 469 | return start_transaction(root, 0, TRANS_USERSPACE, 0); |
453 | } | 470 | } |
454 | 471 | ||
472 | /* | ||
473 | * btrfs_attach_transaction() - catch the running transaction | ||
474 | * | ||
475 | * It is used when we want to commit the current transaction, but | ||
476 | * don't want to start a new one. | ||
477 | * | ||
478 | * Note: If this function returns -ENOENT, it just means there is no | ||
479 | * running transaction. But it is possible that the inactive transaction | ||
480 | * is still in the memory, not fully on disk. If you hope there is no | ||
481 | * inactive transaction in the fs when -ENOENT is returned, you should | ||
482 | * invoke | ||
483 | * btrfs_attach_transaction_barrier() | ||
484 | */ | ||
455 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) | 485 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) |
456 | { | 486 | { |
457 | return start_transaction(root, 0, TRANS_ATTACH, 0); | 487 | return start_transaction(root, 0, TRANS_ATTACH, 0); |
458 | } | 488 | } |
459 | 489 | ||
490 | /* | ||
491 | * btrfs_attach_transaction_barrier() - catch the running transaction | ||
492 | * | ||
493 | * It is similar to the above function; the difference is that this one | ||
494 | * will wait for all the inactive transactions until they fully | ||
495 | * complete. | ||
496 | */ | ||
497 | struct btrfs_trans_handle * | ||
498 | btrfs_attach_transaction_barrier(struct btrfs_root *root) | ||
499 | { | ||
500 | struct btrfs_trans_handle *trans; | ||
501 | |||
502 | trans = start_transaction(root, 0, TRANS_ATTACH, 0); | ||
503 | if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT) | ||
504 | btrfs_wait_for_commit(root, 0); | ||
505 | |||
506 | return trans; | ||
507 | } | ||
508 | |||
460 | /* wait for a transaction commit to be fully complete */ | 509 | /* wait for a transaction commit to be fully complete */ |
461 | static noinline void wait_for_commit(struct btrfs_root *root, | 510 | static noinline void wait_for_commit(struct btrfs_root *root, |
462 | struct btrfs_transaction *commit) | 511 | struct btrfs_transaction *commit) |
@@ -645,12 +694,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
645 | btrfs_run_delayed_iputs(root); | 694 | btrfs_run_delayed_iputs(root); |
646 | 695 | ||
647 | if (trans->aborted || | 696 | if (trans->aborted || |
648 | root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { | 697 | test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
649 | err = -EIO; | 698 | err = -EIO; |
650 | } | ||
651 | assert_qgroups_uptodate(trans); | 699 | assert_qgroups_uptodate(trans); |
652 | 700 | ||
653 | memset(trans, 0, sizeof(*trans)); | ||
654 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 701 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
655 | return err; | 702 | return err; |
656 | } | 703 | } |
@@ -961,10 +1008,10 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
961 | } | 1008 | } |
962 | 1009 | ||
963 | /* | 1010 | /* |
964 | * defrag a given btree. If cacheonly == 1, this won't read from the disk, | 1011 | * defrag a given btree. |
965 | * otherwise every leaf in the btree is read and defragged. | 1012 | * Every leaf in the btree is read and defragged. |
966 | */ | 1013 | */ |
967 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | 1014 | int btrfs_defrag_root(struct btrfs_root *root) |
968 | { | 1015 | { |
969 | struct btrfs_fs_info *info = root->fs_info; | 1016 | struct btrfs_fs_info *info = root->fs_info; |
970 | struct btrfs_trans_handle *trans; | 1017 | struct btrfs_trans_handle *trans; |
@@ -978,7 +1025,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
978 | if (IS_ERR(trans)) | 1025 | if (IS_ERR(trans)) |
979 | return PTR_ERR(trans); | 1026 | return PTR_ERR(trans); |
980 | 1027 | ||
981 | ret = btrfs_defrag_leaves(trans, root, cacheonly); | 1028 | ret = btrfs_defrag_leaves(trans, root); |
982 | 1029 | ||
983 | btrfs_end_transaction(trans, root); | 1030 | btrfs_end_transaction(trans, root); |
984 | btrfs_btree_balance_dirty(info->tree_root); | 1031 | btrfs_btree_balance_dirty(info->tree_root); |
@@ -986,6 +1033,12 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
986 | 1033 | ||
987 | if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) | 1034 | if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) |
988 | break; | 1035 | break; |
1036 | |||
1037 | if (btrfs_defrag_cancelled(root->fs_info)) { | ||
1038 | printk(KERN_DEBUG "btrfs: defrag_root cancelled\n"); | ||
1039 | ret = -EAGAIN; | ||
1040 | break; | ||
1041 | } | ||
989 | } | 1042 | } |
990 | root->defrag_running = 0; | 1043 | root->defrag_running = 0; |
991 | return ret; | 1044 | return ret; |
@@ -1307,13 +1360,13 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root, | |||
1307 | struct btrfs_async_commit { | 1360 | struct btrfs_async_commit { |
1308 | struct btrfs_trans_handle *newtrans; | 1361 | struct btrfs_trans_handle *newtrans; |
1309 | struct btrfs_root *root; | 1362 | struct btrfs_root *root; |
1310 | struct delayed_work work; | 1363 | struct work_struct work; |
1311 | }; | 1364 | }; |
1312 | 1365 | ||
1313 | static void do_async_commit(struct work_struct *work) | 1366 | static void do_async_commit(struct work_struct *work) |
1314 | { | 1367 | { |
1315 | struct btrfs_async_commit *ac = | 1368 | struct btrfs_async_commit *ac = |
1316 | container_of(work, struct btrfs_async_commit, work.work); | 1369 | container_of(work, struct btrfs_async_commit, work); |
1317 | 1370 | ||
1318 | /* | 1371 | /* |
1319 | * We've got freeze protection passed with the transaction. | 1372 | * We've got freeze protection passed with the transaction. |
@@ -1341,7 +1394,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1341 | if (!ac) | 1394 | if (!ac) |
1342 | return -ENOMEM; | 1395 | return -ENOMEM; |
1343 | 1396 | ||
1344 | INIT_DELAYED_WORK(&ac->work, do_async_commit); | 1397 | INIT_WORK(&ac->work, do_async_commit); |
1345 | ac->root = root; | 1398 | ac->root = root; |
1346 | ac->newtrans = btrfs_join_transaction(root); | 1399 | ac->newtrans = btrfs_join_transaction(root); |
1347 | if (IS_ERR(ac->newtrans)) { | 1400 | if (IS_ERR(ac->newtrans)) { |
@@ -1365,7 +1418,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans, | |||
1365 | &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], | 1418 | &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], |
1366 | 1, _THIS_IP_); | 1419 | 1, _THIS_IP_); |
1367 | 1420 | ||
1368 | schedule_delayed_work(&ac->work, 0); | 1421 | schedule_work(&ac->work); |
1369 | 1422 | ||
1370 | /* wait for transaction to start and unblock */ | 1423 | /* wait for transaction to start and unblock */ |
1371 | if (wait_for_unblock) | 1424 | if (wait_for_unblock) |
@@ -1428,7 +1481,9 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
1428 | } | 1481 | } |
1429 | 1482 | ||
1430 | if (flush_on_commit || snap_pending) { | 1483 | if (flush_on_commit || snap_pending) { |
1431 | btrfs_start_delalloc_inodes(root, 1); | 1484 | ret = btrfs_start_delalloc_inodes(root, 1); |
1485 | if (ret) | ||
1486 | return ret; | ||
1432 | btrfs_wait_ordered_extents(root, 1); | 1487 | btrfs_wait_ordered_extents(root, 1); |
1433 | } | 1488 | } |
1434 | 1489 | ||
@@ -1450,9 +1505,9 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
1450 | * it here and no for sure that nothing new will be added | 1505 | * it here and no for sure that nothing new will be added |
1451 | * to the list | 1506 | * to the list |
1452 | */ | 1507 | */ |
1453 | btrfs_run_ordered_operations(root, 1); | 1508 | ret = btrfs_run_ordered_operations(trans, root, 1); |
1454 | 1509 | ||
1455 | return 0; | 1510 | return ret; |
1456 | } | 1511 | } |
1457 | 1512 | ||
1458 | /* | 1513 | /* |
@@ -1473,27 +1528,35 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1473 | int should_grow = 0; | 1528 | int should_grow = 0; |
1474 | unsigned long now = get_seconds(); | 1529 | unsigned long now = get_seconds(); |
1475 | 1530 | ||
1476 | ret = btrfs_run_ordered_operations(root, 0); | 1531 | ret = btrfs_run_ordered_operations(trans, root, 0); |
1477 | if (ret) { | 1532 | if (ret) { |
1478 | btrfs_abort_transaction(trans, root, ret); | 1533 | btrfs_abort_transaction(trans, root, ret); |
1479 | goto cleanup_transaction; | 1534 | btrfs_end_transaction(trans, root); |
1535 | return ret; | ||
1480 | } | 1536 | } |
1481 | 1537 | ||
1482 | /* Stop the commit early if ->aborted is set */ | 1538 | /* Stop the commit early if ->aborted is set */ |
1483 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { | 1539 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { |
1484 | ret = cur_trans->aborted; | 1540 | ret = cur_trans->aborted; |
1485 | goto cleanup_transaction; | 1541 | btrfs_end_transaction(trans, root); |
1542 | return ret; | ||
1486 | } | 1543 | } |
1487 | 1544 | ||
1488 | /* make a pass through all the delayed refs we have so far | 1545 | /* make a pass through all the delayed refs we have so far |
1489 | * any runnings procs may add more while we are here | 1546 | * any runnings procs may add more while we are here |
1490 | */ | 1547 | */ |
1491 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1548 | ret = btrfs_run_delayed_refs(trans, root, 0); |
1492 | if (ret) | 1549 | if (ret) { |
1493 | goto cleanup_transaction; | 1550 | btrfs_end_transaction(trans, root); |
1551 | return ret; | ||
1552 | } | ||
1494 | 1553 | ||
1495 | btrfs_trans_release_metadata(trans, root); | 1554 | btrfs_trans_release_metadata(trans, root); |
1496 | trans->block_rsv = NULL; | 1555 | trans->block_rsv = NULL; |
1556 | if (trans->qgroup_reserved) { | ||
1557 | btrfs_qgroup_free(root, trans->qgroup_reserved); | ||
1558 | trans->qgroup_reserved = 0; | ||
1559 | } | ||
1497 | 1560 | ||
1498 | cur_trans = trans->transaction; | 1561 | cur_trans = trans->transaction; |
1499 | 1562 | ||
@@ -1507,8 +1570,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1507 | btrfs_create_pending_block_groups(trans, root); | 1570 | btrfs_create_pending_block_groups(trans, root); |
1508 | 1571 | ||
1509 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1572 | ret = btrfs_run_delayed_refs(trans, root, 0); |
1510 | if (ret) | 1573 | if (ret) { |
1511 | goto cleanup_transaction; | 1574 | btrfs_end_transaction(trans, root); |
1575 | return ret; | ||
1576 | } | ||
1512 | 1577 | ||
1513 | spin_lock(&cur_trans->commit_lock); | 1578 | spin_lock(&cur_trans->commit_lock); |
1514 | if (cur_trans->in_commit) { | 1579 | if (cur_trans->in_commit) { |
@@ -1772,6 +1837,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1772 | cleanup_transaction: | 1837 | cleanup_transaction: |
1773 | btrfs_trans_release_metadata(trans, root); | 1838 | btrfs_trans_release_metadata(trans, root); |
1774 | trans->block_rsv = NULL; | 1839 | trans->block_rsv = NULL; |
1840 | if (trans->qgroup_reserved) { | ||
1841 | btrfs_qgroup_free(root, trans->qgroup_reserved); | ||
1842 | trans->qgroup_reserved = 0; | ||
1843 | } | ||
1775 | btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n"); | 1844 | btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n"); |
1776 | // WARN_ON(1); | 1845 | // WARN_ON(1); |
1777 | if (current->journal_info == trans) | 1846 | if (current->journal_info == trans) |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 0e8aa1e6c287..5afd7b1dceac 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -43,6 +43,7 @@ struct btrfs_transaction { | |||
43 | wait_queue_head_t writer_wait; | 43 | wait_queue_head_t writer_wait; |
44 | wait_queue_head_t commit_wait; | 44 | wait_queue_head_t commit_wait; |
45 | struct list_head pending_snapshots; | 45 | struct list_head pending_snapshots; |
46 | struct list_head ordered_operations; | ||
46 | struct btrfs_delayed_ref_root delayed_refs; | 47 | struct btrfs_delayed_ref_root delayed_refs; |
47 | int aborted; | 48 | int aborted; |
48 | }; | 49 | }; |
@@ -68,6 +69,7 @@ struct btrfs_trans_handle { | |||
68 | struct btrfs_block_rsv *orig_rsv; | 69 | struct btrfs_block_rsv *orig_rsv; |
69 | short aborted; | 70 | short aborted; |
70 | short adding_csums; | 71 | short adding_csums; |
72 | bool allocating_chunk; | ||
71 | enum btrfs_trans_type type; | 73 | enum btrfs_trans_type type; |
72 | /* | 74 | /* |
73 | * this root is only needed to validate that the root passed to | 75 | * this root is only needed to validate that the root passed to |
@@ -110,13 +112,15 @@ struct btrfs_trans_handle *btrfs_start_transaction_lflush( | |||
110 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); | 112 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); |
111 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); | 113 | struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); |
112 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); | 114 | struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); |
115 | struct btrfs_trans_handle *btrfs_attach_transaction_barrier( | ||
116 | struct btrfs_root *root); | ||
113 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); | 117 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); |
114 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); | 118 | int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); |
115 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 119 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
116 | struct btrfs_root *root); | 120 | struct btrfs_root *root); |
117 | 121 | ||
118 | int btrfs_add_dead_root(struct btrfs_root *root); | 122 | int btrfs_add_dead_root(struct btrfs_root *root); |
119 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); | 123 | int btrfs_defrag_root(struct btrfs_root *root); |
120 | int btrfs_clean_old_snapshots(struct btrfs_root *root); | 124 | int btrfs_clean_old_snapshots(struct btrfs_root *root); |
121 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 125 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
122 | struct btrfs_root *root); | 126 | struct btrfs_root *root); |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 3b580ee8ab1d..94e05c1f118a 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -23,13 +23,14 @@ | |||
23 | #include "transaction.h" | 23 | #include "transaction.h" |
24 | #include "locking.h" | 24 | #include "locking.h" |
25 | 25 | ||
26 | /* defrag all the leaves in a given btree. If cache_only == 1, don't read | 26 | /* |
27 | * things from disk, otherwise read all the leaves and try to get key order to | 27 | * Defrag all the leaves in a given btree. |
28 | * Read all the leaves and try to get key order to | ||
28 | * better reflect disk order | 29 | * better reflect disk order |
29 | */ | 30 | */ |
30 | 31 | ||
31 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | 32 | int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, |
32 | struct btrfs_root *root, int cache_only) | 33 | struct btrfs_root *root) |
33 | { | 34 | { |
34 | struct btrfs_path *path = NULL; | 35 | struct btrfs_path *path = NULL; |
35 | struct btrfs_key key; | 36 | struct btrfs_key key; |
@@ -41,9 +42,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
41 | u64 last_ret = 0; | 42 | u64 last_ret = 0; |
42 | u64 min_trans = 0; | 43 | u64 min_trans = 0; |
43 | 44 | ||
44 | if (cache_only) | ||
45 | goto out; | ||
46 | |||
47 | if (root->fs_info->extent_root == root) { | 45 | if (root->fs_info->extent_root == root) { |
48 | /* | 46 | /* |
49 | * there's recursion here right now in the tree locking, | 47 | * there's recursion here right now in the tree locking, |
@@ -86,11 +84,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
86 | } | 84 | } |
87 | 85 | ||
88 | path->keep_locks = 1; | 86 | path->keep_locks = 1; |
89 | if (cache_only) | ||
90 | min_trans = root->defrag_trans_start; | ||
91 | 87 | ||
92 | ret = btrfs_search_forward(root, &key, NULL, path, | 88 | ret = btrfs_search_forward(root, &key, NULL, path, min_trans); |
93 | cache_only, min_trans); | ||
94 | if (ret < 0) | 89 | if (ret < 0) |
95 | goto out; | 90 | goto out; |
96 | if (ret > 0) { | 91 | if (ret > 0) { |
@@ -109,11 +104,11 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
109 | goto out; | 104 | goto out; |
110 | } | 105 | } |
111 | path->slots[1] = btrfs_header_nritems(path->nodes[1]); | 106 | path->slots[1] = btrfs_header_nritems(path->nodes[1]); |
112 | next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only, | 107 | next_key_ret = btrfs_find_next_key(root, path, &key, 1, |
113 | min_trans); | 108 | min_trans); |
114 | ret = btrfs_realloc_node(trans, root, | 109 | ret = btrfs_realloc_node(trans, root, |
115 | path->nodes[1], 0, | 110 | path->nodes[1], 0, |
116 | cache_only, &last_ret, | 111 | &last_ret, |
117 | &root->defrag_progress); | 112 | &root->defrag_progress); |
118 | if (ret) { | 113 | if (ret) { |
119 | WARN_ON(ret == -EAGAIN); | 114 | WARN_ON(ret == -EAGAIN); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9027bb1e7466..1a79087c4575 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -278,8 +278,7 @@ static int process_one_buffer(struct btrfs_root *log, | |||
278 | struct walk_control *wc, u64 gen) | 278 | struct walk_control *wc, u64 gen) |
279 | { | 279 | { |
280 | if (wc->pin) | 280 | if (wc->pin) |
281 | btrfs_pin_extent_for_log_replay(wc->trans, | 281 | btrfs_pin_extent_for_log_replay(log->fs_info->extent_root, |
282 | log->fs_info->extent_root, | ||
283 | eb->start, eb->len); | 282 | eb->start, eb->len); |
284 | 283 | ||
285 | if (btrfs_buffer_uptodate(eb, gen, 0)) { | 284 | if (btrfs_buffer_uptodate(eb, gen, 0)) { |
@@ -2281,6 +2280,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2281 | unsigned long log_transid = 0; | 2280 | unsigned long log_transid = 0; |
2282 | 2281 | ||
2283 | mutex_lock(&root->log_mutex); | 2282 | mutex_lock(&root->log_mutex); |
2283 | log_transid = root->log_transid; | ||
2284 | index1 = root->log_transid % 2; | 2284 | index1 = root->log_transid % 2; |
2285 | if (atomic_read(&root->log_commit[index1])) { | 2285 | if (atomic_read(&root->log_commit[index1])) { |
2286 | wait_log_commit(trans, root, root->log_transid); | 2286 | wait_log_commit(trans, root, root->log_transid); |
@@ -2308,11 +2308,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2308 | /* bail out if we need to do a full commit */ | 2308 | /* bail out if we need to do a full commit */ |
2309 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2309 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { |
2310 | ret = -EAGAIN; | 2310 | ret = -EAGAIN; |
2311 | btrfs_free_logged_extents(log, log_transid); | ||
2311 | mutex_unlock(&root->log_mutex); | 2312 | mutex_unlock(&root->log_mutex); |
2312 | goto out; | 2313 | goto out; |
2313 | } | 2314 | } |
2314 | 2315 | ||
2315 | log_transid = root->log_transid; | ||
2316 | if (log_transid % 2 == 0) | 2316 | if (log_transid % 2 == 0) |
2317 | mark = EXTENT_DIRTY; | 2317 | mark = EXTENT_DIRTY; |
2318 | else | 2318 | else |
@@ -2324,6 +2324,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2324 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); | 2324 | ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); |
2325 | if (ret) { | 2325 | if (ret) { |
2326 | btrfs_abort_transaction(trans, root, ret); | 2326 | btrfs_abort_transaction(trans, root, ret); |
2327 | btrfs_free_logged_extents(log, log_transid); | ||
2327 | mutex_unlock(&root->log_mutex); | 2328 | mutex_unlock(&root->log_mutex); |
2328 | goto out; | 2329 | goto out; |
2329 | } | 2330 | } |
@@ -2363,6 +2364,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2363 | } | 2364 | } |
2364 | root->fs_info->last_trans_log_full_commit = trans->transid; | 2365 | root->fs_info->last_trans_log_full_commit = trans->transid; |
2365 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2366 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2367 | btrfs_free_logged_extents(log, log_transid); | ||
2366 | mutex_unlock(&log_root_tree->log_mutex); | 2368 | mutex_unlock(&log_root_tree->log_mutex); |
2367 | ret = -EAGAIN; | 2369 | ret = -EAGAIN; |
2368 | goto out; | 2370 | goto out; |
@@ -2373,6 +2375,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2373 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2375 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2374 | wait_log_commit(trans, log_root_tree, | 2376 | wait_log_commit(trans, log_root_tree, |
2375 | log_root_tree->log_transid); | 2377 | log_root_tree->log_transid); |
2378 | btrfs_free_logged_extents(log, log_transid); | ||
2376 | mutex_unlock(&log_root_tree->log_mutex); | 2379 | mutex_unlock(&log_root_tree->log_mutex); |
2377 | ret = 0; | 2380 | ret = 0; |
2378 | goto out; | 2381 | goto out; |
@@ -2392,6 +2395,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2392 | */ | 2395 | */ |
2393 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2396 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { |
2394 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2397 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2398 | btrfs_free_logged_extents(log, log_transid); | ||
2395 | mutex_unlock(&log_root_tree->log_mutex); | 2399 | mutex_unlock(&log_root_tree->log_mutex); |
2396 | ret = -EAGAIN; | 2400 | ret = -EAGAIN; |
2397 | goto out_wake_log_root; | 2401 | goto out_wake_log_root; |
@@ -2402,10 +2406,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2402 | EXTENT_DIRTY | EXTENT_NEW); | 2406 | EXTENT_DIRTY | EXTENT_NEW); |
2403 | if (ret) { | 2407 | if (ret) { |
2404 | btrfs_abort_transaction(trans, root, ret); | 2408 | btrfs_abort_transaction(trans, root, ret); |
2409 | btrfs_free_logged_extents(log, log_transid); | ||
2405 | mutex_unlock(&log_root_tree->log_mutex); | 2410 | mutex_unlock(&log_root_tree->log_mutex); |
2406 | goto out_wake_log_root; | 2411 | goto out_wake_log_root; |
2407 | } | 2412 | } |
2408 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2413 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2414 | btrfs_wait_logged_extents(log, log_transid); | ||
2409 | 2415 | ||
2410 | btrfs_set_super_log_root(root->fs_info->super_for_commit, | 2416 | btrfs_set_super_log_root(root->fs_info->super_for_commit, |
2411 | log_root_tree->node->start); | 2417 | log_root_tree->node->start); |
@@ -2475,6 +2481,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans, | |||
2475 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | 2481 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); |
2476 | } | 2482 | } |
2477 | 2483 | ||
2484 | /* | ||
2485 | * We may have short-circuited the log tree with the full commit logic | ||
2486 | * and left ordered extents on our list, so clear these out to keep us | ||
2487 | * from leaking inodes and memory. | ||
2488 | */ | ||
2489 | btrfs_free_logged_extents(log, 0); | ||
2490 | btrfs_free_logged_extents(log, 1); | ||
2491 | |||
2478 | free_extent_buffer(log->node); | 2492 | free_extent_buffer(log->node); |
2479 | kfree(log); | 2493 | kfree(log); |
2480 | } | 2494 | } |
@@ -2724,7 +2738,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2724 | path->keep_locks = 1; | 2738 | path->keep_locks = 1; |
2725 | 2739 | ||
2726 | ret = btrfs_search_forward(root, &min_key, &max_key, | 2740 | ret = btrfs_search_forward(root, &min_key, &max_key, |
2727 | path, 0, trans->transid); | 2741 | path, trans->transid); |
2728 | 2742 | ||
2729 | /* | 2743 | /* |
2730 | * we didn't find anything from this transaction, see if there | 2744 | * we didn't find anything from this transaction, see if there |
@@ -3271,14 +3285,18 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3271 | struct btrfs_root *log = root->log_root; | 3285 | struct btrfs_root *log = root->log_root; |
3272 | struct btrfs_file_extent_item *fi; | 3286 | struct btrfs_file_extent_item *fi; |
3273 | struct extent_buffer *leaf; | 3287 | struct extent_buffer *leaf; |
3288 | struct btrfs_ordered_extent *ordered; | ||
3274 | struct list_head ordered_sums; | 3289 | struct list_head ordered_sums; |
3275 | struct btrfs_map_token token; | 3290 | struct btrfs_map_token token; |
3276 | struct btrfs_key key; | 3291 | struct btrfs_key key; |
3277 | u64 csum_offset = em->mod_start - em->start; | 3292 | u64 mod_start = em->mod_start; |
3278 | u64 csum_len = em->mod_len; | 3293 | u64 mod_len = em->mod_len; |
3294 | u64 csum_offset; | ||
3295 | u64 csum_len; | ||
3279 | u64 extent_offset = em->start - em->orig_start; | 3296 | u64 extent_offset = em->start - em->orig_start; |
3280 | u64 block_len; | 3297 | u64 block_len; |
3281 | int ret; | 3298 | int ret; |
3299 | int index = log->log_transid % 2; | ||
3282 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 3300 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
3283 | 3301 | ||
3284 | INIT_LIST_HEAD(&ordered_sums); | 3302 | INIT_LIST_HEAD(&ordered_sums); |
@@ -3362,6 +3380,92 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3362 | csum_len = block_len; | 3380 | csum_len = block_len; |
3363 | } | 3381 | } |
3364 | 3382 | ||
3383 | /* | ||
3384 | * First check and see if our csums are on our outstanding ordered | ||
3385 | * extents. | ||
3386 | */ | ||
3387 | again: | ||
3388 | spin_lock_irq(&log->log_extents_lock[index]); | ||
3389 | list_for_each_entry(ordered, &log->logged_list[index], log_list) { | ||
3390 | struct btrfs_ordered_sum *sum; | ||
3391 | |||
3392 | if (!mod_len) | ||
3393 | break; | ||
3394 | |||
3395 | if (ordered->inode != inode) | ||
3396 | continue; | ||
3397 | |||
3398 | if (ordered->file_offset + ordered->len <= mod_start || | ||
3399 | mod_start + mod_len <= ordered->file_offset) | ||
3400 | continue; | ||
3401 | |||
3402 | /* | ||
3403 | * We are going to copy all the csums on this ordered extent, so | ||
3404 | * go ahead and adjust mod_start and mod_len in case this | ||
3405 | * ordered extent has already been logged. | ||
3406 | */ | ||
3407 | if (ordered->file_offset > mod_start) { | ||
3408 | if (ordered->file_offset + ordered->len >= | ||
3409 | mod_start + mod_len) | ||
3410 | mod_len = ordered->file_offset - mod_start; | ||
3411 | /* | ||
3412 | * If we have this case | ||
3413 | * | ||
3414 | * |--------- logged extent ---------| | ||
3415 | * |----- ordered extent ----| | ||
3416 | * | ||
3417 | * Just don't mess with mod_start and mod_len, we'll | ||
3418 | * just end up logging more csums than we need and it | ||
3419 | * will be ok. | ||
3420 | */ | ||
3421 | } else { | ||
3422 | if (ordered->file_offset + ordered->len < | ||
3423 | mod_start + mod_len) { | ||
3424 | mod_len = (mod_start + mod_len) - | ||
3425 | (ordered->file_offset + ordered->len); | ||
3426 | mod_start = ordered->file_offset + | ||
3427 | ordered->len; | ||
3428 | } else { | ||
3429 | mod_len = 0; | ||
3430 | } | ||
3431 | } | ||
3432 | |||
3433 | /* | ||
3434 | * To keep us from looping for the above case of an ordered | ||
3435 | * extent that falls inside of the logged extent. | ||
3436 | */ | ||
3437 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, | ||
3438 | &ordered->flags)) | ||
3439 | continue; | ||
3440 | atomic_inc(&ordered->refs); | ||
3441 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
3442 | /* | ||
3443 | * we've dropped the lock, we must either break or | ||
3444 | * start over after this. | ||
3445 | */ | ||
3446 | |||
3447 | wait_event(ordered->wait, ordered->csum_bytes_left == 0); | ||
3448 | |||
3449 | list_for_each_entry(sum, &ordered->list, list) { | ||
3450 | ret = btrfs_csum_file_blocks(trans, log, sum); | ||
3451 | if (ret) { | ||
3452 | btrfs_put_ordered_extent(ordered); | ||
3453 | goto unlocked; | ||
3454 | } | ||
3455 | } | ||
3456 | btrfs_put_ordered_extent(ordered); | ||
3457 | goto again; | ||
3458 | |||
3459 | } | ||
3460 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
3461 | unlocked: | ||
3462 | |||
3463 | if (!mod_len || ret) | ||
3464 | return ret; | ||
3465 | |||
3466 | csum_offset = mod_start - em->start; | ||
3467 | csum_len = mod_len; | ||
3468 | |||
3365 | /* block start is already adjusted for the file extent offset. */ | 3469 | /* block start is already adjusted for the file extent offset. */ |
3366 | ret = btrfs_lookup_csums_range(log->fs_info->csum_root, | 3470 | ret = btrfs_lookup_csums_range(log->fs_info->csum_root, |
3367 | em->block_start + csum_offset, | 3471 | em->block_start + csum_offset, |
@@ -3393,6 +3497,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
3393 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; | 3497 | struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; |
3394 | u64 test_gen; | 3498 | u64 test_gen; |
3395 | int ret = 0; | 3499 | int ret = 0; |
3500 | int num = 0; | ||
3396 | 3501 | ||
3397 | INIT_LIST_HEAD(&extents); | 3502 | INIT_LIST_HEAD(&extents); |
3398 | 3503 | ||
@@ -3401,16 +3506,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | |||
3401 | 3506 | ||
3402 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) { | 3507 | list_for_each_entry_safe(em, n, &tree->modified_extents, list) { |
3403 | list_del_init(&em->list); | 3508 | list_del_init(&em->list); |
3509 | |||
3510 | /* | ||
3511 | * Just an arbitrary number, this can be really CPU intensive | ||
3512 | * once we start getting a lot of extents, and really once we | ||
3513 | * have a bunch of extents we just want to commit since it will | ||
3514 | * be faster. | ||
3515 | */ | ||
3516 | if (++num > 32768) { | ||
3517 | list_del_init(&tree->modified_extents); | ||
3518 | ret = -EFBIG; | ||
3519 | goto process; | ||
3520 | } | ||
3521 | |||
3404 | if (em->generation <= test_gen) | 3522 | if (em->generation <= test_gen) |
3405 | continue; | 3523 | continue; |
3406 | /* Need a ref to keep it from getting evicted from cache */ | 3524 | /* Need a ref to keep it from getting evicted from cache */ |
3407 | atomic_inc(&em->refs); | 3525 | atomic_inc(&em->refs); |
3408 | set_bit(EXTENT_FLAG_LOGGING, &em->flags); | 3526 | set_bit(EXTENT_FLAG_LOGGING, &em->flags); |
3409 | list_add_tail(&em->list, &extents); | 3527 | list_add_tail(&em->list, &extents); |
3528 | num++; | ||
3410 | } | 3529 | } |
3411 | 3530 | ||
3412 | list_sort(NULL, &extents, extent_cmp); | 3531 | list_sort(NULL, &extents, extent_cmp); |
3413 | 3532 | ||
3533 | process: | ||
3414 | while (!list_empty(&extents)) { | 3534 | while (!list_empty(&extents)) { |
3415 | em = list_entry(extents.next, struct extent_map, list); | 3535 | em = list_entry(extents.next, struct extent_map, list); |
3416 | 3536 | ||
@@ -3513,6 +3633,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3513 | 3633 | ||
3514 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3634 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
3515 | 3635 | ||
3636 | btrfs_get_logged_extents(log, inode); | ||
3637 | |||
3516 | /* | 3638 | /* |
3517 | * a brute force approach to making sure we get the most uptodate | 3639 | * a brute force approach to making sure we get the most uptodate |
3518 | * copies of everything. | 3640 | * copies of everything. |
@@ -3558,7 +3680,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3558 | while (1) { | 3680 | while (1) { |
3559 | ins_nr = 0; | 3681 | ins_nr = 0; |
3560 | ret = btrfs_search_forward(root, &min_key, &max_key, | 3682 | ret = btrfs_search_forward(root, &min_key, &max_key, |
3561 | path, 0, trans->transid); | 3683 | path, trans->transid); |
3562 | if (ret != 0) | 3684 | if (ret != 0) |
3563 | break; | 3685 | break; |
3564 | again: | 3686 | again: |
@@ -3656,6 +3778,8 @@ log_extents: | |||
3656 | BTRFS_I(inode)->logged_trans = trans->transid; | 3778 | BTRFS_I(inode)->logged_trans = trans->transid; |
3657 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | 3779 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; |
3658 | out_unlock: | 3780 | out_unlock: |
3781 | if (err) | ||
3782 | btrfs_free_logged_extents(log, log->log_transid); | ||
3659 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 3783 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
3660 | 3784 | ||
3661 | btrfs_free_path(path); | 3785 | btrfs_free_path(path); |
@@ -3822,7 +3946,6 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
3822 | end_trans: | 3946 | end_trans: |
3823 | dput(old_parent); | 3947 | dput(old_parent); |
3824 | if (ret < 0) { | 3948 | if (ret < 0) { |
3825 | WARN_ON(ret != -ENOSPC); | ||
3826 | root->fs_info->last_trans_log_full_commit = trans->transid; | 3949 | root->fs_info->last_trans_log_full_commit = trans->transid; |
3827 | ret = 1; | 3950 | ret = 1; |
3828 | } | 3951 | } |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 5cbb7f4b1672..72b1cf1b2b5e 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -792,26 +792,76 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, | |||
792 | return ret; | 792 | return ret; |
793 | } | 793 | } |
794 | 794 | ||
795 | /* | ||
796 | * Look for a btrfs signature on a device. This may be called out of the mount path | ||
797 | * and we are not allowed to call set_blocksize during the scan. The superblock | ||
798 | * is read via pagecache | ||
799 | */ | ||
795 | int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | 800 | int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, |
796 | struct btrfs_fs_devices **fs_devices_ret) | 801 | struct btrfs_fs_devices **fs_devices_ret) |
797 | { | 802 | { |
798 | struct btrfs_super_block *disk_super; | 803 | struct btrfs_super_block *disk_super; |
799 | struct block_device *bdev; | 804 | struct block_device *bdev; |
800 | struct buffer_head *bh; | 805 | struct page *page; |
801 | int ret; | 806 | void *p; |
807 | int ret = -EINVAL; | ||
802 | u64 devid; | 808 | u64 devid; |
803 | u64 transid; | 809 | u64 transid; |
804 | u64 total_devices; | 810 | u64 total_devices; |
811 | u64 bytenr; | ||
812 | pgoff_t index; | ||
805 | 813 | ||
814 | /* | ||
815 | * we would like to check all the supers, but that would make | ||
816 | * a btrfs mount succeed after a mkfs from a different FS. | ||
817 | * So, we need to add a special mount option to scan for | ||
818 | * later supers, using BTRFS_SUPER_MIRROR_MAX instead | ||
819 | */ | ||
820 | bytenr = btrfs_sb_offset(0); | ||
806 | flags |= FMODE_EXCL; | 821 | flags |= FMODE_EXCL; |
807 | mutex_lock(&uuid_mutex); | 822 | mutex_lock(&uuid_mutex); |
808 | ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh); | 823 | |
809 | if (ret) | 824 | bdev = blkdev_get_by_path(path, flags, holder); |
825 | |||
826 | if (IS_ERR(bdev)) { | ||
827 | ret = PTR_ERR(bdev); | ||
828 | printk(KERN_INFO "btrfs: open %s failed\n", path); | ||
810 | goto error; | 829 | goto error; |
811 | disk_super = (struct btrfs_super_block *)bh->b_data; | 830 | } |
831 | |||
832 | /* make sure our super fits in the device */ | ||
833 | if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode)) | ||
834 | goto error_bdev_put; | ||
835 | |||
836 | /* make sure our super fits in the page */ | ||
837 | if (sizeof(*disk_super) > PAGE_CACHE_SIZE) | ||
838 | goto error_bdev_put; | ||
839 | |||
840 | /* make sure our super doesn't straddle pages on disk */ | ||
841 | index = bytenr >> PAGE_CACHE_SHIFT; | ||
842 | if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index) | ||
843 | goto error_bdev_put; | ||
844 | |||
845 | /* pull in the page with our super */ | ||
846 | page = read_cache_page_gfp(bdev->bd_inode->i_mapping, | ||
847 | index, GFP_NOFS); | ||
848 | |||
849 | if (IS_ERR_OR_NULL(page)) | ||
850 | goto error_bdev_put; | ||
851 | |||
852 | p = kmap(page); | ||
853 | |||
854 | /* align our pointer to the offset of the super block */ | ||
855 | disk_super = p + (bytenr & ~PAGE_CACHE_MASK); | ||
856 | |||
857 | if (btrfs_super_bytenr(disk_super) != bytenr || | ||
858 | disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) | ||
859 | goto error_unmap; | ||
860 | |||
812 | devid = btrfs_stack_device_id(&disk_super->dev_item); | 861 | devid = btrfs_stack_device_id(&disk_super->dev_item); |
813 | transid = btrfs_super_generation(disk_super); | 862 | transid = btrfs_super_generation(disk_super); |
814 | total_devices = btrfs_super_num_devices(disk_super); | 863 | total_devices = btrfs_super_num_devices(disk_super); |
864 | |||
815 | if (disk_super->label[0]) { | 865 | if (disk_super->label[0]) { |
816 | if (disk_super->label[BTRFS_LABEL_SIZE - 1]) | 866 | if (disk_super->label[BTRFS_LABEL_SIZE - 1]) |
817 | disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; | 867 | disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; |
@@ -819,12 +869,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, | |||
819 | } else { | 869 | } else { |
820 | printk(KERN_INFO "device fsid %pU ", disk_super->fsid); | 870 | printk(KERN_INFO "device fsid %pU ", disk_super->fsid); |
821 | } | 871 | } |
872 | |||
822 | printk(KERN_CONT "devid %llu transid %llu %s\n", | 873 | printk(KERN_CONT "devid %llu transid %llu %s\n", |
823 | (unsigned long long)devid, (unsigned long long)transid, path); | 874 | (unsigned long long)devid, (unsigned long long)transid, path); |
875 | |||
824 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); | 876 | ret = device_list_add(path, disk_super, devid, fs_devices_ret); |
825 | if (!ret && fs_devices_ret) | 877 | if (!ret && fs_devices_ret) |
826 | (*fs_devices_ret)->total_devices = total_devices; | 878 | (*fs_devices_ret)->total_devices = total_devices; |
827 | brelse(bh); | 879 | |
880 | error_unmap: | ||
881 | kunmap(page); | ||
882 | page_cache_release(page); | ||
883 | |||
884 | error_bdev_put: | ||
828 | blkdev_put(bdev, flags); | 885 | blkdev_put(bdev, flags); |
829 | error: | 886 | error: |
830 | mutex_unlock(&uuid_mutex); | 887 | mutex_unlock(&uuid_mutex); |
@@ -1372,14 +1429,19 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) | |||
1372 | u64 devid; | 1429 | u64 devid; |
1373 | u64 num_devices; | 1430 | u64 num_devices; |
1374 | u8 *dev_uuid; | 1431 | u8 *dev_uuid; |
1432 | unsigned seq; | ||
1375 | int ret = 0; | 1433 | int ret = 0; |
1376 | bool clear_super = false; | 1434 | bool clear_super = false; |
1377 | 1435 | ||
1378 | mutex_lock(&uuid_mutex); | 1436 | mutex_lock(&uuid_mutex); |
1379 | 1437 | ||
1380 | all_avail = root->fs_info->avail_data_alloc_bits | | 1438 | do { |
1381 | root->fs_info->avail_system_alloc_bits | | 1439 | seq = read_seqbegin(&root->fs_info->profiles_lock); |
1382 | root->fs_info->avail_metadata_alloc_bits; | 1440 | |
1441 | all_avail = root->fs_info->avail_data_alloc_bits | | ||
1442 | root->fs_info->avail_system_alloc_bits | | ||
1443 | root->fs_info->avail_metadata_alloc_bits; | ||
1444 | } while (read_seqretry(&root->fs_info->profiles_lock, seq)); | ||
1383 | 1445 | ||
1384 | num_devices = root->fs_info->fs_devices->num_devices; | 1446 | num_devices = root->fs_info->fs_devices->num_devices; |
1385 | btrfs_dev_replace_lock(&root->fs_info->dev_replace); | 1447 | btrfs_dev_replace_lock(&root->fs_info->dev_replace); |
@@ -2616,7 +2678,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, | |||
2616 | chunk_used = btrfs_block_group_used(&cache->item); | 2678 | chunk_used = btrfs_block_group_used(&cache->item); |
2617 | 2679 | ||
2618 | if (bargs->usage == 0) | 2680 | if (bargs->usage == 0) |
2619 | user_thresh = 0; | 2681 | user_thresh = 1; |
2620 | else if (bargs->usage > 100) | 2682 | else if (bargs->usage > 100) |
2621 | user_thresh = cache->key.offset; | 2683 | user_thresh = cache->key.offset; |
2622 | else | 2684 | else |
@@ -2985,6 +3047,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
2985 | int mixed = 0; | 3047 | int mixed = 0; |
2986 | int ret; | 3048 | int ret; |
2987 | u64 num_devices; | 3049 | u64 num_devices; |
3050 | unsigned seq; | ||
2988 | 3051 | ||
2989 | if (btrfs_fs_closing(fs_info) || | 3052 | if (btrfs_fs_closing(fs_info) || |
2990 | atomic_read(&fs_info->balance_pause_req) || | 3053 | atomic_read(&fs_info->balance_pause_req) || |
@@ -3068,22 +3131,26 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
3068 | /* allow to reduce meta or sys integrity only if force set */ | 3131 | /* allow to reduce meta or sys integrity only if force set */ |
3069 | allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | 3132 | allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | |
3070 | BTRFS_BLOCK_GROUP_RAID10; | 3133 | BTRFS_BLOCK_GROUP_RAID10; |
3071 | if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && | 3134 | do { |
3072 | (fs_info->avail_system_alloc_bits & allowed) && | 3135 | seq = read_seqbegin(&fs_info->profiles_lock); |
3073 | !(bctl->sys.target & allowed)) || | 3136 | |
3074 | ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && | 3137 | if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && |
3075 | (fs_info->avail_metadata_alloc_bits & allowed) && | 3138 | (fs_info->avail_system_alloc_bits & allowed) && |
3076 | !(bctl->meta.target & allowed))) { | 3139 | !(bctl->sys.target & allowed)) || |
3077 | if (bctl->flags & BTRFS_BALANCE_FORCE) { | 3140 | ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && |
3078 | printk(KERN_INFO "btrfs: force reducing metadata " | 3141 | (fs_info->avail_metadata_alloc_bits & allowed) && |
3079 | "integrity\n"); | 3142 | !(bctl->meta.target & allowed))) { |
3080 | } else { | 3143 | if (bctl->flags & BTRFS_BALANCE_FORCE) { |
3081 | printk(KERN_ERR "btrfs: balance will reduce metadata " | 3144 | printk(KERN_INFO "btrfs: force reducing metadata " |
3082 | "integrity, use force if you want this\n"); | 3145 | "integrity\n"); |
3083 | ret = -EINVAL; | 3146 | } else { |
3084 | goto out; | 3147 | printk(KERN_ERR "btrfs: balance will reduce metadata " |
3148 | "integrity, use force if you want this\n"); | ||
3149 | ret = -EINVAL; | ||
3150 | goto out; | ||
3151 | } | ||
3085 | } | 3152 | } |
3086 | } | 3153 | } while (read_seqretry(&fs_info->profiles_lock, seq)); |
3087 | 3154 | ||
3088 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { | 3155 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { |
3089 | int num_tolerated_disk_barrier_failures; | 3156 | int num_tolerated_disk_barrier_failures; |
@@ -3127,6 +3194,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
3127 | mutex_lock(&fs_info->balance_mutex); | 3194 | mutex_lock(&fs_info->balance_mutex); |
3128 | atomic_dec(&fs_info->balance_running); | 3195 | atomic_dec(&fs_info->balance_running); |
3129 | 3196 | ||
3197 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
3198 | fs_info->num_tolerated_disk_barrier_failures = | ||
3199 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
3200 | } | ||
3201 | |||
3130 | if (bargs) { | 3202 | if (bargs) { |
3131 | memset(bargs, 0, sizeof(*bargs)); | 3203 | memset(bargs, 0, sizeof(*bargs)); |
3132 | update_ioctl_balance_args(fs_info, 0, bargs); | 3204 | update_ioctl_balance_args(fs_info, 0, bargs); |
@@ -3137,11 +3209,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl, | |||
3137 | __cancel_balance(fs_info); | 3209 | __cancel_balance(fs_info); |
3138 | } | 3210 | } |
3139 | 3211 | ||
3140 | if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { | ||
3141 | fs_info->num_tolerated_disk_barrier_failures = | ||
3142 | btrfs_calc_num_tolerated_disk_barrier_failures(fs_info); | ||
3143 | } | ||
3144 | |||
3145 | wake_up(&fs_info->balance_wait_q); | 3212 | wake_up(&fs_info->balance_wait_q); |
3146 | 3213 | ||
3147 | return ret; | 3214 | return ret; |
@@ -3504,13 +3571,48 @@ static int btrfs_cmp_device_info(const void *a, const void *b) | |||
3504 | } | 3571 | } |
3505 | 3572 | ||
3506 | struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { | 3573 | struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { |
3507 | { 2, 1, 0, 4, 2, 2 /* raid10 */ }, | 3574 | [BTRFS_RAID_RAID10] = { |
3508 | { 1, 1, 2, 2, 2, 2 /* raid1 */ }, | 3575 | .sub_stripes = 2, |
3509 | { 1, 2, 1, 1, 1, 2 /* dup */ }, | 3576 | .dev_stripes = 1, |
3510 | { 1, 1, 0, 2, 1, 1 /* raid0 */ }, | 3577 | .devs_max = 0, /* 0 == as many as possible */ |
3511 | { 1, 1, 1, 1, 1, 1 /* single */ }, | 3578 | .devs_min = 4, |
3579 | .devs_increment = 2, | ||
3580 | .ncopies = 2, | ||
3581 | }, | ||
3582 | [BTRFS_RAID_RAID1] = { | ||
3583 | .sub_stripes = 1, | ||
3584 | .dev_stripes = 1, | ||
3585 | .devs_max = 2, | ||
3586 | .devs_min = 2, | ||
3587 | .devs_increment = 2, | ||
3588 | .ncopies = 2, | ||
3589 | }, | ||
3590 | [BTRFS_RAID_DUP] = { | ||
3591 | .sub_stripes = 1, | ||
3592 | .dev_stripes = 2, | ||
3593 | .devs_max = 1, | ||
3594 | .devs_min = 1, | ||
3595 | .devs_increment = 1, | ||
3596 | .ncopies = 2, | ||
3597 | }, | ||
3598 | [BTRFS_RAID_RAID0] = { | ||
3599 | .sub_stripes = 1, | ||
3600 | .dev_stripes = 1, | ||
3601 | .devs_max = 0, | ||
3602 | .devs_min = 2, | ||
3603 | .devs_increment = 1, | ||
3604 | .ncopies = 1, | ||
3605 | }, | ||
3606 | [BTRFS_RAID_SINGLE] = { | ||
3607 | .sub_stripes = 1, | ||
3608 | .dev_stripes = 1, | ||
3609 | .devs_max = 1, | ||
3610 | .devs_min = 1, | ||
3611 | .devs_increment = 1, | ||
3612 | .ncopies = 1, | ||
3613 | }, | ||
3512 | }; | 3614 | }; |
3513 | 3615 | ||
3514 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | 3616 | static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, |
3515 | struct btrfs_root *extent_root, | 3617 | struct btrfs_root *extent_root, |
3516 | struct map_lookup **map_ret, | 3618 | struct map_lookup **map_ret, |
@@ -3631,12 +3733,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
3631 | if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) | 3733 | if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) |
3632 | continue; | 3734 | continue; |
3633 | 3735 | ||
3736 | if (ndevs == fs_devices->rw_devices) { | ||
3737 | WARN(1, "%s: found more than %llu devices\n", | ||
3738 | __func__, fs_devices->rw_devices); | ||
3739 | break; | ||
3740 | } | ||
3634 | devices_info[ndevs].dev_offset = dev_offset; | 3741 | devices_info[ndevs].dev_offset = dev_offset; |
3635 | devices_info[ndevs].max_avail = max_avail; | 3742 | devices_info[ndevs].max_avail = max_avail; |
3636 | devices_info[ndevs].total_avail = total_avail; | 3743 | devices_info[ndevs].total_avail = total_avail; |
3637 | devices_info[ndevs].dev = device; | 3744 | devices_info[ndevs].dev = device; |
3638 | ++ndevs; | 3745 | ++ndevs; |
3639 | WARN_ON(ndevs > fs_devices->rw_devices); | ||
3640 | } | 3746 | } |
3641 | 3747 | ||
3642 | /* | 3748 | /* |
@@ -3718,15 +3824,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
3718 | write_lock(&em_tree->lock); | 3824 | write_lock(&em_tree->lock); |
3719 | ret = add_extent_mapping(em_tree, em); | 3825 | ret = add_extent_mapping(em_tree, em); |
3720 | write_unlock(&em_tree->lock); | 3826 | write_unlock(&em_tree->lock); |
3721 | free_extent_map(em); | 3827 | if (ret) { |
3722 | if (ret) | 3828 | free_extent_map(em); |
3723 | goto error; | ||
3724 | |||
3725 | ret = btrfs_make_block_group(trans, extent_root, 0, type, | ||
3726 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | ||
3727 | start, num_bytes); | ||
3728 | if (ret) | ||
3729 | goto error; | 3829 | goto error; |
3830 | } | ||
3730 | 3831 | ||
3731 | for (i = 0; i < map->num_stripes; ++i) { | 3832 | for (i = 0; i < map->num_stripes; ++i) { |
3732 | struct btrfs_device *device; | 3833 | struct btrfs_device *device; |
@@ -3739,15 +3840,42 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, | |||
3739 | info->chunk_root->root_key.objectid, | 3840 | info->chunk_root->root_key.objectid, |
3740 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | 3841 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, |
3741 | start, dev_offset, stripe_size); | 3842 | start, dev_offset, stripe_size); |
3742 | if (ret) { | 3843 | if (ret) |
3743 | btrfs_abort_transaction(trans, extent_root, ret); | 3844 | goto error_dev_extent; |
3744 | goto error; | ||
3745 | } | ||
3746 | } | 3845 | } |
3747 | 3846 | ||
3847 | ret = btrfs_make_block_group(trans, extent_root, 0, type, | ||
3848 | BTRFS_FIRST_CHUNK_TREE_OBJECTID, | ||
3849 | start, num_bytes); | ||
3850 | if (ret) { | ||
3851 | i = map->num_stripes - 1; | ||
3852 | goto error_dev_extent; | ||
3853 | } | ||
3854 | |||
3855 | free_extent_map(em); | ||
3748 | kfree(devices_info); | 3856 | kfree(devices_info); |
3749 | return 0; | 3857 | return 0; |
3750 | 3858 | ||
3859 | error_dev_extent: | ||
3860 | for (; i >= 0; i--) { | ||
3861 | struct btrfs_device *device; | ||
3862 | int err; | ||
3863 | |||
3864 | device = map->stripes[i].dev; | ||
3865 | err = btrfs_free_dev_extent(trans, device, start); | ||
3866 | if (err) { | ||
3867 | btrfs_abort_transaction(trans, extent_root, err); | ||
3868 | break; | ||
3869 | } | ||
3870 | } | ||
3871 | write_lock(&em_tree->lock); | ||
3872 | remove_extent_mapping(em_tree, em); | ||
3873 | write_unlock(&em_tree->lock); | ||
3874 | |||
3875 | /* One for our allocation */ | ||
3876 | free_extent_map(em); | ||
3877 | /* One for the tree reference */ | ||
3878 | free_extent_map(em); | ||
3751 | error: | 3879 | error: |
3752 | kfree(map); | 3880 | kfree(map); |
3753 | kfree(devices_info); | 3881 | kfree(devices_info); |
@@ -3887,10 +4015,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
3887 | if (ret) | 4015 | if (ret) |
3888 | return ret; | 4016 | return ret; |
3889 | 4017 | ||
3890 | alloc_profile = BTRFS_BLOCK_GROUP_METADATA | | 4018 | alloc_profile = btrfs_get_alloc_profile(extent_root, 0); |
3891 | fs_info->avail_metadata_alloc_bits; | ||
3892 | alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); | ||
3893 | |||
3894 | ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, | 4019 | ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, |
3895 | &stripe_size, chunk_offset, alloc_profile); | 4020 | &stripe_size, chunk_offset, alloc_profile); |
3896 | if (ret) | 4021 | if (ret) |
@@ -3898,10 +4023,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans, | |||
3898 | 4023 | ||
3899 | sys_chunk_offset = chunk_offset + chunk_size; | 4024 | sys_chunk_offset = chunk_offset + chunk_size; |
3900 | 4025 | ||
3901 | alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM | | 4026 | alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0); |
3902 | fs_info->avail_system_alloc_bits; | ||
3903 | alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile); | ||
3904 | |||
3905 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, | 4027 | ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, |
3906 | &sys_chunk_size, &sys_stripe_size, | 4028 | &sys_chunk_size, &sys_stripe_size, |
3907 | sys_chunk_offset, alloc_profile); | 4029 | sys_chunk_offset, alloc_profile); |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index d3c3939ac751..12bb84166a5f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -21,8 +21,8 @@ | |||
21 | 21 | ||
22 | #include <linux/bio.h> | 22 | #include <linux/bio.h> |
23 | #include <linux/sort.h> | 23 | #include <linux/sort.h> |
24 | #include <linux/btrfs.h> | ||
24 | #include "async-thread.h" | 25 | #include "async-thread.h" |
25 | #include "ioctl.h" | ||
26 | 26 | ||
27 | #define BTRFS_STRIPE_LEN (64 * 1024) | 27 | #define BTRFS_STRIPE_LEN (64 * 1024) |
28 | 28 | ||
diff --git a/include/linux/btrfs.h b/include/linux/btrfs.h new file mode 100644 index 000000000000..22d799147db2 --- /dev/null +++ b/include/linux/btrfs.h | |||
@@ -0,0 +1,6 @@ | |||
1 | #ifndef _LINUX_BTRFS_H | ||
2 | #define _LINUX_BTRFS_H | ||
3 | |||
4 | #include <uapi/linux/btrfs.h> | ||
5 | |||
6 | #endif /* _LINUX_BTRFS_H */ | ||
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 4e67194fd2c3..5c8a1d25e21c 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild | |||
@@ -68,6 +68,7 @@ header-y += blkpg.h | |||
68 | header-y += blktrace_api.h | 68 | header-y += blktrace_api.h |
69 | header-y += bpqether.h | 69 | header-y += bpqether.h |
70 | header-y += bsg.h | 70 | header-y += bsg.h |
71 | header-y += btrfs.h | ||
71 | header-y += can.h | 72 | header-y += can.h |
72 | header-y += capability.h | 73 | header-y += capability.h |
73 | header-y += capi.h | 74 | header-y += capi.h |
diff --git a/fs/btrfs/ioctl.h b/include/uapi/linux/btrfs.h index dabca9cc8c2e..fa3a5f9338fc 100644 --- a/fs/btrfs/ioctl.h +++ b/include/uapi/linux/btrfs.h | |||
@@ -16,8 +16,9 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #ifndef __IOCTL_ | 19 | #ifndef _UAPI_LINUX_BTRFS_H |
20 | #define __IOCTL_ | 20 | #define _UAPI_LINUX_BTRFS_H |
21 | #include <linux/types.h> | ||
21 | #include <linux/ioctl.h> | 22 | #include <linux/ioctl.h> |
22 | 23 | ||
23 | #define BTRFS_IOCTL_MAGIC 0x94 | 24 | #define BTRFS_IOCTL_MAGIC 0x94 |
@@ -406,6 +407,13 @@ struct btrfs_ioctl_received_subvol_args { | |||
406 | __u64 reserved[16]; /* in */ | 407 | __u64 reserved[16]; /* in */ |
407 | }; | 408 | }; |
408 | 409 | ||
410 | /* | ||
411 | * Caller doesn't want file data in the send stream, even if the | ||
412 | * search of clone sources doesn't find an extent. UPDATE_EXTENT | ||
413 | * commands will be sent instead of WRITE commands. | ||
414 | */ | ||
415 | #define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1 | ||
416 | |||
409 | struct btrfs_ioctl_send_args { | 417 | struct btrfs_ioctl_send_args { |
410 | __s64 send_fd; /* in */ | 418 | __s64 send_fd; /* in */ |
411 | __u64 clone_sources_count; /* in */ | 419 | __u64 clone_sources_count; /* in */ |
@@ -494,9 +502,13 @@ struct btrfs_ioctl_send_args { | |||
494 | struct btrfs_ioctl_qgroup_create_args) | 502 | struct btrfs_ioctl_qgroup_create_args) |
495 | #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ | 503 | #define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ |
496 | struct btrfs_ioctl_qgroup_limit_args) | 504 | struct btrfs_ioctl_qgroup_limit_args) |
505 | #define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \ | ||
506 | char[BTRFS_LABEL_SIZE]) | ||
507 | #define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \ | ||
508 | char[BTRFS_LABEL_SIZE]) | ||
497 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ | 509 | #define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ |
498 | struct btrfs_ioctl_get_dev_stats) | 510 | struct btrfs_ioctl_get_dev_stats) |
499 | #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ | 511 | #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ |
500 | struct btrfs_ioctl_dev_replace_args) | 512 | struct btrfs_ioctl_dev_replace_args) |
501 | 513 | ||
502 | #endif | 514 | #endif /* _UAPI_LINUX_BTRFS_H */ |