author	Chris Mason <chris.mason@fusionio.com>	2013-02-20 14:05:45 -0500
committer	Chris Mason <chris.mason@fusionio.com>	2013-02-20 14:05:45 -0500
commit	b2c6b3e0611c58fbeb6b9c0892b6249f7bdfaf6b (patch)
tree	de7cf0825605aa6acf33a8d107003efd7aedbe72
parent	19f949f52599ba7c3f67a5897ac6be14bfcb1200 (diff)
parent	272d26d0ad8c0e326689f2fa3cdc6a5fcc8e74e0 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/josef/btrfs-next into for-linus-3.9

Signed-off-by: Chris Mason <chris.mason@fusionio.com>

Conflicts:
	fs/btrfs/disk-io.c
-rw-r--r--	fs/btrfs/backref.h	2
-rw-r--r--	fs/btrfs/btrfs_inode.h	20
-rw-r--r--	fs/btrfs/check-integrity.c	3
-rw-r--r--	fs/btrfs/ctree.c	68
-rw-r--r--	fs/btrfs/ctree.h	95
-rw-r--r--	fs/btrfs/delayed-inode.c	147
-rw-r--r--	fs/btrfs/delayed-inode.h	1
-rw-r--r--	fs/btrfs/delayed-ref.c	82
-rw-r--r--	fs/btrfs/delayed-ref.h	43
-rw-r--r--	fs/btrfs/dev-replace.c	6
-rw-r--r--	fs/btrfs/disk-io.c	160
-rw-r--r--	fs/btrfs/extent-tree.c	284
-rw-r--r--	fs/btrfs/extent_io.c	68
-rw-r--r--	fs/btrfs/extent_io.h	3
-rw-r--r--	fs/btrfs/file-item.c	67
-rw-r--r--	fs/btrfs/file.c	49
-rw-r--r--	fs/btrfs/free-space-cache.c	12
-rw-r--r--	fs/btrfs/inode.c	321
-rw-r--r--	fs/btrfs/ioctl.c	120
-rw-r--r--	fs/btrfs/locking.c	5
-rw-r--r--	fs/btrfs/ordered-data.c	98
-rw-r--r--	fs/btrfs/ordered-data.h	14
-rw-r--r--	fs/btrfs/print-tree.c	1
-rw-r--r--	fs/btrfs/qgroup.c	6
-rw-r--r--	fs/btrfs/relocation.c	2
-rw-r--r--	fs/btrfs/scrub.c	2
-rw-r--r--	fs/btrfs/send.c	53
-rw-r--r--	fs/btrfs/send.h	1
-rw-r--r--	fs/btrfs/super.c	49
-rw-r--r--	fs/btrfs/transaction.c	123
-rw-r--r--	fs/btrfs/transaction.h	6
-rw-r--r--	fs/btrfs/tree-defrag.c	19
-rw-r--r--	fs/btrfs/tree-log.c	139
-rw-r--r--	fs/btrfs/volumes.c	236
-rw-r--r--	fs/btrfs/volumes.h	2
-rw-r--r--	include/linux/btrfs.h	6
-rw-r--r--	include/uapi/linux/Kbuild	1
-rw-r--r--	include/uapi/linux/btrfs.h (renamed from fs/btrfs/ioctl.h)	18
38 files changed, 1630 insertions, 702 deletions
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index d61feca79455..310a7f6d09b1 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -19,7 +19,7 @@
 #ifndef __BTRFS_BACKREF__
 #define __BTRFS_BACKREF__
 
-#include "ioctl.h"
+#include <linux/btrfs.h>
 #include "ulist.h"
 #include "extent_io.h"
 
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 2a8c242bc4f5..d9b97d4960e6 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -40,6 +40,8 @@
 #define BTRFS_INODE_HAS_ASYNC_EXTENT		6
 #define BTRFS_INODE_NEEDS_FULL_SYNC		7
 #define BTRFS_INODE_COPY_EVERYTHING		8
+#define BTRFS_INODE_IN_DELALLOC_LIST		9
+#define BTRFS_INODE_READDIO_NEED_LOCK		10
 
 /* in memory btrfs inode */
 struct btrfs_inode {
@@ -216,4 +218,22 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
 	return 0;
 }
 
+/*
+ * Disable DIO read nolock optimization, so new dio readers will be forced
+ * to grab i_mutex. It is used to avoid the endless truncate due to
+ * nonlocked dio read.
+ */
+static inline void btrfs_inode_block_unlocked_dio(struct inode *inode)
+{
+	set_bit(BTRFS_INODE_READDIO_NEED_LOCK, &BTRFS_I(inode)->runtime_flags);
+	smp_mb();
+}
+
+static inline void btrfs_inode_resume_unlocked_dio(struct inode *inode)
+{
+	smp_mb__before_clear_bit();
+	clear_bit(BTRFS_INODE_READDIO_NEED_LOCK,
+		  &BTRFS_I(inode)->runtime_flags);
+}
+
 #endif
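The smp_mb() in btrfs_inode_block_unlocked_dio() and the smp_mb__before_clear_bit() in its counterpart order the flag update against the DIO state the readers observe. A hedged sketch of how the two sides are meant to pair; the fs/btrfs/inode.c hunks of this merge are not reproduced on this page, so the caller shapes below are illustrative, not the patch's code:

/* Writer (truncate) side, sketch: block new lockless DIO readers,
 * drain the in-flight ones, do the work, then restore the fast path. */
static void truncate_vs_unlocked_dio_sketch(struct inode *inode)
{
	btrfs_inode_block_unlocked_dio(inode);	/* set_bit + smp_mb() */
	inode_dio_wait(inode);			/* wait for running DIO */
	/* ... perform the truncate ... */
	btrfs_inode_resume_unlocked_dio(inode);
}

/* Reader (DIO read) side, sketch: fall back to taking i_mutex while
 * the bit is set. */
static int dio_read_needs_lock_sketch(struct inode *inode)
{
	return test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
			&BTRFS_I(inode)->runtime_flags);
}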
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 11d47bfb62b4..18af6f48781a 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -813,8 +813,7 @@ static int btrfsic_process_superblock_dev_mirror(
 		(bh->b_data + (dev_bytenr & 4095));
 
 	if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
-	    strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
-		    sizeof(super_tmp->magic)) ||
+	    super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) ||
 	    memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
 	    btrfs_super_nodesize(super_tmp) != state->metablock_size ||
 	    btrfs_super_leafsize(super_tmp) != state->metablock_size ||
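The replaced strncmp() compared the raw superblock bytes against the string form of the magic; the new test compares the on-disk little-endian u64 against the numeric BTRFS_MAGIC added in the ctree.h hunk below. A standalone check (ordinary userspace C, not kernel code) that the two representations agree:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */

int main(void)
{
	uint64_t disk_magic;

	/* the eight magic bytes as they sit on disk */
	memcpy(&disk_magic, "_BHRfS_M", sizeof(disk_magic));
	/* holds on a little-endian host; the kernel compares through
	 * cpu_to_le64(), so the test is endian-safe there */
	assert(disk_magic == BTRFS_MAGIC);
	printf("magic ok: 0x%016llX\n", (unsigned long long)disk_magic);
	return 0;
}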
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index eea5da7a2b9a..ecd25a1b4e51 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1138,6 +1138,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
 	switch (tm->op) {
 	case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
 		BUG_ON(tm->slot < n);
+		/* Fallthrough */
 	case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
 	case MOD_LOG_KEY_REMOVE:
 		btrfs_set_node_key(eb, &tm->key, tm->slot);
@@ -1222,7 +1223,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 
 	__tree_mod_log_rewind(eb_rewin, time_seq, tm);
 	WARN_ON(btrfs_header_nritems(eb_rewin) >
-		BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root));
+		BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
 
 	return eb_rewin;
 }
@@ -1441,7 +1442,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
  */
 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct extent_buffer *parent,
-		       int start_slot, int cache_only, u64 *last_ret,
+		       int start_slot, u64 *last_ret,
 		       struct btrfs_key *progress)
 {
 	struct extent_buffer *cur;
@@ -1461,8 +1462,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 	struct btrfs_disk_key disk_key;
 
 	parent_level = btrfs_header_level(parent);
-	if (cache_only && parent_level != 1)
-		return 0;
 
 	WARN_ON(trans->transaction != root->fs_info->running_transaction);
 	WARN_ON(trans->transid != root->fs_info->generation);
@@ -1508,10 +1507,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		else
 			uptodate = 0;
 		if (!cur || !uptodate) {
-			if (cache_only) {
-				free_extent_buffer(cur);
-				continue;
-			}
 			if (!cur) {
 				cur = read_tree_block(root, blocknr,
 						      blocksize, gen);
@@ -4825,8 +4820,8 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
 
 /*
  * A helper function to walk down the tree starting at min_key, and looking
- * for nodes or leaves that are either in cache or have a minimum
- * transaction id. This is used by the btree defrag code, and tree logging
+ * for nodes or leaves that are have a minimum transaction id.
+ * This is used by the btree defrag code, and tree logging
  *
  * This does not cow, but it does stuff the starting key it finds back
  * into min_key, so you can call btrfs_search_slot with cow=1 on the
@@ -4847,7 +4842,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
  */
 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 			 struct btrfs_key *max_key,
-			 struct btrfs_path *path, int cache_only,
+			 struct btrfs_path *path,
 			 u64 min_trans)
 {
 	struct extent_buffer *cur;
@@ -4887,15 +4882,12 @@ again:
 		if (sret && slot > 0)
 			slot--;
 		/*
-		 * check this node pointer against the cache_only and
-		 * min_trans parameters. If it isn't in cache or is too
-		 * old, skip to the next one.
+		 * check this node pointer against the min_trans parameters.
+		 * If it is too old, old, skip to the next one.
 		 */
 		while (slot < nritems) {
 			u64 blockptr;
 			u64 gen;
-			struct extent_buffer *tmp;
-			struct btrfs_disk_key disk_key;
 
 			blockptr = btrfs_node_blockptr(cur, slot);
 			gen = btrfs_node_ptr_generation(cur, slot);
@@ -4903,27 +4895,7 @@ again:
 				slot++;
 				continue;
 			}
-			if (!cache_only)
-				break;
-
-			if (max_key) {
-				btrfs_node_key(cur, &disk_key, slot);
-				if (comp_keys(&disk_key, max_key) >= 0) {
-					ret = 1;
-					goto out;
-				}
-			}
-
-			tmp = btrfs_find_tree_block(root, blockptr,
-					    btrfs_level_size(root, level - 1));
-
-			if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
-				free_extent_buffer(tmp);
-				break;
-			}
-			if (tmp)
-				free_extent_buffer(tmp);
-			slot++;
+			break;
 		}
 find_next_key:
 		/*
@@ -4934,7 +4906,7 @@ find_next_key:
 			path->slots[level] = slot;
 			btrfs_set_path_blocking(path);
 			sret = btrfs_find_next_key(root, path, min_key, level,
-						   cache_only, min_trans);
+						   min_trans);
 			if (sret == 0) {
 				btrfs_release_path(path);
 				goto again;
@@ -5399,8 +5371,7 @@ out:
 /*
  * this is similar to btrfs_next_leaf, but does not try to preserve
  * and fixup the path. It looks for and returns the next key in the
- * tree based on the current path and the cache_only and min_trans
- * parameters.
+ * tree based on the current path and the min_trans parameters.
  *
  * 0 is returned if another key is found, < 0 if there are any errors
 * and 1 is returned if there are no higher keys in the tree
@@ -5409,8 +5380,7 @@ out:
 * calling this function.
 */
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
-			struct btrfs_key *key, int level,
-			int cache_only, u64 min_trans)
+			struct btrfs_key *key, int level, u64 min_trans)
 {
 	int slot;
 	struct extent_buffer *c;
@@ -5461,22 +5431,8 @@ next:
 		if (level == 0)
 			btrfs_item_key_to_cpu(c, key, slot);
 		else {
-			u64 blockptr = btrfs_node_blockptr(c, slot);
 			u64 gen = btrfs_node_ptr_generation(c, slot);
 
-			if (cache_only) {
-				struct extent_buffer *cur;
-				cur = btrfs_find_tree_block(root, blockptr,
-					    btrfs_level_size(root, level - 1));
-				if (!cur ||
-				    btrfs_buffer_uptodate(cur, gen, 1) <= 0) {
-					slot++;
-					if (cur)
-						free_extent_buffer(cur);
-					goto next;
-				}
-				free_extent_buffer(cur);
-			}
 			if (gen < min_trans) {
 				slot++;
 				goto next;
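With the cache_only argument gone, btrfs_search_forward() filters purely on min_trans. An illustrative loop over every key with generation at or above a cutoff, assuming the post-patch signature; the advance step is simplified, and real callers such as the defrag and tree-log code reuse the min_key value the function writes back, together with btrfs_find_next_key():

/* Sketch only, not code from this merge. */
static int walk_newer_keys_sketch(struct btrfs_root *root, u64 min_trans)
{
	struct btrfs_key min_key = {0};		/* search resumes from here */
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		ret = btrfs_search_forward(root, &min_key, NULL, path,
					   min_trans);
		if (ret)	/* 1: no more keys, < 0: error */
			break;
		/* ... inspect path->nodes[0] at path->slots[0] ... */
		btrfs_release_path(path);
		min_key.offset++;	/* naive advance, for the sketch */
	}
	btrfs_free_path(path);
	return ret < 0 ? ret : 0;
}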
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 547b7b05727f..1679051f4d39 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -31,10 +31,10 @@
 #include <trace/events/btrfs.h>
 #include <asm/kmap_types.h>
 #include <linux/pagemap.h>
+#include <linux/btrfs.h>
 #include "extent_io.h"
 #include "extent_map.h"
 #include "async-thread.h"
-#include "ioctl.h"
 
 struct btrfs_trans_handle;
 struct btrfs_transaction;
@@ -46,7 +46,7 @@ extern struct kmem_cache *btrfs_path_cachep;
 extern struct kmem_cache *btrfs_free_space_cachep;
 struct btrfs_ordered_sum;
 
-#define BTRFS_MAGIC "_BHRfS_M"
+#define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
 
 #define BTRFS_MAX_MIRRORS 3
 
@@ -191,6 +191,8 @@ static int btrfs_csum_sizes[] = { 4, 0 };
 /* ioprio of readahead is set to idle */
 #define BTRFS_IOPRIO_READA (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0))
 
+#define BTRFS_DIRTY_METADATA_THRESH	(32 * 1024 * 1024)
+
 /*
  * The key defines the order in the tree, and so it also defines (optimal)
  * block layout.
@@ -336,7 +338,9 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 /*
  * File system states
 */
+#define BTRFS_FS_STATE_ERROR		0
 
+/* Super block flags */
 /* Errors detected */
 #define BTRFS_SUPER_FLAG_ERROR		(1ULL << 2)
 
@@ -953,7 +957,15 @@ struct btrfs_dev_replace_item {
 #define BTRFS_BLOCK_GROUP_DUP		(1ULL << 5)
 #define BTRFS_BLOCK_GROUP_RAID10	(1ULL << 6)
 #define BTRFS_BLOCK_GROUP_RESERVED	BTRFS_AVAIL_ALLOC_BIT_SINGLE
-#define BTRFS_NR_RAID_TYPES		5
+
+enum btrfs_raid_types {
+	BTRFS_RAID_RAID10,
+	BTRFS_RAID_RAID1,
+	BTRFS_RAID_DUP,
+	BTRFS_RAID_RAID0,
+	BTRFS_RAID_SINGLE,
+	BTRFS_NR_RAID_TYPES
+};
 
 #define BTRFS_BLOCK_GROUP_TYPE_MASK	(BTRFS_BLOCK_GROUP_DATA |    \
 					 BTRFS_BLOCK_GROUP_SYSTEM |  \
@@ -1225,6 +1237,11 @@ struct seq_list {
 	u64 seq;
 };
 
+enum btrfs_orphan_cleanup_state {
+	ORPHAN_CLEANUP_STARTED	= 1,
+	ORPHAN_CLEANUP_DONE	= 2,
+};
+
 /* fs_info */
 struct reloc_control;
 struct btrfs_device;
@@ -1250,6 +1267,7 @@ struct btrfs_fs_info {
 
 	/* block group cache stuff */
 	spinlock_t block_group_cache_lock;
+	u64 first_logical_byte;
 	struct rb_root block_group_cache_tree;
 
 	/* keep track of unallocated space */
@@ -1288,7 +1306,23 @@ struct btrfs_fs_info {
 	u64 last_trans_log_full_commit;
 	unsigned long mount_opt;
 	unsigned long compress_type:4;
+	/*
+	 * It is a suggestive number, the read side is safe even it gets a
+	 * wrong number because we will write out the data into a regular
+	 * extent. The write side(mount/remount) is under ->s_umount lock,
+	 * so it is also safe.
+	 */
 	u64 max_inline;
+	/*
+	 * Protected by ->chunk_mutex and sb->s_umount.
+	 *
+	 * The reason that we use two lock to protect it is because only
+	 * remount and mount operations can change it and these two operations
+	 * are under sb->s_umount, but the read side (chunk allocation) can not
+	 * acquire sb->s_umount or the deadlock would happen. So we use two
+	 * locks to protect it. On the write side, we must acquire two locks,
+	 * and on the read side, we just need acquire one of them.
+	 */
 	u64 alloc_start;
 	struct btrfs_transaction *running_transaction;
 	wait_queue_head_t transaction_throttle;
@@ -1365,6 +1399,7 @@ struct btrfs_fs_info {
 	 */
 	struct list_head ordered_extents;
 
+	spinlock_t delalloc_lock;
 	/*
 	 * all of the inodes that have delalloc bytes. It is possible for
 	 * this list to be empty even when there is still dirty data=ordered
@@ -1373,13 +1408,6 @@ struct btrfs_fs_info {
 	struct list_head delalloc_inodes;
 
 	/*
-	 * special rename and truncate targets that must be on disk before
-	 * we're allowed to commit. This is basically the ext3 style
-	 * data=ordered list.
-	 */
-	struct list_head ordered_operations;
-
-	/*
 	 * there is a pool of worker threads for checksumming during writes
 	 * and a pool for checksumming after reads. This is because readers
 	 * can run with FS locks held, and the writers may be waiting for
@@ -1423,10 +1451,12 @@ struct btrfs_fs_info {
 
 	u64 total_pinned;
 
-	/* protected by the delalloc lock, used to keep from writing
-	 * metadata until there is a nice batch
-	 */
-	u64 dirty_metadata_bytes;
+	/* used to keep from writing metadata until there is a nice batch */
+	struct percpu_counter dirty_metadata_bytes;
+	struct percpu_counter delalloc_bytes;
+	s32 dirty_metadata_batch;
+	s32 delalloc_batch;
+
 	struct list_head dirty_cowonly_roots;
 
 	struct btrfs_fs_devices *fs_devices;
@@ -1442,9 +1472,6 @@ struct btrfs_fs_info {
 
 	struct reloc_control *reloc_ctl;
 
-	spinlock_t delalloc_lock;
-	u64 delalloc_bytes;
-
 	/* data_alloc_cluster is only used in ssd mode */
 	struct btrfs_free_cluster data_alloc_cluster;
 
@@ -1456,6 +1483,8 @@ struct btrfs_fs_info {
 	struct rb_root defrag_inodes;
 	atomic_t defrag_running;
 
+	/* Used to protect avail_{data, metadata, system}_alloc_bits */
+	seqlock_t profiles_lock;
 	/*
 	 * these three are in extended format (availability of single
 	 * chunks is denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other
@@ -1520,7 +1549,7 @@ struct btrfs_fs_info {
 	u64 qgroup_seq;
 
 	/* filesystem state */
-	u64 fs_state;
+	unsigned long fs_state;
 
 	struct btrfs_delayed_root *delayed_root;
 
@@ -1623,6 +1652,9 @@ struct btrfs_root {
 
 	struct list_head root_list;
 
+	spinlock_t log_extents_lock[2];
+	struct list_head logged_list[2];
+
 	spinlock_t orphan_lock;
 	atomic_t orphan_inodes;
 	struct btrfs_block_rsv *orphan_block_rsv;
@@ -2936,8 +2968,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
 			     u64 num_bytes, u64 *refs, u64 *flags);
 int btrfs_pin_extent(struct btrfs_root *root,
 		     u64 bytenr, u64 num, int reserved);
-int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
-				    struct btrfs_root *root,
+int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
 				    u64 bytenr, u64 num_bytes);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
@@ -3092,10 +3123,10 @@ struct extent_buffer *btrfs_root_node(struct btrfs_root *root);
 struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
 int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
 			struct btrfs_key *key, int lowest_level,
-			int cache_only, u64 min_trans);
+			u64 min_trans);
 int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 			 struct btrfs_key *max_key,
-			 struct btrfs_path *path, int cache_only,
+			 struct btrfs_path *path,
 			 u64 min_trans);
 enum btrfs_compare_tree_result {
 	BTRFS_COMPARE_TREE_NEW,
@@ -3148,7 +3179,7 @@ int btrfs_search_slot_for_read(struct btrfs_root *root,
 			       int find_higher, int return_any);
 int btrfs_realloc_node(struct btrfs_trans_handle *trans,
 		       struct btrfs_root *root, struct extent_buffer *parent,
-		       int start_slot, int cache_only, u64 *last_ret,
+		       int start_slot, u64 *last_ret,
 		       struct btrfs_key *progress);
 void btrfs_release_path(struct btrfs_path *p);
 struct btrfs_path *btrfs_alloc_path(void);
@@ -3543,7 +3574,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
 
 /* tree-defrag.c */
 int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
-			struct btrfs_root *root, int cache_only);
+			struct btrfs_root *root);
 
 /* sysfs.c */
 int btrfs_init_sysfs(void);
@@ -3620,11 +3651,14 @@ __printf(5, 6)
 void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
 		   unsigned int line, int errno, const char *fmt, ...);
 
+/*
+ * If BTRFS_MOUNT_PANIC_ON_FATAL_ERROR is in mount_opt, __btrfs_panic
+ * will panic(). Otherwise we BUG() here.
+ */
 #define btrfs_panic(fs_info, errno, fmt, args...)			\
 do {									\
-	struct btrfs_fs_info *_i = (fs_info);				\
-	__btrfs_panic(_i, __func__, __LINE__, errno, fmt, ##args);	\
-	BUG_ON(!(_i->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR));	\
+	__btrfs_panic(fs_info, __func__, __LINE__, errno, fmt, ##args);	\
+	BUG();								\
 } while (0)
 
 /* acl.c */
@@ -3745,4 +3779,11 @@ static inline int is_fstree(u64 rootid)
 		return 1;
 	return 0;
 }
+
+static inline int btrfs_defrag_cancelled(struct btrfs_fs_info *fs_info)
+{
+	return signal_pending(current);
+}
+
+
 #endif
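dirty_metadata_bytes and delalloc_bytes above become percpu_counters, with batch sizes set in the open_ctree() hunks below to PAGE_CACHE_SIZE (later leafsize, or sectorsize * 512, once the super block is read) times 1 + ilog2(nr_cpu_ids). A standalone sketch (plain userspace C, 4K pages assumed) of what that sizing gives; the point is that the per-CPU drift tolerated before the shared total is touched grows only logarithmically with the CPU count:

#include <stdio.h>

static int ilog2_u32(unsigned int v)	/* mirrors kernel ilog2() for v > 0 */
{
	int l = 0;

	while (v >>= 1)
		l++;
	return l;
}

int main(void)
{
	const long page_size = 4096;	/* PAGE_CACHE_SIZE on x86 */
	unsigned int ncpus;

	for (ncpus = 1; ncpus <= 64; ncpus *= 4)
		printf("%2u cpus -> dirty_metadata_batch = %ld bytes\n",
		       ncpus, page_size * (1 + ilog2_u32(ncpus)));
	return 0;
}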
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 34836036f01b..0b278b117cbe 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -875,7 +875,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
 				     struct btrfs_delayed_item *delayed_item)
 {
 	struct extent_buffer *leaf;
-	struct btrfs_item *item;
 	char *ptr;
 	int ret;
 
@@ -886,7 +885,6 @@ static int btrfs_insert_delayed_item(struct btrfs_trans_handle *trans,
 
 	leaf = path->nodes[0];
 
-	item = btrfs_item_nr(leaf, path->slots[0]);
 	ptr = btrfs_item_ptr(leaf, path->slots[0], char);
 
 	write_extent_buffer(leaf, delayed_item->data, (unsigned long)ptr,
@@ -1065,32 +1063,25 @@ static void btrfs_release_delayed_inode(struct btrfs_delayed_node *delayed_node)
 	}
 }
 
-static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
-				      struct btrfs_root *root,
-				      struct btrfs_path *path,
-				      struct btrfs_delayed_node *node)
+static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
+					struct btrfs_root *root,
+					struct btrfs_path *path,
+					struct btrfs_delayed_node *node)
 {
 	struct btrfs_key key;
 	struct btrfs_inode_item *inode_item;
 	struct extent_buffer *leaf;
 	int ret;
 
-	mutex_lock(&node->mutex);
-	if (!node->inode_dirty) {
-		mutex_unlock(&node->mutex);
-		return 0;
-	}
-
 	key.objectid = node->inode_id;
 	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
 	key.offset = 0;
+
 	ret = btrfs_lookup_inode(trans, root, path, &key, 1);
 	if (ret > 0) {
 		btrfs_release_path(path);
-		mutex_unlock(&node->mutex);
 		return -ENOENT;
 	} else if (ret < 0) {
-		mutex_unlock(&node->mutex);
 		return ret;
 	}
 
@@ -1105,11 +1096,47 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
 
 	btrfs_delayed_inode_release_metadata(root, node);
 	btrfs_release_delayed_inode(node);
-	mutex_unlock(&node->mutex);
 
 	return 0;
 }
 
+static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
+					     struct btrfs_root *root,
+					     struct btrfs_path *path,
+					     struct btrfs_delayed_node *node)
+{
+	int ret;
+
+	mutex_lock(&node->mutex);
+	if (!node->inode_dirty) {
+		mutex_unlock(&node->mutex);
+		return 0;
+	}
+
+	ret = __btrfs_update_delayed_inode(trans, root, path, node);
+	mutex_unlock(&node->mutex);
+	return ret;
+}
+
+static inline int
+__btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
+				   struct btrfs_path *path,
+				   struct btrfs_delayed_node *node)
+{
+	int ret;
+
+	ret = btrfs_insert_delayed_items(trans, path, node->root, node);
+	if (ret)
+		return ret;
+
+	ret = btrfs_delete_delayed_items(trans, path, node->root, node);
+	if (ret)
+		return ret;
+
+	ret = btrfs_update_delayed_inode(trans, node->root, path, node);
+	return ret;
+}
+
 /*
  * Called when committing the transaction.
  * Returns 0 on success.
@@ -1119,7 +1146,6 @@ static int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
 static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root, int nr)
 {
-	struct btrfs_root *curr_root = root;
 	struct btrfs_delayed_root *delayed_root;
 	struct btrfs_delayed_node *curr_node, *prev_node;
 	struct btrfs_path *path;
@@ -1142,15 +1168,8 @@ static int __btrfs_run_delayed_items(struct btrfs_trans_handle *trans,
 
 	curr_node = btrfs_first_delayed_node(delayed_root);
 	while (curr_node && (!count || (count && nr--))) {
-		curr_root = curr_node->root;
-		ret = btrfs_insert_delayed_items(trans, path, curr_root,
-						 curr_node);
-		if (!ret)
-			ret = btrfs_delete_delayed_items(trans, path,
-						curr_root, curr_node);
-		if (!ret)
-			ret = btrfs_update_delayed_inode(trans, curr_root,
-						path, curr_node);
+		ret = __btrfs_commit_inode_delayed_items(trans, path,
+							 curr_node);
 		if (ret) {
 			btrfs_release_delayed_node(curr_node);
 			curr_node = NULL;
@@ -1183,51 +1202,93 @@ int btrfs_run_delayed_items_nr(struct btrfs_trans_handle *trans,
 	return __btrfs_run_delayed_items(trans, root, nr);
 }
 
-static int __btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
-					      struct btrfs_delayed_node *node)
+int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
+				     struct inode *inode)
 {
+	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
 	struct btrfs_path *path;
 	struct btrfs_block_rsv *block_rsv;
 	int ret;
 
+	if (!delayed_node)
+		return 0;
+
+	mutex_lock(&delayed_node->mutex);
+	if (!delayed_node->count) {
+		mutex_unlock(&delayed_node->mutex);
+		btrfs_release_delayed_node(delayed_node);
+		return 0;
+	}
+	mutex_unlock(&delayed_node->mutex);
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
 	path->leave_spinning = 1;
 
 	block_rsv = trans->block_rsv;
-	trans->block_rsv = &node->root->fs_info->delayed_block_rsv;
+	trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
 
-	ret = btrfs_insert_delayed_items(trans, path, node->root, node);
-	if (!ret)
-		ret = btrfs_delete_delayed_items(trans, path, node->root, node);
-	if (!ret)
-		ret = btrfs_update_delayed_inode(trans, node->root, path, node);
-	btrfs_free_path(path);
+	ret = __btrfs_commit_inode_delayed_items(trans, path, delayed_node);
 
+	btrfs_release_delayed_node(delayed_node);
+	btrfs_free_path(path);
 	trans->block_rsv = block_rsv;
+
 	return ret;
 }
 
-int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
-				     struct inode *inode)
+int btrfs_commit_inode_delayed_inode(struct inode *inode)
 {
+	struct btrfs_trans_handle *trans;
 	struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
+	struct btrfs_path *path;
+	struct btrfs_block_rsv *block_rsv;
 	int ret;
 
 	if (!delayed_node)
 		return 0;
 
 	mutex_lock(&delayed_node->mutex);
-	if (!delayed_node->count) {
+	if (!delayed_node->inode_dirty) {
 		mutex_unlock(&delayed_node->mutex);
 		btrfs_release_delayed_node(delayed_node);
 		return 0;
 	}
 	mutex_unlock(&delayed_node->mutex);
 
-	ret = __btrfs_commit_inode_delayed_items(trans, delayed_node);
+	trans = btrfs_join_transaction(delayed_node->root);
+	if (IS_ERR(trans)) {
+		ret = PTR_ERR(trans);
+		goto out;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto trans_out;
+	}
+	path->leave_spinning = 1;
+
+	block_rsv = trans->block_rsv;
+	trans->block_rsv = &delayed_node->root->fs_info->delayed_block_rsv;
+
+	mutex_lock(&delayed_node->mutex);
+	if (delayed_node->inode_dirty)
+		ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
+						   path, delayed_node);
+	else
+		ret = 0;
+	mutex_unlock(&delayed_node->mutex);
+
+	btrfs_free_path(path);
+	trans->block_rsv = block_rsv;
+trans_out:
+	btrfs_end_transaction(trans, delayed_node->root);
+	btrfs_btree_balance_dirty(delayed_node->root);
+out:
 	btrfs_release_delayed_node(delayed_node);
+
 	return ret;
 }
 
@@ -1258,7 +1319,6 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
 	struct btrfs_root *root;
 	struct btrfs_block_rsv *block_rsv;
 	int need_requeue = 0;
-	int ret;
 
 	async_node = container_of(work, struct btrfs_async_delayed_node, work);
 
@@ -1277,14 +1337,7 @@ static void btrfs_async_run_delayed_node_done(struct btrfs_work *work)
 	block_rsv = trans->block_rsv;
 	trans->block_rsv = &root->fs_info->delayed_block_rsv;
 
-	ret = btrfs_insert_delayed_items(trans, path, root, delayed_node);
-	if (!ret)
-		ret = btrfs_delete_delayed_items(trans, path, root,
-						 delayed_node);
-
-	if (!ret)
-		btrfs_update_delayed_inode(trans, root, path, delayed_node);
-
+	__btrfs_commit_inode_delayed_items(trans, path, delayed_node);
 	/*
 	 * Maybe new delayed items have been inserted, so we need requeue
 	 * the work. Besides that, we must dequeue the empty delayed nodes
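btrfs_commit_inode_delayed_inode() flushes only the delayed inode item, inside a transaction it joins itself, instead of running the whole delayed item list. A hedged sketch of the kind of caller it is meant for; the fs/btrfs/inode.c hunk of this merge is not reproduced on this page, so the caller shape is illustrative:

/* Illustrative eviction-style caller: the in-memory delayed node is
 * about to be dropped, so the dirty inode item must reach the tree. */
static int drop_inode_sketch(struct inode *inode)
{
	int ret;

	ret = btrfs_commit_inode_delayed_inode(inode);
	if (ret)
		return ret;	/* a real caller would also abort/log */
	btrfs_remove_delayed_node(inode);
	return 0;
}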
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 4f808e1baeed..78b6ad0fc669 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -117,6 +117,7 @@ int btrfs_commit_inode_delayed_items(struct btrfs_trans_handle *trans,
 /* Used for evicting the inode. */
 void btrfs_remove_delayed_node(struct inode *inode);
 void btrfs_kill_delayed_inode_items(struct inode *inode);
+int btrfs_commit_inode_delayed_inode(struct inode *inode);
 
 
 int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index ae9411773397..b7a0641ead77 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -23,6 +23,10 @@
 #include "delayed-ref.h"
 #include "transaction.h"
 
+struct kmem_cache *btrfs_delayed_ref_head_cachep;
+struct kmem_cache *btrfs_delayed_tree_ref_cachep;
+struct kmem_cache *btrfs_delayed_data_ref_cachep;
+struct kmem_cache *btrfs_delayed_extent_op_cachep;
 /*
  * delayed back reference update tracking. For subvolume trees
  * we queue up extent allocations and backref maintenance for
@@ -422,6 +426,14 @@ again:
 	return 1;
 }
 
+void btrfs_release_ref_cluster(struct list_head *cluster)
+{
+	struct list_head *pos, *q;
+
+	list_for_each_safe(pos, q, cluster)
+		list_del_init(pos);
+}
+
 /*
  * helper function to update an extent delayed ref in the
  * rbtree. existing and update must both have the same
@@ -511,7 +523,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing,
 					ref->extent_op->flags_to_set;
 			existing_ref->extent_op->update_flags = 1;
 		}
-		kfree(ref->extent_op);
+		btrfs_free_delayed_extent_op(ref->extent_op);
 	}
 }
 /*
@@ -592,7 +604,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
 		 * we've updated the existing ref, free the newly
 		 * allocated ref
 		 */
-		kfree(head_ref);
+		kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
 	} else {
 		delayed_refs->num_heads++;
 		delayed_refs->num_heads_ready++;
@@ -653,7 +665,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 		 * we've updated the existing ref, free the newly
 		 * allocated ref
 		 */
-		kfree(full_ref);
+		kmem_cache_free(btrfs_delayed_tree_ref_cachep, full_ref);
 	} else {
 		delayed_refs->num_entries++;
 		trans->delayed_ref_updates++;
@@ -714,7 +726,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 		 * we've updated the existing ref, free the newly
 		 * allocated ref
 		 */
-		kfree(full_ref);
+		kmem_cache_free(btrfs_delayed_data_ref_cachep, full_ref);
 	} else {
 		delayed_refs->num_entries++;
 		trans->delayed_ref_updates++;
@@ -738,13 +750,13 @@ int btrfs_add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
 	struct btrfs_delayed_ref_root *delayed_refs;
 
 	BUG_ON(extent_op && extent_op->is_data);
-	ref = kmalloc(sizeof(*ref), GFP_NOFS);
+	ref = kmem_cache_alloc(btrfs_delayed_tree_ref_cachep, GFP_NOFS);
 	if (!ref)
 		return -ENOMEM;
 
-	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
 	if (!head_ref) {
-		kfree(ref);
+		kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
 		return -ENOMEM;
 	}
 
@@ -786,13 +798,13 @@ int btrfs_add_delayed_data_ref(struct btrfs_fs_info *fs_info,
 	struct btrfs_delayed_ref_root *delayed_refs;
 
 	BUG_ON(extent_op && !extent_op->is_data);
-	ref = kmalloc(sizeof(*ref), GFP_NOFS);
+	ref = kmem_cache_alloc(btrfs_delayed_data_ref_cachep, GFP_NOFS);
 	if (!ref)
 		return -ENOMEM;
 
-	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
 	if (!head_ref) {
-		kfree(ref);
+		kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
 		return -ENOMEM;
 	}
 
@@ -826,7 +838,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_fs_info *fs_info,
 	struct btrfs_delayed_ref_head *head_ref;
 	struct btrfs_delayed_ref_root *delayed_refs;
 
-	head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS);
+	head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
 	if (!head_ref)
 		return -ENOMEM;
 
@@ -860,3 +872,51 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
 		return btrfs_delayed_node_to_head(ref);
 	return NULL;
 }
+
+void btrfs_delayed_ref_exit(void)
+{
+	if (btrfs_delayed_ref_head_cachep)
+		kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
+	if (btrfs_delayed_tree_ref_cachep)
+		kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
+	if (btrfs_delayed_data_ref_cachep)
+		kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
+	if (btrfs_delayed_extent_op_cachep)
+		kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
+}
+
+int btrfs_delayed_ref_init(void)
+{
+	btrfs_delayed_ref_head_cachep = kmem_cache_create(
+				"btrfs_delayed_ref_head",
+				sizeof(struct btrfs_delayed_ref_head), 0,
+				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+	if (!btrfs_delayed_ref_head_cachep)
+		goto fail;
+
+	btrfs_delayed_tree_ref_cachep = kmem_cache_create(
+				"btrfs_delayed_tree_ref",
+				sizeof(struct btrfs_delayed_tree_ref), 0,
+				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+	if (!btrfs_delayed_tree_ref_cachep)
+		goto fail;
+
+	btrfs_delayed_data_ref_cachep = kmem_cache_create(
+				"btrfs_delayed_data_ref",
+				sizeof(struct btrfs_delayed_data_ref), 0,
+				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+	if (!btrfs_delayed_data_ref_cachep)
+		goto fail;
+
+	btrfs_delayed_extent_op_cachep = kmem_cache_create(
+				"btrfs_delayed_extent_op",
+				sizeof(struct btrfs_delayed_extent_op), 0,
+				SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
+	if (!btrfs_delayed_extent_op_cachep)
+		goto fail;
+
+	return 0;
+fail:
+	btrfs_delayed_ref_exit();
+	return -ENOMEM;
+}
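btrfs_delayed_ref_init() and btrfs_delayed_ref_exit() still have to be called from module init and exit; the fs/btrfs/super.c change that wires them up is in this merge's diffstat but not reproduced on this page. A hedged sketch of that wiring:

/* Sketch only; the real hook-up lives in fs/btrfs/super.c. */
static int __init init_btrfs_fs_sketch(void)
{
	int err;

	err = btrfs_delayed_ref_init();	/* create the four slab caches */
	if (err)
		return err;
	/* ... rest of module init; unwind with btrfs_delayed_ref_exit()
	 * on any later failure ... */
	return 0;
}

static void __exit exit_btrfs_fs_sketch(void)
{
	/* safe even after a partial init: btrfs_delayed_ref_exit()
	 * NULL-checks each cache before destroying it */
	btrfs_delayed_ref_exit();
}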
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index c9d703693df0..7939149f8f27 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -141,12 +141,47 @@ struct btrfs_delayed_ref_root {
 	u64 run_delayed_start;
 };
 
+extern struct kmem_cache *btrfs_delayed_ref_head_cachep;
+extern struct kmem_cache *btrfs_delayed_tree_ref_cachep;
+extern struct kmem_cache *btrfs_delayed_data_ref_cachep;
+extern struct kmem_cache *btrfs_delayed_extent_op_cachep;
+
+int btrfs_delayed_ref_init(void);
+void btrfs_delayed_ref_exit(void);
+
+static inline struct btrfs_delayed_extent_op *
+btrfs_alloc_delayed_extent_op(void)
+{
+	return kmem_cache_alloc(btrfs_delayed_extent_op_cachep, GFP_NOFS);
+}
+
+static inline void
+btrfs_free_delayed_extent_op(struct btrfs_delayed_extent_op *op)
+{
+	if (op)
+		kmem_cache_free(btrfs_delayed_extent_op_cachep, op);
+}
+
 static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
 {
 	WARN_ON(atomic_read(&ref->refs) == 0);
 	if (atomic_dec_and_test(&ref->refs)) {
 		WARN_ON(ref->in_tree);
-		kfree(ref);
+		switch (ref->type) {
+		case BTRFS_TREE_BLOCK_REF_KEY:
+		case BTRFS_SHARED_BLOCK_REF_KEY:
+			kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
+			break;
+		case BTRFS_EXTENT_DATA_REF_KEY:
+		case BTRFS_SHARED_DATA_REF_KEY:
+			kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);
+			break;
+		case 0:
+			kmem_cache_free(btrfs_delayed_ref_head_cachep, ref);
+			break;
+		default:
+			BUG();
+		}
 	}
 }
 
@@ -176,8 +211,14 @@ struct btrfs_delayed_ref_head *
 btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr);
 int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
 			   struct btrfs_delayed_ref_head *head);
+static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
+{
+	mutex_unlock(&head->mutex);
+}
+
 int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans,
 			   struct list_head *cluster, u64 search_start);
+void btrfs_release_ref_cluster(struct list_head *cluster);
 
 int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info,
 			    struct btrfs_delayed_ref_root *delayed_refs,
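btrfs_alloc_delayed_extent_op()/btrfs_free_delayed_extent_op() replace the bare kmalloc()/kfree() at the extent_op call sites. An illustrative caller, built only from names visible in this diff (btrfs_add_delayed_extent_op() and the flags_to_set/update_flags/is_data fields); treat it as a sketch, not a hunk from the merge:

static int set_block_flags_sketch(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  u64 bytenr, u64 num_bytes, u64 flags)
{
	struct btrfs_delayed_extent_op *extent_op;
	int ret;

	extent_op = btrfs_alloc_delayed_extent_op();	/* slab, GFP_NOFS */
	if (!extent_op)
		return -ENOMEM;

	extent_op->flags_to_set = flags;
	extent_op->update_flags = 1;
	extent_op->is_data = 0;

	ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
					  num_bytes, extent_op);
	if (ret)
		btrfs_free_delayed_extent_op(extent_op);
	return ret;
}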
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index 66dbc8dbddf7..7ba7b3900cb8 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -465,7 +465,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	 * flush all outstanding I/O and inode extent mappings before the
 	 * copy operation is declared as being finished
 	 */
-	btrfs_start_delalloc_inodes(root, 0);
+	ret = btrfs_start_delalloc_inodes(root, 0);
+	if (ret) {
+		mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
+		return ret;
+	}
 	btrfs_wait_ordered_extents(root, 0);
 
 	trans = btrfs_start_transaction(root, 0);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a8f652dc940b..779b401cd952 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -56,7 +56,8 @@ static void end_workqueue_fn(struct btrfs_work *work);
 static void free_fs_root(struct btrfs_root *root);
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 				   int read_only);
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
+static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+					     struct btrfs_root *root);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				      struct btrfs_root *root);
@@ -420,7 +421,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
 static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 {
 	struct extent_io_tree *tree;
-	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+	u64 start = page_offset(page);
 	u64 found_start;
 	struct extent_buffer *eb;
 
@@ -946,18 +947,20 @@ static int btree_writepages(struct address_space *mapping,
 			    struct writeback_control *wbc)
 {
 	struct extent_io_tree *tree;
+	struct btrfs_fs_info *fs_info;
+	int ret;
+
 	tree = &BTRFS_I(mapping->host)->io_tree;
 	if (wbc->sync_mode == WB_SYNC_NONE) {
-		struct btrfs_root *root = BTRFS_I(mapping->host)->root;
-		u64 num_dirty;
-		unsigned long thresh = 32 * 1024 * 1024;
 
 		if (wbc->for_kupdate)
 			return 0;
 
+		fs_info = BTRFS_I(mapping->host)->root->fs_info;
 		/* this is a bit racy, but that's ok */
-		num_dirty = root->fs_info->dirty_metadata_bytes;
-		if (num_dirty < thresh)
+		ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+					     BTRFS_DIRTY_METADATA_THRESH);
+		if (ret < 0)
 			return 0;
 	}
 	return btree_write_cache_pages(mapping, wbc);
@@ -1125,24 +1128,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 		      struct extent_buffer *buf)
 {
+	struct btrfs_fs_info *fs_info = root->fs_info;
+
 	if (btrfs_header_generation(buf) ==
-	    root->fs_info->running_transaction->transid) {
+	    fs_info->running_transaction->transid) {
 		btrfs_assert_tree_locked(buf);
 
 		if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
-			spin_lock(&root->fs_info->delalloc_lock);
-			if (root->fs_info->dirty_metadata_bytes >= buf->len)
-				root->fs_info->dirty_metadata_bytes -= buf->len;
-			else {
-				spin_unlock(&root->fs_info->delalloc_lock);
-				btrfs_panic(root->fs_info, -EOVERFLOW,
-					  "Can't clear %lu bytes from "
-					  " dirty_mdatadata_bytes (%llu)",
-					  buf->len,
-					  root->fs_info->dirty_metadata_bytes);
-			}
-			spin_unlock(&root->fs_info->delalloc_lock);
+			__percpu_counter_add(&fs_info->dirty_metadata_bytes,
+					     -buf->len,
+					     fs_info->dirty_metadata_batch);
 			/* ugh, clear_extent_buffer_dirty needs to lock the page */
 			btrfs_set_lock_blocking(buf);
 			clear_extent_buffer_dirty(buf);
@@ -1178,9 +1173,13 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
 
 	INIT_LIST_HEAD(&root->dirty_list);
 	INIT_LIST_HEAD(&root->root_list);
+	INIT_LIST_HEAD(&root->logged_list[0]);
+	INIT_LIST_HEAD(&root->logged_list[1]);
 	spin_lock_init(&root->orphan_lock);
 	spin_lock_init(&root->inode_lock);
 	spin_lock_init(&root->accounting_lock);
+	spin_lock_init(&root->log_extents_lock[0]);
+	spin_lock_init(&root->log_extents_lock[1]);
 	mutex_init(&root->objectid_mutex);
 	mutex_init(&root->log_mutex);
 	init_waitqueue_head(&root->log_writer_wait);
@@ -2004,10 +2003,24 @@ int open_ctree(struct super_block *sb,
 		goto fail_srcu;
 	}
 
+	ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
+	if (ret) {
+		err = ret;
+		goto fail_bdi;
+	}
+	fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
+					(1 + ilog2(nr_cpu_ids));
+
+	ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
+	if (ret) {
+		err = ret;
+		goto fail_dirty_metadata_bytes;
+	}
+
 	fs_info->btree_inode = new_inode(sb);
 	if (!fs_info->btree_inode) {
 		err = -ENOMEM;
-		goto fail_bdi;
+		goto fail_delalloc_bytes;
 	}
 
 	mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
@@ -2017,7 +2030,6 @@ int open_ctree(struct super_block *sb,
 	INIT_LIST_HEAD(&fs_info->dead_roots);
 	INIT_LIST_HEAD(&fs_info->delayed_iputs);
 	INIT_LIST_HEAD(&fs_info->delalloc_inodes);
-	INIT_LIST_HEAD(&fs_info->ordered_operations);
 	INIT_LIST_HEAD(&fs_info->caching_block_groups);
 	spin_lock_init(&fs_info->delalloc_lock);
 	spin_lock_init(&fs_info->trans_lock);
@@ -2028,6 +2040,7 @@ int open_ctree(struct super_block *sb,
 	spin_lock_init(&fs_info->tree_mod_seq_lock);
 	rwlock_init(&fs_info->tree_mod_log_lock);
 	mutex_init(&fs_info->reloc_mutex);
+	seqlock_init(&fs_info->profiles_lock);
 
 	init_completion(&fs_info->kobj_unregister);
 	INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2126,6 +2139,7 @@ int open_ctree(struct super_block *sb,
 
 	spin_lock_init(&fs_info->block_group_cache_lock);
 	fs_info->block_group_cache_tree = RB_ROOT;
+	fs_info->first_logical_byte = (u64)-1;
 
 	extent_io_tree_init(&fs_info->freed_extents[0],
 			    fs_info->btree_inode->i_mapping);
@@ -2187,7 +2201,8 @@ int open_ctree(struct super_block *sb,
 		goto fail_alloc;
 
 	/* check FS state, whether FS is broken. */
-	fs_info->fs_state |= btrfs_super_flags(disk_super);
+	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
+		set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
 
 	ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
 	if (ret) {
@@ -2261,6 +2276,8 @@ int open_ctree(struct super_block *sb,
 	leafsize = btrfs_super_leafsize(disk_super);
 	sectorsize = btrfs_super_sectorsize(disk_super);
 	stripesize = btrfs_super_stripesize(disk_super);
+	fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
+	fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
 
 	/*
 	 * mixed block groups end up with duplicate but slightly offset
@@ -2390,8 +2407,7 @@ int open_ctree(struct super_block *sb,
2390 sb->s_blocksize = sectorsize; 2407 sb->s_blocksize = sectorsize;
2391 sb->s_blocksize_bits = blksize_bits(sectorsize); 2408 sb->s_blocksize_bits = blksize_bits(sectorsize);
2392 2409
2393 if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, 2410 if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) {
2394 sizeof(disk_super->magic))) {
2395 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id); 2411 printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
2396 goto fail_sb_buffer; 2412 goto fail_sb_buffer;
2397 } 2413 }
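
Replacing the strncmp() against a magic string with a single u64 compare avoids walking bytes and makes the endianness explicit: the constant is converted once with cpu_to_le64() and compared against the raw on-disk word. A standalone sketch, assuming BTRFS_MAGIC is the packed "_BHRfS_M" value from the btrfs headers:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* "_BHRfS_M" packed into a host-order constant; on disk it is stored
 * little-endian. Value assumed from the btrfs headers. */
#define BTRFS_MAGIC 0x4D5F53665248425FULL

static uint64_t cpu_to_le64_model(uint64_t v)
{
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        return __builtin_bswap64(v);
#else
        return v;
#endif
}

int main(void)
{
        unsigned char super[8];
        uint64_t magic;

        memcpy(super, "_BHRfS_M", 8);          /* bytes as they appear on disk */
        memcpy(&magic, super, sizeof(magic));  /* raw little-endian load */

        if (magic != cpu_to_le64_model(BTRFS_MAGIC))
                printf("valid FS not found\n");
        else
                printf("magic matches\n");
        return 0;
}
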
@@ -2694,13 +2710,13 @@ fail_cleaner:
2694 * kthreads 2710 * kthreads
2695 */ 2711 */
2696 filemap_write_and_wait(fs_info->btree_inode->i_mapping); 2712 filemap_write_and_wait(fs_info->btree_inode->i_mapping);
2697 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2698 2713
2699fail_block_groups: 2714fail_block_groups:
2700 btrfs_free_block_groups(fs_info); 2715 btrfs_free_block_groups(fs_info);
2701 2716
2702fail_tree_roots: 2717fail_tree_roots:
2703 free_root_pointers(fs_info, 1); 2718 free_root_pointers(fs_info, 1);
2719 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2704 2720
2705fail_sb_buffer: 2721fail_sb_buffer:
2706 btrfs_stop_workers(&fs_info->generic_worker); 2722 btrfs_stop_workers(&fs_info->generic_worker);
@@ -2721,8 +2737,11 @@ fail_alloc:
2721fail_iput: 2737fail_iput:
2722 btrfs_mapping_tree_free(&fs_info->mapping_tree); 2738 btrfs_mapping_tree_free(&fs_info->mapping_tree);
2723 2739
2724 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2725 iput(fs_info->btree_inode); 2740 iput(fs_info->btree_inode);
2741fail_delalloc_bytes:
2742 percpu_counter_destroy(&fs_info->delalloc_bytes);
2743fail_dirty_metadata_bytes:
2744 percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
2726fail_bdi: 2745fail_bdi:
2727 bdi_destroy(&fs_info->bdi); 2746 bdi_destroy(&fs_info->bdi);
2728fail_srcu: 2747fail_srcu:
@@ -2795,8 +2814,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
2795 2814
2796 super = (struct btrfs_super_block *)bh->b_data; 2815 super = (struct btrfs_super_block *)bh->b_data;
2797 if (btrfs_super_bytenr(super) != bytenr || 2816 if (btrfs_super_bytenr(super) != bytenr ||
2798 strncmp((char *)(&super->magic), BTRFS_MAGIC, 2817 super->magic != cpu_to_le64(BTRFS_MAGIC)) {
2799 sizeof(super->magic))) {
2800 brelse(bh); 2818 brelse(bh);
2801 continue; 2819 continue;
2802 } 2820 }
@@ -3339,7 +3357,7 @@ int close_ctree(struct btrfs_root *root)
3339 printk(KERN_ERR "btrfs: commit super ret %d\n", ret); 3357 printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
3340 } 3358 }
3341 3359
3342 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 3360 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
3343 btrfs_error_commit_super(root); 3361 btrfs_error_commit_super(root);
3344 3362
3345 btrfs_put_block_group_cache(fs_info); 3363 btrfs_put_block_group_cache(fs_info);
@@ -3352,9 +3370,9 @@ int close_ctree(struct btrfs_root *root)
3352 3370
3353 btrfs_free_qgroup_config(root->fs_info); 3371 btrfs_free_qgroup_config(root->fs_info);
3354 3372
3355 if (fs_info->delalloc_bytes) { 3373 if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
3356 printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n", 3374 printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n",
3357 (unsigned long long)fs_info->delalloc_bytes); 3375 percpu_counter_sum(&fs_info->delalloc_bytes));
3358 } 3376 }
3359 3377
3360 free_extent_buffer(fs_info->extent_root->node); 3378 free_extent_buffer(fs_info->extent_root->node);
@@ -3401,6 +3419,8 @@ int close_ctree(struct btrfs_root *root)
3401 btrfs_close_devices(fs_info->fs_devices); 3419 btrfs_close_devices(fs_info->fs_devices);
3402 btrfs_mapping_tree_free(&fs_info->mapping_tree); 3420 btrfs_mapping_tree_free(&fs_info->mapping_tree);
3403 3421
3422 percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
3423 percpu_counter_destroy(&fs_info->delalloc_bytes);
3404 bdi_destroy(&fs_info->bdi); 3424 bdi_destroy(&fs_info->bdi);
3405 cleanup_srcu_struct(&fs_info->subvol_srcu); 3425 cleanup_srcu_struct(&fs_info->subvol_srcu);
3406 3426
@@ -3443,11 +3463,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
3443 (unsigned long long)transid, 3463 (unsigned long long)transid,
3444 (unsigned long long)root->fs_info->generation); 3464 (unsigned long long)root->fs_info->generation);
3445 was_dirty = set_extent_buffer_dirty(buf); 3465 was_dirty = set_extent_buffer_dirty(buf);
3446 if (!was_dirty) { 3466 if (!was_dirty)
3447 spin_lock(&root->fs_info->delalloc_lock); 3467 __percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
3448 root->fs_info->dirty_metadata_bytes += buf->len; 3468 buf->len,
3449 spin_unlock(&root->fs_info->delalloc_lock); 3469 root->fs_info->dirty_metadata_batch);
3450 }
3451} 3470}
3452 3471
3453static void __btrfs_btree_balance_dirty(struct btrfs_root *root, 3472static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
@@ -3457,8 +3476,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
3457 * looks as though older kernels can get into trouble with 3476 * looks as though older kernels can get into trouble with
3458 * this code, they end up stuck in balance_dirty_pages forever 3477 * this code, they end up stuck in balance_dirty_pages forever
3459 */ 3478 */
3460 u64 num_dirty; 3479 int ret;
3461 unsigned long thresh = 32 * 1024 * 1024;
3462 3480
3463 if (current->flags & PF_MEMALLOC) 3481 if (current->flags & PF_MEMALLOC)
3464 return; 3482 return;
@@ -3466,9 +3484,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
3466 if (flush_delayed) 3484 if (flush_delayed)
3467 btrfs_balance_delayed_items(root); 3485 btrfs_balance_delayed_items(root);
3468 3486
3469 num_dirty = root->fs_info->dirty_metadata_bytes; 3487 ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
3470 3488 BTRFS_DIRTY_METADATA_THRESH);
3471 if (num_dirty > thresh) { 3489 if (ret > 0) {
3472 balance_dirty_pages_ratelimited( 3490 balance_dirty_pages_ratelimited(
3473 root->fs_info->btree_inode->i_mapping); 3491 root->fs_info->btree_inode->i_mapping);
3474 } 3492 }
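
Together with the btrfs_mark_buffer_dirty() hunk above, this moves dirty-metadata accounting off a spinlock-protected u64 onto a per-CPU counter: writers fold batched deltas in with __percpu_counter_add(), and the throttle only asks percpu_counter_compare() whether the 32M threshold (the old thresh value) was crossed. A simplified single-threaded model of the batch/compare idea, not the kernel implementation:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4
#define DIRTY_METADATA_THRESH (32 * 1024 * 1024)   /* same 32M as the old thresh */

struct pcpu_counter {
        int64_t count;                 /* shared, batched total */
        int64_t local[NR_CPUS];        /* per-cpu deltas */
        int64_t batch;
};

static void pcpu_add(struct pcpu_counter *c, int cpu, int64_t amount)
{
        c->local[cpu] += amount;
        /* fold into the shared count only once the local delta exceeds
         * the batch, keeping the hot path contention-free */
        if (c->local[cpu] >= c->batch || c->local[cpu] <= -c->batch) {
                c->count += c->local[cpu];
                c->local[cpu] = 0;
        }
}

static int pcpu_compare(struct pcpu_counter *c, int64_t rhs)
{
        int64_t sum = c->count;
        /* only do the exact sum when the fast count is within one
         * batch-per-cpu of the threshold */
        if (sum - (int64_t)NR_CPUS * c->batch > rhs) return 1;
        if (sum + (int64_t)NR_CPUS * c->batch < rhs) return -1;
        for (int cpu = 0; cpu < NR_CPUS; cpu++)
                sum += c->local[cpu];
        return (sum > rhs) - (sum < rhs);
}

int main(void)
{
        /* batch as in the patch: leafsize * (1 + ilog2(nr_cpu_ids)) */
        struct pcpu_counter dirty = { .batch = 4096 * 3 };
        int ret;

        pcpu_add(&dirty, 0, 4096);
        ret = pcpu_compare(&dirty, DIRTY_METADATA_THRESH);
        printf("%s\n", ret > 0 ? "would balance dirty pages" : "under threshold");
        return 0;
}
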
@@ -3518,7 +3536,8 @@ void btrfs_error_commit_super(struct btrfs_root *root)
3518 btrfs_cleanup_transaction(root); 3536 btrfs_cleanup_transaction(root);
3519} 3537}
3520 3538
3521static void btrfs_destroy_ordered_operations(struct btrfs_root *root) 3539static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
3540 struct btrfs_root *root)
3522{ 3541{
3523 struct btrfs_inode *btrfs_inode; 3542 struct btrfs_inode *btrfs_inode;
3524 struct list_head splice; 3543 struct list_head splice;
@@ -3528,7 +3547,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
3528 mutex_lock(&root->fs_info->ordered_operations_mutex); 3547 mutex_lock(&root->fs_info->ordered_operations_mutex);
3529 spin_lock(&root->fs_info->ordered_extent_lock); 3548 spin_lock(&root->fs_info->ordered_extent_lock);
3530 3549
3531 list_splice_init(&root->fs_info->ordered_operations, &splice); 3550 list_splice_init(&t->ordered_operations, &splice);
3532 while (!list_empty(&splice)) { 3551 while (!list_empty(&splice)) {
3533 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 3552 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
3534 ordered_operations); 3553 ordered_operations);
@@ -3544,35 +3563,16 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
3544 3563
3545static void btrfs_destroy_ordered_extents(struct btrfs_root *root) 3564static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
3546{ 3565{
3547 struct list_head splice;
3548 struct btrfs_ordered_extent *ordered; 3566 struct btrfs_ordered_extent *ordered;
3549 struct inode *inode;
3550
3551 INIT_LIST_HEAD(&splice);
3552 3567
3553 spin_lock(&root->fs_info->ordered_extent_lock); 3568 spin_lock(&root->fs_info->ordered_extent_lock);
3554 3569 /*
3555 list_splice_init(&root->fs_info->ordered_extents, &splice); 3570 * This will just short circuit the ordered completion stuff which will
3556 while (!list_empty(&splice)) { 3571 * make sure the ordered extent gets properly cleaned up.
3557 ordered = list_entry(splice.next, struct btrfs_ordered_extent, 3572 */
3558 root_extent_list); 3573 list_for_each_entry(ordered, &root->fs_info->ordered_extents,
3559 3574 root_extent_list)
3560 list_del_init(&ordered->root_extent_list); 3575 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
3561 atomic_inc(&ordered->refs);
3562
3563 /* the inode may be getting freed (in sys_unlink path). */
3564 inode = igrab(ordered->inode);
3565
3566 spin_unlock(&root->fs_info->ordered_extent_lock);
3567 if (inode)
3568 iput(inode);
3569
3570 atomic_set(&ordered->refs, 1);
3571 btrfs_put_ordered_extent(ordered);
3572
3573 spin_lock(&root->fs_info->ordered_extent_lock);
3574 }
3575
3576 spin_unlock(&root->fs_info->ordered_extent_lock); 3576 spin_unlock(&root->fs_info->ordered_extent_lock);
3577} 3577}
3578 3578
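
Instead of splicing the ordered-extent list and dropping/retaking the lock for every entry, the cleanup path now walks the list once under the lock and sets an error bit that the normal completion path honors. A toy model of marking in place under one lock; names are stand-ins:

#include <pthread.h>
#include <stdio.h>

#define ORDERED_IOERR (1u << 0)        /* stand-in for BTRFS_ORDERED_IOERR */

struct ordered_extent {
        unsigned flags;
        struct ordered_extent *next;
};

static pthread_mutex_t ordered_lock = PTHREAD_MUTEX_INITIALIZER;

static void destroy_ordered_extents(struct ordered_extent *head)
{
        pthread_mutex_lock(&ordered_lock);
        /* one pass, no reference juggling: completion later sees the
         * bit and short-circuits into its normal cleanup */
        for (struct ordered_extent *o = head; o; o = o->next)
                o->flags |= ORDERED_IOERR;
        pthread_mutex_unlock(&ordered_lock);
}

int main(void)
{
        struct ordered_extent b = { 0, NULL }, a = { 0, &b };

        destroy_ordered_extents(&a);
        printf("flags: %u %u\n", a.flags, b.flags);
        return 0;
}
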
@@ -3594,11 +3594,11 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3594 } 3594 }
3595 3595
3596 while ((node = rb_first(&delayed_refs->root)) != NULL) { 3596 while ((node = rb_first(&delayed_refs->root)) != NULL) {
3597 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3597 struct btrfs_delayed_ref_head *head = NULL;
3598 3598
3599 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
3599 atomic_set(&ref->refs, 1); 3600 atomic_set(&ref->refs, 1);
3600 if (btrfs_delayed_ref_is_head(ref)) { 3601 if (btrfs_delayed_ref_is_head(ref)) {
3601 struct btrfs_delayed_ref_head *head;
3602 3602
3603 head = btrfs_delayed_node_to_head(ref); 3603 head = btrfs_delayed_node_to_head(ref);
3604 if (!mutex_trylock(&head->mutex)) { 3604 if (!mutex_trylock(&head->mutex)) {
@@ -3614,16 +3614,18 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
3614 continue; 3614 continue;
3615 } 3615 }
3616 3616
3617 kfree(head->extent_op); 3617 btrfs_free_delayed_extent_op(head->extent_op);
3618 delayed_refs->num_heads--; 3618 delayed_refs->num_heads--;
3619 if (list_empty(&head->cluster)) 3619 if (list_empty(&head->cluster))
3620 delayed_refs->num_heads_ready--; 3620 delayed_refs->num_heads_ready--;
3621 list_del_init(&head->cluster); 3621 list_del_init(&head->cluster);
3622 } 3622 }
3623
3623 ref->in_tree = 0; 3624 ref->in_tree = 0;
3624 rb_erase(&ref->rb_node, &delayed_refs->root); 3625 rb_erase(&ref->rb_node, &delayed_refs->root);
3625 delayed_refs->num_entries--; 3626 delayed_refs->num_entries--;
3626 3627 if (head)
3628 mutex_unlock(&head->mutex);
3627 spin_unlock(&delayed_refs->lock); 3629 spin_unlock(&delayed_refs->lock);
3628 btrfs_put_delayed_ref(ref); 3630 btrfs_put_delayed_ref(ref);
3629 3631
@@ -3671,6 +3673,8 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
3671 delalloc_inodes); 3673 delalloc_inodes);
3672 3674
3673 list_del_init(&btrfs_inode->delalloc_inodes); 3675 list_del_init(&btrfs_inode->delalloc_inodes);
3676 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
3677 &btrfs_inode->runtime_flags);
3674 3678
3675 btrfs_invalidate_inodes(btrfs_inode->root); 3679 btrfs_invalidate_inodes(btrfs_inode->root);
3676 } 3680 }
@@ -3823,10 +3827,8 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
3823 3827
3824 while (!list_empty(&list)) { 3828 while (!list_empty(&list)) {
3825 t = list_entry(list.next, struct btrfs_transaction, list); 3829 t = list_entry(list.next, struct btrfs_transaction, list);
3826 if (!t)
3827 break;
3828 3830
3829 btrfs_destroy_ordered_operations(root); 3831 btrfs_destroy_ordered_operations(t, root);
3830 3832
3831 btrfs_destroy_ordered_extents(root); 3833 btrfs_destroy_ordered_extents(root);
3832 3834
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5a3327b8f90d..5cd44e239595 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -72,8 +72,7 @@ enum {
72 RESERVE_ALLOC_NO_ACCOUNT = 2, 72 RESERVE_ALLOC_NO_ACCOUNT = 2,
73}; 73};
74 74
75static int update_block_group(struct btrfs_trans_handle *trans, 75static int update_block_group(struct btrfs_root *root,
76 struct btrfs_root *root,
77 u64 bytenr, u64 num_bytes, int alloc); 76 u64 bytenr, u64 num_bytes, int alloc);
78static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 77static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
79 struct btrfs_root *root, 78 struct btrfs_root *root,
@@ -103,6 +102,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
103 int dump_block_groups); 102 int dump_block_groups);
104static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 103static int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache,
105 u64 num_bytes, int reserve); 104 u64 num_bytes, int reserve);
105static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv,
106 u64 num_bytes);
106 107
107static noinline int 108static noinline int
108block_group_cache_done(struct btrfs_block_group_cache *cache) 109block_group_cache_done(struct btrfs_block_group_cache *cache)
@@ -162,6 +163,10 @@ static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
162 rb_link_node(&block_group->cache_node, parent, p); 163 rb_link_node(&block_group->cache_node, parent, p);
163 rb_insert_color(&block_group->cache_node, 164 rb_insert_color(&block_group->cache_node,
164 &info->block_group_cache_tree); 165 &info->block_group_cache_tree);
166
167 if (info->first_logical_byte > block_group->key.objectid)
168 info->first_logical_byte = block_group->key.objectid;
169
165 spin_unlock(&info->block_group_cache_lock); 170 spin_unlock(&info->block_group_cache_lock);
166 171
167 return 0; 172 return 0;
@@ -203,8 +208,11 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
203 break; 208 break;
204 } 209 }
205 } 210 }
206 if (ret) 211 if (ret) {
207 btrfs_get_block_group(ret); 212 btrfs_get_block_group(ret);
213 if (bytenr == 0 && info->first_logical_byte > ret->key.objectid)
214 info->first_logical_byte = ret->key.objectid;
215 }
208 spin_unlock(&info->block_group_cache_lock); 216 spin_unlock(&info->block_group_cache_lock);
209 217
210 return ret; 218 return ret;
@@ -468,8 +476,6 @@ out:
468} 476}
469 477
470static int cache_block_group(struct btrfs_block_group_cache *cache, 478static int cache_block_group(struct btrfs_block_group_cache *cache,
471 struct btrfs_trans_handle *trans,
472 struct btrfs_root *root,
473 int load_cache_only) 479 int load_cache_only)
474{ 480{
475 DEFINE_WAIT(wait); 481 DEFINE_WAIT(wait);
@@ -527,12 +533,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
527 cache->cached = BTRFS_CACHE_FAST; 533 cache->cached = BTRFS_CACHE_FAST;
528 spin_unlock(&cache->lock); 534 spin_unlock(&cache->lock);
529 535
530 /*
531 * We can't do the read from on-disk cache during a commit since we need
532 * to have the normal tree locking. Also if we are currently trying to
533 * allocate blocks for the tree root we can't do the fast caching since
534 * we likely hold important locks.
535 */
536 if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) { 536 if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) {
537 ret = load_free_space_cache(fs_info, cache); 537 ret = load_free_space_cache(fs_info, cache);
538 538
@@ -2143,7 +2143,6 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2143 node->num_bytes); 2143 node->num_bytes);
2144 } 2144 }
2145 } 2145 }
2146 mutex_unlock(&head->mutex);
2147 return ret; 2146 return ret;
2148 } 2147 }
2149 2148
@@ -2258,7 +2257,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2258 * process of being added. Don't run this ref yet. 2257 * process of being added. Don't run this ref yet.
2259 */ 2258 */
2260 list_del_init(&locked_ref->cluster); 2259 list_del_init(&locked_ref->cluster);
2261 mutex_unlock(&locked_ref->mutex); 2260 btrfs_delayed_ref_unlock(locked_ref);
2262 locked_ref = NULL; 2261 locked_ref = NULL;
2263 delayed_refs->num_heads_ready++; 2262 delayed_refs->num_heads_ready++;
2264 spin_unlock(&delayed_refs->lock); 2263 spin_unlock(&delayed_refs->lock);
@@ -2285,7 +2284,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2285 ref = &locked_ref->node; 2284 ref = &locked_ref->node;
2286 2285
2287 if (extent_op && must_insert_reserved) { 2286 if (extent_op && must_insert_reserved) {
2288 kfree(extent_op); 2287 btrfs_free_delayed_extent_op(extent_op);
2289 extent_op = NULL; 2288 extent_op = NULL;
2290 } 2289 }
2291 2290
@@ -2294,28 +2293,25 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2294 2293
2295 ret = run_delayed_extent_op(trans, root, 2294 ret = run_delayed_extent_op(trans, root,
2296 ref, extent_op); 2295 ref, extent_op);
2297 kfree(extent_op); 2296 btrfs_free_delayed_extent_op(extent_op);
2298 2297
2299 if (ret) { 2298 if (ret) {
2300 list_del_init(&locked_ref->cluster); 2299 printk(KERN_DEBUG
2301 mutex_unlock(&locked_ref->mutex); 2300 "btrfs: run_delayed_extent_op "
2302 2301 "returned %d\n", ret);
2303 printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret);
2304 spin_lock(&delayed_refs->lock); 2302 spin_lock(&delayed_refs->lock);
2303 btrfs_delayed_ref_unlock(locked_ref);
2305 return ret; 2304 return ret;
2306 } 2305 }
2307 2306
2308 goto next; 2307 goto next;
2309 } 2308 }
2310
2311 list_del_init(&locked_ref->cluster);
2312 locked_ref = NULL;
2313 } 2309 }
2314 2310
2315 ref->in_tree = 0; 2311 ref->in_tree = 0;
2316 rb_erase(&ref->rb_node, &delayed_refs->root); 2312 rb_erase(&ref->rb_node, &delayed_refs->root);
2317 delayed_refs->num_entries--; 2313 delayed_refs->num_entries--;
2318 if (locked_ref) { 2314 if (!btrfs_delayed_ref_is_head(ref)) {
2319 /* 2315 /*
2320 * when we play the delayed ref, also correct the 2316 * when we play the delayed ref, also correct the
2321 * ref_mod on head 2317 * ref_mod on head
@@ -2337,20 +2333,29 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
2337 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2333 ret = run_one_delayed_ref(trans, root, ref, extent_op,
2338 must_insert_reserved); 2334 must_insert_reserved);
2339 2335
2340 btrfs_put_delayed_ref(ref); 2336 btrfs_free_delayed_extent_op(extent_op);
2341 kfree(extent_op);
2342 count++;
2343
2344 if (ret) { 2337 if (ret) {
2345 if (locked_ref) { 2338 btrfs_delayed_ref_unlock(locked_ref);
2346 list_del_init(&locked_ref->cluster); 2339 btrfs_put_delayed_ref(ref);
2347 mutex_unlock(&locked_ref->mutex); 2340 printk(KERN_DEBUG
2348 } 2341 "btrfs: run_one_delayed_ref returned %d\n", ret);
2349 printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret);
2350 spin_lock(&delayed_refs->lock); 2342 spin_lock(&delayed_refs->lock);
2351 return ret; 2343 return ret;
2352 } 2344 }
2353 2345
2346 /*
2347 * If this node is a head, that means all the refs in this head
2348 * have been dealt with, and we will pick the next head to deal
2349 * with, so we must unlock the head and drop it from the cluster
2350 * list before we release it.
2351 */
2352 if (btrfs_delayed_ref_is_head(ref)) {
2353 list_del_init(&locked_ref->cluster);
2354 btrfs_delayed_ref_unlock(locked_ref);
2355 locked_ref = NULL;
2356 }
2357 btrfs_put_delayed_ref(ref);
2358 count++;
2354next: 2359next:
2355 cond_resched(); 2360 cond_resched();
2356 spin_lock(&delayed_refs->lock); 2361 spin_lock(&delayed_refs->lock);
@@ -2500,6 +2505,7 @@ again:
2500 2505
2501 ret = run_clustered_refs(trans, root, &cluster); 2506 ret = run_clustered_refs(trans, root, &cluster);
2502 if (ret < 0) { 2507 if (ret < 0) {
2508 btrfs_release_ref_cluster(&cluster);
2503 spin_unlock(&delayed_refs->lock); 2509 spin_unlock(&delayed_refs->lock);
2504 btrfs_abort_transaction(trans, root, ret); 2510 btrfs_abort_transaction(trans, root, ret);
2505 return ret; 2511 return ret;
@@ -2586,7 +2592,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2586 struct btrfs_delayed_extent_op *extent_op; 2592 struct btrfs_delayed_extent_op *extent_op;
2587 int ret; 2593 int ret;
2588 2594
2589 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2595 extent_op = btrfs_alloc_delayed_extent_op();
2590 if (!extent_op) 2596 if (!extent_op)
2591 return -ENOMEM; 2597 return -ENOMEM;
2592 2598
@@ -2598,7 +2604,7 @@ int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
2598 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr, 2604 ret = btrfs_add_delayed_extent_op(root->fs_info, trans, bytenr,
2599 num_bytes, extent_op); 2605 num_bytes, extent_op);
2600 if (ret) 2606 if (ret)
2601 kfree(extent_op); 2607 btrfs_free_delayed_extent_op(extent_op);
2602 return ret; 2608 return ret;
2603} 2609}
2604 2610
@@ -3223,12 +3229,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3223 u64 extra_flags = chunk_to_extended(flags) & 3229 u64 extra_flags = chunk_to_extended(flags) &
3224 BTRFS_EXTENDED_PROFILE_MASK; 3230 BTRFS_EXTENDED_PROFILE_MASK;
3225 3231
3232 write_seqlock(&fs_info->profiles_lock);
3226 if (flags & BTRFS_BLOCK_GROUP_DATA) 3233 if (flags & BTRFS_BLOCK_GROUP_DATA)
3227 fs_info->avail_data_alloc_bits |= extra_flags; 3234 fs_info->avail_data_alloc_bits |= extra_flags;
3228 if (flags & BTRFS_BLOCK_GROUP_METADATA) 3235 if (flags & BTRFS_BLOCK_GROUP_METADATA)
3229 fs_info->avail_metadata_alloc_bits |= extra_flags; 3236 fs_info->avail_metadata_alloc_bits |= extra_flags;
3230 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 3237 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3231 fs_info->avail_system_alloc_bits |= extra_flags; 3238 fs_info->avail_system_alloc_bits |= extra_flags;
3239 write_sequnlock(&fs_info->profiles_lock);
3232} 3240}
3233 3241
3234/* 3242/*
@@ -3320,12 +3328,18 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3320 3328
3321static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) 3329static u64 get_alloc_profile(struct btrfs_root *root, u64 flags)
3322{ 3330{
3323 if (flags & BTRFS_BLOCK_GROUP_DATA) 3331 unsigned seq;
3324 flags |= root->fs_info->avail_data_alloc_bits; 3332
3325 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 3333 do {
3326 flags |= root->fs_info->avail_system_alloc_bits; 3334 seq = read_seqbegin(&root->fs_info->profiles_lock);
3327 else if (flags & BTRFS_BLOCK_GROUP_METADATA) 3335
3328 flags |= root->fs_info->avail_metadata_alloc_bits; 3336 if (flags & BTRFS_BLOCK_GROUP_DATA)
3337 flags |= root->fs_info->avail_data_alloc_bits;
3338 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
3339 flags |= root->fs_info->avail_system_alloc_bits;
3340 else if (flags & BTRFS_BLOCK_GROUP_METADATA)
3341 flags |= root->fs_info->avail_metadata_alloc_bits;
3342 } while (read_seqretry(&root->fs_info->profiles_lock, seq));
3329 3343
3330 return btrfs_reduce_alloc_profile(root, flags); 3344 return btrfs_reduce_alloc_profile(root, flags);
3331} 3345}
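
profiles_lock (the seqlock initialized earlier in this patch) lets get_alloc_profile() read the three avail_*_alloc_bits words without blocking writers: readers simply retry if the sequence changed underneath them. A userspace approximation using a bare sequence counter; the real seqlock also embeds the writer's spinlock:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_uint seq;
static uint64_t avail_data_bits, avail_metadata_bits;

static void write_profiles(uint64_t data, uint64_t meta)
{
        atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* odd: write in flight */
        avail_data_bits = data;
        avail_metadata_bits = meta;
        atomic_fetch_add_explicit(&seq, 1, memory_order_release); /* even again */
}

static uint64_t read_profile(int want_data)
{
        unsigned s;
        uint64_t v;

        do {
                while ((s = atomic_load_explicit(&seq, memory_order_acquire)) & 1)
                        ;                       /* writer active, spin */
                v = want_data ? avail_data_bits : avail_metadata_bits;
        } while (atomic_load_explicit(&seq, memory_order_acquire) != s);
        return v;
}

int main(void)
{
        write_profiles(0x10, 0x20);
        printf("data profile bits: 0x%llx\n",
               (unsigned long long)read_profile(1));
        return 0;
}
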
@@ -3564,6 +3578,10 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3564 int wait_for_alloc = 0; 3578 int wait_for_alloc = 0;
3565 int ret = 0; 3579 int ret = 0;
3566 3580
3581 /* Don't re-enter if we're already allocating a chunk */
3582 if (trans->allocating_chunk)
3583 return -ENOSPC;
3584
3567 space_info = __find_space_info(extent_root->fs_info, flags); 3585 space_info = __find_space_info(extent_root->fs_info, flags);
3568 if (!space_info) { 3586 if (!space_info) {
3569 ret = update_space_info(extent_root->fs_info, flags, 3587 ret = update_space_info(extent_root->fs_info, flags,
@@ -3606,6 +3624,8 @@ again:
3606 goto again; 3624 goto again;
3607 } 3625 }
3608 3626
3627 trans->allocating_chunk = true;
3628
3609 /* 3629 /*
3610 * If we have mixed data/metadata chunks we want to make sure we keep 3630 * If we have mixed data/metadata chunks we want to make sure we keep
3611 * allocating mixed chunks instead of individual chunks. 3631 * allocating mixed chunks instead of individual chunks.
@@ -3632,6 +3652,7 @@ again:
3632 check_system_chunk(trans, extent_root, flags); 3652 check_system_chunk(trans, extent_root, flags);
3633 3653
3634 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3654 ret = btrfs_alloc_chunk(trans, extent_root, flags);
3655 trans->allocating_chunk = false;
3635 if (ret < 0 && ret != -ENOSPC) 3656 if (ret < 0 && ret != -ENOSPC)
3636 goto out; 3657 goto out;
3637 3658
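
trans->allocating_chunk is a re-entrancy guard: allocating a chunk can itself need metadata space, which can try to allocate another chunk, and the flag turns that recursion into a clean -ENOSPC instead of a loop. A compact sketch of the idea:

#include <stdbool.h>
#include <stdio.h>

#define ENOSPC 28

struct trans { bool allocating_chunk; };

static int do_chunk_alloc(struct trans *t, int depth)
{
        if (t->allocating_chunk)
                return -ENOSPC;          /* already inside an allocation */

        t->allocating_chunk = true;
        /* allocating a chunk may need metadata space, which may try to
         * allocate another chunk -- the guard stops that recursion */
        int ret = depth < 1 ? do_chunk_alloc(t, depth + 1) : 0;
        t->allocating_chunk = false;
        return ret;
}

int main(void)
{
        struct trans t = { false };

        printf("recursive attempt -> %d\n", do_chunk_alloc(&t, 0));
        return 0;
}
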
@@ -3653,13 +3674,31 @@ static int can_overcommit(struct btrfs_root *root,
3653 struct btrfs_space_info *space_info, u64 bytes, 3674 struct btrfs_space_info *space_info, u64 bytes,
3654 enum btrfs_reserve_flush_enum flush) 3675 enum btrfs_reserve_flush_enum flush)
3655{ 3676{
3677 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
3656 u64 profile = btrfs_get_alloc_profile(root, 0); 3678 u64 profile = btrfs_get_alloc_profile(root, 0);
3679 u64 rsv_size = 0;
3657 u64 avail; 3680 u64 avail;
3658 u64 used; 3681 u64 used;
3682 u64 to_add;
3659 3683
3660 used = space_info->bytes_used + space_info->bytes_reserved + 3684 used = space_info->bytes_used + space_info->bytes_reserved +
3661 space_info->bytes_pinned + space_info->bytes_readonly + 3685 space_info->bytes_pinned + space_info->bytes_readonly;
3662 space_info->bytes_may_use; 3686
3687 spin_lock(&global_rsv->lock);
3688 rsv_size = global_rsv->size;
3689 spin_unlock(&global_rsv->lock);
3690
3691 /*
3692 * We only want to allow over committing if we have lots of actual space
3693 * free, but if we don't have enough space to handle the global reserve
3694 * space then we could end up having a real enospc problem when trying
3695 * to allocate a chunk or some other such important allocation.
3696 */
3697 rsv_size <<= 1;
3698 if (used + rsv_size >= space_info->total_bytes)
3699 return 0;
3700
3701 used += space_info->bytes_may_use;
3663 3702
3664 spin_lock(&root->fs_info->free_chunk_lock); 3703 spin_lock(&root->fs_info->free_chunk_lock);
3665 avail = root->fs_info->free_chunk_space; 3704 avail = root->fs_info->free_chunk_space;
@@ -3674,27 +3713,38 @@ static int can_overcommit(struct btrfs_root *root,
3674 BTRFS_BLOCK_GROUP_RAID10)) 3713 BTRFS_BLOCK_GROUP_RAID10))
3675 avail >>= 1; 3714 avail >>= 1;
3676 3715
3716 to_add = space_info->total_bytes;
3717
3677 /* 3718 /*
3678 * If we aren't flushing all things, let us overcommit up to 3719 * If we aren't flushing all things, let us overcommit up to
3679 * half of the space. If we can flush, don't let us overcommit 3720
3680 * too much, let it overcommit up to 1/8 of the space. 3721 * too much, let it overcommit up to 1/8 of the space.
3681 */ 3722 */
3682 if (flush == BTRFS_RESERVE_FLUSH_ALL) 3723 if (flush == BTRFS_RESERVE_FLUSH_ALL)
3683 avail >>= 3; 3724 to_add >>= 3;
3684 else 3725 else
3685 avail >>= 1; 3726 to_add >>= 1;
3686 3727
3687 if (used + bytes < space_info->total_bytes + avail) 3728 /*
3729 * Limit the overcommit to the amount of free space we could possibly
3730 * allocate for chunks.
3731 */
3732 to_add = min(avail, to_add);
3733
3734 if (used + bytes < space_info->total_bytes + to_add)
3688 return 1; 3735 return 1;
3689 return 0; 3736 return 0;
3690} 3737}
3691 3738
3692static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb, 3739static inline int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
3693 unsigned long nr_pages, 3740 unsigned long nr_pages,
3694 enum wb_reason reason) 3741 enum wb_reason reason)
3695{ 3742{
3696 if (!writeback_in_progress(sb->s_bdi) && 3743 /* the flusher is dealing with the dirty inodes now. */
3697 down_read_trylock(&sb->s_umount)) { 3744 if (writeback_in_progress(sb->s_bdi))
3745 return 1;
3746
3747 if (down_read_trylock(&sb->s_umount)) {
3698 writeback_inodes_sb_nr(sb, nr_pages, reason); 3748 writeback_inodes_sb_nr(sb, nr_pages, reason);
3699 up_read(&sb->s_umount); 3749 up_read(&sb->s_umount);
3700 return 1; 3750 return 1;
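
The reworked can_overcommit() above first sets aside twice the global reserve's size, then caps the overcommit slack (1/8 of total when flushing, 1/2 otherwise) at the remaining allocatable chunk space. A worked numeric model with illustrative figures; it omits the RAID1/RAID10 halving of avail shown in the surrounding context:

#include <stdint.h>
#include <stdio.h>

static int can_overcommit(uint64_t total, uint64_t used_hard, uint64_t may_use,
                          uint64_t rsv_size, uint64_t avail_chunk,
                          uint64_t bytes, int flush_all)
{
        if (used_hard + 2 * rsv_size >= total)
                return 0;                /* keep room for the global reserve */

        uint64_t used = used_hard + may_use;
        uint64_t to_add = flush_all ? total >> 3 : total >> 1;

        if (to_add > avail_chunk)        /* can't overcommit past allocatable space */
                to_add = avail_chunk;

        return used + bytes < total + to_add;
}

int main(void)
{
        /* 10G space info, 6G used+reserved+pinned+readonly, 1G may_use,
         * 512M global rsv, 3G unallocated chunk space, asking for 4G */
        uint64_t G = 1024ULL * 1024 * 1024;

        printf("overcommit ok: %d\n",
               can_overcommit(10 * G, 6 * G, 1 * G, G / 2, 3 * G, 4 * G, 1));
        return 0;
}
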
@@ -3703,6 +3753,28 @@ static int writeback_inodes_sb_nr_if_idle_safe(struct super_block *sb,
3703 return 0; 3753 return 0;
3704} 3754}
3705 3755
3756void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root,
3757 unsigned long nr_pages)
3758{
3759 struct super_block *sb = root->fs_info->sb;
3760 int started;
3761
3762 /* If we can not start writeback, just sync all the delalloc file. */
3763 started = writeback_inodes_sb_nr_if_idle_safe(sb, nr_pages,
3764 WB_REASON_FS_FREE_SPACE);
3765 if (!started) {
3766 /*
3767 * We needn't worry about the filesystem going from r/w to r/o even
3768 * though we don't acquire the ->s_umount mutex, because the filesystem
3769 * should guarantee that the delalloc inode list is empty after the
3770 * filesystem becomes read-only (all dirty pages are written to
3771 * the disk).
3772 */
3773 btrfs_start_delalloc_inodes(root, 0);
3774 btrfs_wait_ordered_extents(root, 0);
3775 }
3776}
3777
3706/* 3778/*
3707 * shrink metadata reservation for delalloc 3779 * shrink metadata reservation for delalloc
3708 */ 3780 */
@@ -3724,7 +3796,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
3724 space_info = block_rsv->space_info; 3796 space_info = block_rsv->space_info;
3725 3797
3726 smp_mb(); 3798 smp_mb();
3727 delalloc_bytes = root->fs_info->delalloc_bytes; 3799 delalloc_bytes = percpu_counter_sum_positive(
3800 &root->fs_info->delalloc_bytes);
3728 if (delalloc_bytes == 0) { 3801 if (delalloc_bytes == 0) {
3729 if (trans) 3802 if (trans)
3730 return; 3803 return;
@@ -3735,10 +3808,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
3735 while (delalloc_bytes && loops < 3) { 3808 while (delalloc_bytes && loops < 3) {
3736 max_reclaim = min(delalloc_bytes, to_reclaim); 3809 max_reclaim = min(delalloc_bytes, to_reclaim);
3737 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; 3810 nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
3738 writeback_inodes_sb_nr_if_idle_safe(root->fs_info->sb, 3811 btrfs_writeback_inodes_sb_nr(root, nr_pages);
3739 nr_pages,
3740 WB_REASON_FS_FREE_SPACE);
3741
3742 /* 3812 /*
3743 * We need to wait for the async pages to actually start before 3813 * We need to wait for the async pages to actually start before
3744 * we do anything. 3814 * we do anything.
@@ -3766,7 +3836,8 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
3766 break; 3836 break;
3767 } 3837 }
3768 smp_mb(); 3838 smp_mb();
3769 delalloc_bytes = root->fs_info->delalloc_bytes; 3839 delalloc_bytes = percpu_counter_sum_positive(
3840 &root->fs_info->delalloc_bytes);
3770 } 3841 }
3771} 3842}
3772 3843
@@ -4030,6 +4101,15 @@ again:
4030 goto again; 4101 goto again;
4031 4102
4032out: 4103out:
4104 if (ret == -ENOSPC &&
4105 unlikely(root->orphan_cleanup_state == ORPHAN_CLEANUP_STARTED)) {
4106 struct btrfs_block_rsv *global_rsv =
4107 &root->fs_info->global_block_rsv;
4108
4109 if (block_rsv != global_rsv &&
4110 !block_rsv_use_bytes(global_rsv, orig_bytes))
4111 ret = 0;
4112 }
4033 if (flushing) { 4113 if (flushing) {
4034 spin_lock(&space_info->lock); 4114 spin_lock(&space_info->lock);
4035 space_info->flush = 0; 4115 space_info->flush = 0;
@@ -4668,7 +4748,8 @@ void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
4668 spin_lock(&BTRFS_I(inode)->lock); 4748 spin_lock(&BTRFS_I(inode)->lock);
4669 dropped = drop_outstanding_extent(inode); 4749 dropped = drop_outstanding_extent(inode);
4670 4750
4671 to_free = calc_csum_metadata_size(inode, num_bytes, 0); 4751 if (num_bytes)
4752 to_free = calc_csum_metadata_size(inode, num_bytes, 0);
4672 spin_unlock(&BTRFS_I(inode)->lock); 4753 spin_unlock(&BTRFS_I(inode)->lock);
4673 if (dropped > 0) 4754 if (dropped > 0)
4674 to_free += btrfs_calc_trans_metadata_size(root, dropped); 4755 to_free += btrfs_calc_trans_metadata_size(root, dropped);
@@ -4735,8 +4816,7 @@ void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
4735 btrfs_free_reserved_data_space(inode, num_bytes); 4816 btrfs_free_reserved_data_space(inode, num_bytes);
4736} 4817}
4737 4818
4738static int update_block_group(struct btrfs_trans_handle *trans, 4819static int update_block_group(struct btrfs_root *root,
4739 struct btrfs_root *root,
4740 u64 bytenr, u64 num_bytes, int alloc) 4820 u64 bytenr, u64 num_bytes, int alloc)
4741{ 4821{
4742 struct btrfs_block_group_cache *cache = NULL; 4822 struct btrfs_block_group_cache *cache = NULL;
@@ -4773,7 +4853,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4773 * space back to the block group, otherwise we will leak space. 4853 * space back to the block group, otherwise we will leak space.
4774 */ 4854 */
4775 if (!alloc && cache->cached == BTRFS_CACHE_NO) 4855 if (!alloc && cache->cached == BTRFS_CACHE_NO)
4776 cache_block_group(cache, trans, NULL, 1); 4856 cache_block_group(cache, 1);
4777 4857
4778 byte_in_group = bytenr - cache->key.objectid; 4858 byte_in_group = bytenr - cache->key.objectid;
4779 WARN_ON(byte_in_group > cache->key.offset); 4859 WARN_ON(byte_in_group > cache->key.offset);
@@ -4823,6 +4903,13 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
4823 struct btrfs_block_group_cache *cache; 4903 struct btrfs_block_group_cache *cache;
4824 u64 bytenr; 4904 u64 bytenr;
4825 4905
4906 spin_lock(&root->fs_info->block_group_cache_lock);
4907 bytenr = root->fs_info->first_logical_byte;
4908 spin_unlock(&root->fs_info->block_group_cache_lock);
4909
4910 if (bytenr < (u64)-1)
4911 return bytenr;
4912
4826 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 4913 cache = btrfs_lookup_first_block_group(root->fs_info, search_start);
4827 if (!cache) 4914 if (!cache)
4828 return 0; 4915 return 0;
@@ -4873,8 +4960,7 @@ int btrfs_pin_extent(struct btrfs_root *root,
4873/* 4960/*
4874 * this function must be called within transaction 4961 * this function must be called within transaction
4875 */ 4962 */
4876int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, 4963int btrfs_pin_extent_for_log_replay(struct btrfs_root *root,
4877 struct btrfs_root *root,
4878 u64 bytenr, u64 num_bytes) 4964 u64 bytenr, u64 num_bytes)
4879{ 4965{
4880 struct btrfs_block_group_cache *cache; 4966 struct btrfs_block_group_cache *cache;
@@ -4888,7 +4974,7 @@ int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
4888 * to one because the slow code to read in the free extents does check 4974 * to one because the slow code to read in the free extents does check
4889 * the pinned extents. 4975 * the pinned extents.
4890 */ 4976 */
4891 cache_block_group(cache, trans, root, 1); 4977 cache_block_group(cache, 1);
4892 4978
4893 pin_down_extent(root, cache, bytenr, num_bytes, 0); 4979 pin_down_extent(root, cache, bytenr, num_bytes, 0);
4894 4980
@@ -5285,7 +5371,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
5285 } 5371 }
5286 } 5372 }
5287 5373
5288 ret = update_block_group(trans, root, bytenr, num_bytes, 0); 5374 ret = update_block_group(root, bytenr, num_bytes, 0);
5289 if (ret) { 5375 if (ret) {
5290 btrfs_abort_transaction(trans, extent_root, ret); 5376 btrfs_abort_transaction(trans, extent_root, ret);
5291 goto out; 5377 goto out;
@@ -5330,7 +5416,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
5330 if (head->extent_op) { 5416 if (head->extent_op) {
5331 if (!head->must_insert_reserved) 5417 if (!head->must_insert_reserved)
5332 goto out; 5418 goto out;
5333 kfree(head->extent_op); 5419 btrfs_free_delayed_extent_op(head->extent_op);
5334 head->extent_op = NULL; 5420 head->extent_op = NULL;
5335 } 5421 }
5336 5422
@@ -5476,7 +5562,6 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
5476 u64 num_bytes) 5562 u64 num_bytes)
5477{ 5563{
5478 struct btrfs_caching_control *caching_ctl; 5564 struct btrfs_caching_control *caching_ctl;
5479 DEFINE_WAIT(wait);
5480 5565
5481 caching_ctl = get_caching_control(cache); 5566 caching_ctl = get_caching_control(cache);
5482 if (!caching_ctl) 5567 if (!caching_ctl)
@@ -5493,7 +5578,6 @@ static noinline int
5493wait_block_group_cache_done(struct btrfs_block_group_cache *cache) 5578wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
5494{ 5579{
5495 struct btrfs_caching_control *caching_ctl; 5580 struct btrfs_caching_control *caching_ctl;
5496 DEFINE_WAIT(wait);
5497 5581
5498 caching_ctl = get_caching_control(cache); 5582 caching_ctl = get_caching_control(cache);
5499 if (!caching_ctl) 5583 if (!caching_ctl)
@@ -5507,20 +5591,16 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
5507 5591
5508int __get_raid_index(u64 flags) 5592int __get_raid_index(u64 flags)
5509{ 5593{
5510 int index;
5511
5512 if (flags & BTRFS_BLOCK_GROUP_RAID10) 5594 if (flags & BTRFS_BLOCK_GROUP_RAID10)
5513 index = 0; 5595 return BTRFS_RAID_RAID10;
5514 else if (flags & BTRFS_BLOCK_GROUP_RAID1) 5596 else if (flags & BTRFS_BLOCK_GROUP_RAID1)
5515 index = 1; 5597 return BTRFS_RAID_RAID1;
5516 else if (flags & BTRFS_BLOCK_GROUP_DUP) 5598 else if (flags & BTRFS_BLOCK_GROUP_DUP)
5517 index = 2; 5599 return BTRFS_RAID_DUP;
5518 else if (flags & BTRFS_BLOCK_GROUP_RAID0) 5600 else if (flags & BTRFS_BLOCK_GROUP_RAID0)
5519 index = 3; 5601 return BTRFS_RAID_RAID0;
5520 else 5602 else
5521 index = 4; 5603 return BTRFS_RAID_SINGLE;
5522
5523 return index;
5524} 5604}
5525 5605
5526static int get_block_group_index(struct btrfs_block_group_cache *cache) 5606static int get_block_group_index(struct btrfs_block_group_cache *cache)
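
__get_raid_index() now returns named BTRFS_RAID_* values instead of bare 0..4, which btrfs_can_relocate() picks up further down. A sketch of the mapping; the flag bit values here are illustrative, not the real BTRFS_BLOCK_GROUP_* constants:

#include <stdint.h>
#include <stdio.h>

#define BLOCK_GROUP_RAID10 (1u << 0)   /* illustrative flag bits, not the */
#define BLOCK_GROUP_RAID1  (1u << 1)   /* real BTRFS_BLOCK_GROUP_* values */
#define BLOCK_GROUP_DUP    (1u << 2)
#define BLOCK_GROUP_RAID0  (1u << 3)

enum btrfs_raid_types {
        BTRFS_RAID_RAID10 = 0,         /* values mirror the old 0..4 indexes */
        BTRFS_RAID_RAID1  = 1,
        BTRFS_RAID_DUP    = 2,
        BTRFS_RAID_RAID0  = 3,
        BTRFS_RAID_SINGLE = 4,
};

static enum btrfs_raid_types get_raid_index(uint64_t flags)
{
        if (flags & BLOCK_GROUP_RAID10)
                return BTRFS_RAID_RAID10;
        if (flags & BLOCK_GROUP_RAID1)
                return BTRFS_RAID_RAID1;
        if (flags & BLOCK_GROUP_DUP)
                return BTRFS_RAID_DUP;
        if (flags & BLOCK_GROUP_RAID0)
                return BTRFS_RAID_RAID0;
        return BTRFS_RAID_SINGLE;
}

int main(void)
{
        printf("raid1 index: %d\n", get_raid_index(BLOCK_GROUP_RAID1));
        return 0;
}
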
@@ -5678,8 +5758,7 @@ have_block_group:
5678 cached = block_group_cache_done(block_group); 5758 cached = block_group_cache_done(block_group);
5679 if (unlikely(!cached)) { 5759 if (unlikely(!cached)) {
5680 found_uncached_bg = true; 5760 found_uncached_bg = true;
5681 ret = cache_block_group(block_group, trans, 5761 ret = cache_block_group(block_group, 0);
5682 orig_root, 0);
5683 BUG_ON(ret < 0); 5762 BUG_ON(ret < 0);
5684 ret = 0; 5763 ret = 0;
5685 } 5764 }
@@ -6108,7 +6187,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
6108 btrfs_mark_buffer_dirty(path->nodes[0]); 6187 btrfs_mark_buffer_dirty(path->nodes[0]);
6109 btrfs_free_path(path); 6188 btrfs_free_path(path);
6110 6189
6111 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 6190 ret = update_block_group(root, ins->objectid, ins->offset, 1);
6112 if (ret) { /* -ENOENT, logic error */ 6191 if (ret) { /* -ENOENT, logic error */
6113 printk(KERN_ERR "btrfs update block group failed for %llu " 6192 printk(KERN_ERR "btrfs update block group failed for %llu "
6114 "%llu\n", (unsigned long long)ins->objectid, 6193 "%llu\n", (unsigned long long)ins->objectid,
@@ -6172,7 +6251,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
6172 btrfs_mark_buffer_dirty(leaf); 6251 btrfs_mark_buffer_dirty(leaf);
6173 btrfs_free_path(path); 6252 btrfs_free_path(path);
6174 6253
6175 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 6254 ret = update_block_group(root, ins->objectid, ins->offset, 1);
6176 if (ret) { /* -ENOENT, logic error */ 6255 if (ret) { /* -ENOENT, logic error */
6177 printk(KERN_ERR "btrfs update block group failed for %llu " 6256 printk(KERN_ERR "btrfs update block group failed for %llu "
6178 "%llu\n", (unsigned long long)ins->objectid, 6257 "%llu\n", (unsigned long long)ins->objectid,
@@ -6215,7 +6294,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
6215 u64 num_bytes = ins->offset; 6294 u64 num_bytes = ins->offset;
6216 6295
6217 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 6296 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
6218 cache_block_group(block_group, trans, NULL, 0); 6297 cache_block_group(block_group, 0);
6219 caching_ctl = get_caching_control(block_group); 6298 caching_ctl = get_caching_control(block_group);
6220 6299
6221 if (!caching_ctl) { 6300 if (!caching_ctl) {
@@ -6329,12 +6408,14 @@ use_block_rsv(struct btrfs_trans_handle *trans,
6329 if (!ret) 6408 if (!ret)
6330 return block_rsv; 6409 return block_rsv;
6331 if (ret && !block_rsv->failfast) { 6410 if (ret && !block_rsv->failfast) {
6332 static DEFINE_RATELIMIT_STATE(_rs, 6411 if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
6333 DEFAULT_RATELIMIT_INTERVAL, 6412 static DEFINE_RATELIMIT_STATE(_rs,
6334 /*DEFAULT_RATELIMIT_BURST*/ 2); 6413 DEFAULT_RATELIMIT_INTERVAL * 10,
6335 if (__ratelimit(&_rs)) 6414 /*DEFAULT_RATELIMIT_BURST*/ 1);
6336 WARN(1, KERN_DEBUG "btrfs: block rsv returned %d\n", 6415 if (__ratelimit(&_rs))
6337 ret); 6416 WARN(1, KERN_DEBUG
6417 "btrfs: block rsv returned %d\n", ret);
6418 }
6338 ret = reserve_metadata_bytes(root, block_rsv, blocksize, 6419 ret = reserve_metadata_bytes(root, block_rsv, blocksize,
6339 BTRFS_RESERVE_NO_FLUSH); 6420 BTRFS_RESERVE_NO_FLUSH);
6340 if (!ret) { 6421 if (!ret) {
@@ -6400,7 +6481,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
6400 6481
6401 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 6482 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) {
6402 struct btrfs_delayed_extent_op *extent_op; 6483 struct btrfs_delayed_extent_op *extent_op;
6403 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 6484 extent_op = btrfs_alloc_delayed_extent_op();
6404 BUG_ON(!extent_op); /* -ENOMEM */ 6485 BUG_ON(!extent_op); /* -ENOMEM */
6405 if (key) 6486 if (key)
6406 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 6487 memcpy(&extent_op->key, key, sizeof(extent_op->key));
@@ -7481,16 +7562,16 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
7481 index = get_block_group_index(block_group); 7562 index = get_block_group_index(block_group);
7482 } 7563 }
7483 7564
7484 if (index == 0) { 7565 if (index == BTRFS_RAID_RAID10) {
7485 dev_min = 4; 7566 dev_min = 4;
7486 /* Divide by 2 */ 7567 /* Divide by 2 */
7487 min_free >>= 1; 7568 min_free >>= 1;
7488 } else if (index == 1) { 7569 } else if (index == BTRFS_RAID_RAID1) {
7489 dev_min = 2; 7570 dev_min = 2;
7490 } else if (index == 2) { 7571 } else if (index == BTRFS_RAID_DUP) {
7491 /* Multiply by 2 */ 7572 /* Multiply by 2 */
7492 min_free <<= 1; 7573 min_free <<= 1;
7493 } else if (index == 3) { 7574 } else if (index == BTRFS_RAID_RAID0) {
7494 dev_min = fs_devices->rw_devices; 7575 dev_min = fs_devices->rw_devices;
7495 do_div(min_free, dev_min); 7576 do_div(min_free, dev_min);
7496 } 7577 }
@@ -7651,11 +7732,13 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7651 space_info = list_entry(info->space_info.next, 7732 space_info = list_entry(info->space_info.next,
7652 struct btrfs_space_info, 7733 struct btrfs_space_info,
7653 list); 7734 list);
7654 if (space_info->bytes_pinned > 0 || 7735 if (btrfs_test_opt(info->tree_root, ENOSPC_DEBUG)) {
7655 space_info->bytes_reserved > 0 || 7736 if (space_info->bytes_pinned > 0 ||
7656 space_info->bytes_may_use > 0) { 7737 space_info->bytes_reserved > 0 ||
7657 WARN_ON(1); 7738 space_info->bytes_may_use > 0) {
7658 dump_space_info(space_info, 0, 0); 7739 WARN_ON(1);
7740 dump_space_info(space_info, 0, 0);
7741 }
7659 } 7742 }
7660 list_del(&space_info->list); 7743 list_del(&space_info->list);
7661 kfree(space_info); 7744 kfree(space_info);
@@ -7932,12 +8015,14 @@ static void clear_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
7932 u64 extra_flags = chunk_to_extended(flags) & 8015 u64 extra_flags = chunk_to_extended(flags) &
7933 BTRFS_EXTENDED_PROFILE_MASK; 8016 BTRFS_EXTENDED_PROFILE_MASK;
7934 8017
8018 write_seqlock(&fs_info->profiles_lock);
7935 if (flags & BTRFS_BLOCK_GROUP_DATA) 8019 if (flags & BTRFS_BLOCK_GROUP_DATA)
7936 fs_info->avail_data_alloc_bits &= ~extra_flags; 8020 fs_info->avail_data_alloc_bits &= ~extra_flags;
7937 if (flags & BTRFS_BLOCK_GROUP_METADATA) 8021 if (flags & BTRFS_BLOCK_GROUP_METADATA)
7938 fs_info->avail_metadata_alloc_bits &= ~extra_flags; 8022 fs_info->avail_metadata_alloc_bits &= ~extra_flags;
7939 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 8023 if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
7940 fs_info->avail_system_alloc_bits &= ~extra_flags; 8024 fs_info->avail_system_alloc_bits &= ~extra_flags;
8025 write_sequnlock(&fs_info->profiles_lock);
7941} 8026}
7942 8027
7943int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 8028int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
@@ -8036,6 +8121,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8036 spin_lock(&root->fs_info->block_group_cache_lock); 8121 spin_lock(&root->fs_info->block_group_cache_lock);
8037 rb_erase(&block_group->cache_node, 8122 rb_erase(&block_group->cache_node,
8038 &root->fs_info->block_group_cache_tree); 8123 &root->fs_info->block_group_cache_tree);
8124
8125 if (root->fs_info->first_logical_byte == block_group->key.objectid)
8126 root->fs_info->first_logical_byte = (u64)-1;
8039 spin_unlock(&root->fs_info->block_group_cache_lock); 8127 spin_unlock(&root->fs_info->block_group_cache_lock);
8040 8128
8041 down_write(&block_group->space_info->groups_sem); 8129 down_write(&block_group->space_info->groups_sem);
@@ -8158,7 +8246,7 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
8158 8246
8159 if (end - start >= range->minlen) { 8247 if (end - start >= range->minlen) {
8160 if (!block_group_cache_done(cache)) { 8248 if (!block_group_cache_done(cache)) {
8161 ret = cache_block_group(cache, NULL, root, 0); 8249 ret = cache_block_group(cache, 0);
8162 if (!ret) 8250 if (!ret)
8163 wait_block_group_cache_done(cache); 8251 wait_block_group_cache_done(cache);
8164 } 8252 }
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 1b319df29eee..5c00d6aeae75 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1834,7 +1834,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
1834 */ 1834 */
1835static void check_page_uptodate(struct extent_io_tree *tree, struct page *page) 1835static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1836{ 1836{
1837 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1837 u64 start = page_offset(page);
1838 u64 end = start + PAGE_CACHE_SIZE - 1; 1838 u64 end = start + PAGE_CACHE_SIZE - 1;
1839 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL)) 1839 if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
1840 SetPageUptodate(page); 1840 SetPageUptodate(page);
@@ -1846,7 +1846,7 @@ static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
1846 */ 1846 */
1847static void check_page_locked(struct extent_io_tree *tree, struct page *page) 1847static void check_page_locked(struct extent_io_tree *tree, struct page *page)
1848{ 1848{
1849 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 1849 u64 start = page_offset(page);
1850 u64 end = start + PAGE_CACHE_SIZE - 1; 1850 u64 end = start + PAGE_CACHE_SIZE - 1;
1851 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL)) 1851 if (!test_range_bit(tree, start, end, EXTENT_LOCKED, 0, NULL))
1852 unlock_page(page); 1852 unlock_page(page);
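
This file repeatedly replaces the open-coded (u64)page->index << PAGE_CACHE_SHIFT with page_offset(page), whose pagemap.h definition should compute the same byte offset. A userspace model of the equivalence:

#include <stdint.h>
#include <stdio.h>

#define PAGE_CACHE_SHIFT 12            /* 4K pages assumed */

struct page_model { unsigned long index; };

/* stand-in for page_offset() from linux/pagemap.h */
static uint64_t page_offset_model(const struct page_model *page)
{
        return (uint64_t)page->index << PAGE_CACHE_SHIFT;
}

int main(void)
{
        struct page_model p = { .index = 3 };
        /* start/end of the page in file byte offsets, as in the patch */
        uint64_t start = page_offset_model(&p);
        uint64_t end = start + (1 << PAGE_CACHE_SHIFT) - 1;

        printf("start=%llu end=%llu\n",
               (unsigned long long)start, (unsigned long long)end);
        return 0;
}
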
@@ -1960,7 +1960,7 @@ int repair_io_failure(struct btrfs_fs_info *fs_info, u64 start,
1960 return -EIO; 1960 return -EIO;
1961 } 1961 }
1962 bio->bi_bdev = dev->bdev; 1962 bio->bi_bdev = dev->bdev;
1963 bio_add_page(bio, page, length, start-page_offset(page)); 1963 bio_add_page(bio, page, length, start - page_offset(page));
1964 btrfsic_submit_bio(WRITE_SYNC, bio); 1964 btrfsic_submit_bio(WRITE_SYNC, bio);
1965 wait_for_completion(&compl); 1965 wait_for_completion(&compl);
1966 1966
@@ -2293,8 +2293,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
2293 struct page *page = bvec->bv_page; 2293 struct page *page = bvec->bv_page;
2294 tree = &BTRFS_I(page->mapping->host)->io_tree; 2294 tree = &BTRFS_I(page->mapping->host)->io_tree;
2295 2295
2296 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 2296 start = page_offset(page) + bvec->bv_offset;
2297 bvec->bv_offset;
2298 end = start + bvec->bv_len - 1; 2297 end = start + bvec->bv_len - 1;
2299 2298
2300 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 2299 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
@@ -2353,8 +2352,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
2353 (long int)bio->bi_bdev); 2352 (long int)bio->bi_bdev);
2354 tree = &BTRFS_I(page->mapping->host)->io_tree; 2353 tree = &BTRFS_I(page->mapping->host)->io_tree;
2355 2354
2356 start = ((u64)page->index << PAGE_CACHE_SHIFT) + 2355 start = page_offset(page) + bvec->bv_offset;
2357 bvec->bv_offset;
2358 end = start + bvec->bv_len - 1; 2356 end = start + bvec->bv_len - 1;
2359 2357
2360 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE) 2358 if (bvec->bv_offset == 0 && bvec->bv_len == PAGE_CACHE_SIZE)
@@ -2471,7 +2469,7 @@ static int __must_check submit_one_bio(int rw, struct bio *bio,
2471 struct extent_io_tree *tree = bio->bi_private; 2469 struct extent_io_tree *tree = bio->bi_private;
2472 u64 start; 2470 u64 start;
2473 2471
2474 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 2472 start = page_offset(page) + bvec->bv_offset;
2475 2473
2476 bio->bi_private = NULL; 2474 bio->bi_private = NULL;
2477 2475
@@ -2595,7 +2593,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2595 unsigned long *bio_flags) 2593 unsigned long *bio_flags)
2596{ 2594{
2597 struct inode *inode = page->mapping->host; 2595 struct inode *inode = page->mapping->host;
2598 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2596 u64 start = page_offset(page);
2599 u64 page_end = start + PAGE_CACHE_SIZE - 1; 2597 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2600 u64 end; 2598 u64 end;
2601 u64 cur = start; 2599 u64 cur = start;
@@ -2648,6 +2646,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2648 } 2646 }
2649 } 2647 }
2650 while (cur <= end) { 2648 while (cur <= end) {
2649 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2650
2651 if (cur >= last_byte) { 2651 if (cur >= last_byte) {
2652 char *userpage; 2652 char *userpage;
2653 struct extent_state *cached = NULL; 2653 struct extent_state *cached = NULL;
@@ -2735,26 +2735,17 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
2735 continue; 2735 continue;
2736 } 2736 }
2737 2737
2738 ret = 0; 2738 pnr -= page->index;
2739 if (tree->ops && tree->ops->readpage_io_hook) { 2739 ret = submit_extent_page(READ, tree, page,
2740 ret = tree->ops->readpage_io_hook(page, cur,
2741 cur + iosize - 1);
2742 }
2743 if (!ret) {
2744 unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
2745 pnr -= page->index;
2746 ret = submit_extent_page(READ, tree, page,
2747 sector, disk_io_size, pg_offset, 2740 sector, disk_io_size, pg_offset,
2748 bdev, bio, pnr, 2741 bdev, bio, pnr,
2749 end_bio_extent_readpage, mirror_num, 2742 end_bio_extent_readpage, mirror_num,
2750 *bio_flags, 2743 *bio_flags,
2751 this_bio_flag); 2744 this_bio_flag);
2752 if (!ret) { 2745 if (!ret) {
2753 nr++; 2746 nr++;
2754 *bio_flags = this_bio_flag; 2747 *bio_flags = this_bio_flag;
2755 } 2748 } else {
2756 }
2757 if (ret) {
2758 SetPageError(page); 2749 SetPageError(page);
2759 unlock_extent(tree, cur, cur + iosize - 1); 2750 unlock_extent(tree, cur, cur + iosize - 1);
2760 } 2751 }
@@ -2806,7 +2797,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2806 struct inode *inode = page->mapping->host; 2797 struct inode *inode = page->mapping->host;
2807 struct extent_page_data *epd = data; 2798 struct extent_page_data *epd = data;
2808 struct extent_io_tree *tree = epd->tree; 2799 struct extent_io_tree *tree = epd->tree;
2809 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 2800 u64 start = page_offset(page);
2810 u64 delalloc_start; 2801 u64 delalloc_start;
2811 u64 page_end = start + PAGE_CACHE_SIZE - 1; 2802 u64 page_end = start + PAGE_CACHE_SIZE - 1;
2812 u64 end; 2803 u64 end;
@@ -3124,12 +3115,9 @@ static int lock_extent_buffer_for_io(struct extent_buffer *eb,
3124 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags); 3115 set_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags);
3125 spin_unlock(&eb->refs_lock); 3116 spin_unlock(&eb->refs_lock);
3126 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); 3117 btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN);
3127 spin_lock(&fs_info->delalloc_lock); 3118 __percpu_counter_add(&fs_info->dirty_metadata_bytes,
3128 if (fs_info->dirty_metadata_bytes >= eb->len) 3119 -eb->len,
3129 fs_info->dirty_metadata_bytes -= eb->len; 3120 fs_info->dirty_metadata_batch);
3130 else
3131 WARN_ON(1);
3132 spin_unlock(&fs_info->delalloc_lock);
3133 ret = 1; 3121 ret = 1;
3134 } else { 3122 } else {
3135 spin_unlock(&eb->refs_lock); 3123 spin_unlock(&eb->refs_lock);
@@ -3446,15 +3434,9 @@ retry:
3446 * swizzled back from swapper_space to tmpfs file 3434 * swizzled back from swapper_space to tmpfs file
3447 * mapping 3435 * mapping
3448 */ 3436 */
3449 if (tree->ops && 3437 if (!trylock_page(page)) {
3450 tree->ops->write_cache_pages_lock_hook) { 3438 flush_fn(data);
3451 tree->ops->write_cache_pages_lock_hook(page, 3439 lock_page(page);
3452 data, flush_fn);
3453 } else {
3454 if (!trylock_page(page)) {
3455 flush_fn(data);
3456 lock_page(page);
3457 }
3458 } 3440 }
3459 3441
3460 if (unlikely(page->mapping != mapping)) { 3442 if (unlikely(page->mapping != mapping)) {
@@ -3674,7 +3656,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
3674 struct page *page, unsigned long offset) 3656 struct page *page, unsigned long offset)
3675{ 3657{
3676 struct extent_state *cached_state = NULL; 3658 struct extent_state *cached_state = NULL;
3677 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); 3659 u64 start = page_offset(page);
3678 u64 end = start + PAGE_CACHE_SIZE - 1; 3660 u64 end = start + PAGE_CACHE_SIZE - 1;
3679 size_t blocksize = page->mapping->host->i_sb->s_blocksize; 3661 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
3680 3662
@@ -3700,7 +3682,7 @@ int try_release_extent_state(struct extent_map_tree *map,
3700 struct extent_io_tree *tree, struct page *page, 3682 struct extent_io_tree *tree, struct page *page,
3701 gfp_t mask) 3683 gfp_t mask)
3702{ 3684{
3703 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 3685 u64 start = page_offset(page);
3704 u64 end = start + PAGE_CACHE_SIZE - 1; 3686 u64 end = start + PAGE_CACHE_SIZE - 1;
3705 int ret = 1; 3687 int ret = 1;
3706 3688
@@ -3739,7 +3721,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
3739 gfp_t mask) 3721 gfp_t mask)
3740{ 3722{
3741 struct extent_map *em; 3723 struct extent_map *em;
3742 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 3724 u64 start = page_offset(page);
3743 u64 end = start + PAGE_CACHE_SIZE - 1; 3725 u64 end = start + PAGE_CACHE_SIZE - 1;
3744 3726
3745 if ((mask & __GFP_WAIT) && 3727 if ((mask & __GFP_WAIT) &&
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 2eacfabd3263..ff182322d112 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -75,7 +75,6 @@ struct extent_io_ops {
75 int (*merge_bio_hook)(struct page *page, unsigned long offset, 75 int (*merge_bio_hook)(struct page *page, unsigned long offset,
76 size_t size, struct bio *bio, 76 size_t size, struct bio *bio,
77 unsigned long bio_flags); 77 unsigned long bio_flags);
78 int (*readpage_io_hook)(struct page *page, u64 start, u64 end);
79 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror); 78 int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
80 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, 79 int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
81 struct extent_state *state, int mirror); 80 struct extent_state *state, int mirror);
@@ -90,8 +89,6 @@ struct extent_io_ops {
90 struct extent_state *other); 89 struct extent_state *other);
91 void (*split_extent_hook)(struct inode *inode, 90 void (*split_extent_hook)(struct inode *inode,
92 struct extent_state *orig, u64 split); 91 struct extent_state *orig, u64 split);
93 int (*write_cache_pages_lock_hook)(struct page *page, void *data,
94 void (*flush_fn)(void *));
95}; 92};
96 93
97struct extent_io_tree { 94struct extent_io_tree {
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 94aa53b38721..ec160202be3e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -684,6 +684,24 @@ out:
684 return ret; 684 return ret;
685} 685}
686 686
687static u64 btrfs_sector_sum_left(struct btrfs_ordered_sum *sums,
688 struct btrfs_sector_sum *sector_sum,
689 u64 total_bytes, u64 sectorsize)
690{
691 u64 tmp = sectorsize;
692 u64 next_sector = sector_sum->bytenr;
693 struct btrfs_sector_sum *next = sector_sum + 1;
694
695 while ((tmp + total_bytes) < sums->len) {
696 if (next_sector + sectorsize != next->bytenr)
697 break;
698 tmp += sectorsize;
699 next_sector = next->bytenr;
700 next++;
701 }
702 return tmp;
703}
704
687int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, 705int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
688 struct btrfs_root *root, 706 struct btrfs_root *root,
689 struct btrfs_ordered_sum *sums) 707 struct btrfs_ordered_sum *sums)
@@ -789,20 +807,32 @@ again:
789 goto insert; 807 goto insert;
790 } 808 }
791 809
792 if (csum_offset >= btrfs_item_size_nr(leaf, path->slots[0]) / 810 if (csum_offset == btrfs_item_size_nr(leaf, path->slots[0]) /
793 csum_size) { 811 csum_size) {
794 u32 diff = (csum_offset + 1) * csum_size; 812 int extend_nr;
813 u64 tmp;
814 u32 diff;
815 u32 free_space;
795 816
796 /* 817 if (btrfs_leaf_free_space(root, leaf) <
797 * is the item big enough already? we dropped our lock 818 sizeof(struct btrfs_item) + csum_size * 2)
798 * before and need to recheck 819 goto insert;
799 */ 820
800 if (diff < btrfs_item_size_nr(leaf, path->slots[0])) 821 free_space = btrfs_leaf_free_space(root, leaf) -
801 goto csum; 822 sizeof(struct btrfs_item) - csum_size;
823 tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes,
824 root->sectorsize);
825 tmp >>= root->fs_info->sb->s_blocksize_bits;
826 WARN_ON(tmp < 1);
827
828 extend_nr = max_t(int, 1, (int)tmp);
829 diff = (csum_offset + extend_nr) * csum_size;
830 diff = min(diff, MAX_CSUM_ITEMS(root, csum_size) * csum_size);
802 831
803 diff = diff - btrfs_item_size_nr(leaf, path->slots[0]); 832 diff = diff - btrfs_item_size_nr(leaf, path->slots[0]);
804 if (diff != csum_size) 833 diff = min(free_space, diff);
805 goto insert; 834 diff /= csum_size;
835 diff *= csum_size;
806 836
807 btrfs_extend_item(trans, root, path, diff); 837 btrfs_extend_item(trans, root, path, diff);
808 goto csum; 838 goto csum;
@@ -812,19 +842,14 @@ insert:
812 btrfs_release_path(path); 842 btrfs_release_path(path);
813 csum_offset = 0; 843 csum_offset = 0;
814 if (found_next) { 844 if (found_next) {
815 u64 tmp = total_bytes + root->sectorsize; 845 u64 tmp;
816 u64 next_sector = sector_sum->bytenr;
817 struct btrfs_sector_sum *next = sector_sum + 1;
818 846
819 while (tmp < sums->len) { 847 tmp = btrfs_sector_sum_left(sums, sector_sum, total_bytes,
820 if (next_sector + root->sectorsize != next->bytenr) 848 root->sectorsize);
821 break;
822 tmp += root->sectorsize;
823 next_sector = next->bytenr;
824 next++;
825 }
826 tmp = min(tmp, next_offset - file_key.offset);
827 tmp >>= root->fs_info->sb->s_blocksize_bits; 849 tmp >>= root->fs_info->sb->s_blocksize_bits;
850 tmp = min(tmp, (next_offset - file_key.offset) >>
851 root->fs_info->sb->s_blocksize_bits);
852
828 tmp = max((u64)1, tmp); 853 tmp = max((u64)1, tmp);
829 tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size)); 854 tmp = min(tmp, (u64)MAX_CSUM_ITEMS(root, csum_size));
830 ins_size = csum_size * tmp; 855 ins_size = csum_size * tmp;
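
A minimal userspace model of the sizing logic added above may help: btrfs_sector_sum_left() counts how many checksummed bytes ahead of the current position cover physically contiguous sectors, and the item-extension path then clamps that run to the leaf's free space (the kernel additionally clamps to MAX_CSUM_ITEMS, omitted here), rounding down to whole checksums. The sketch below reimplements just that arithmetic; the 4 KiB sector size, 4-byte checksum size, and leaf free space are illustrative assumptions, not values taken from the kernel.

#include <stdio.h>
#include <stdint.h>

/* Model of one per-sector checksum entry: only the disk bytenr matters here. */
struct sector_sum { uint64_t bytenr; };

/* Count contiguous checksummed bytes ahead of 'cur', mirroring
 * btrfs_sector_sum_left(): stop at the first physical gap or at sums_len. */
static uint64_t sum_bytes_left(const struct sector_sum *cur, uint64_t done,
                               uint64_t sums_len, uint64_t sectorsize)
{
    uint64_t tmp = sectorsize;
    uint64_t next_sector = cur->bytenr;
    const struct sector_sum *next = cur + 1;

    while (tmp + done < sums_len) {
        if (next_sector + sectorsize != next->bytenr)
            break;
        tmp += sectorsize;
        next_sector = next->bytenr;
        next++;
    }
    return tmp;
}

int main(void)
{
    /* Assumed geometry: 4 KiB sectors, 4-byte crc32c csums, a leaf with
     * 100 bytes free after item overhead, and a 6-sector ordered sum with
     * a physical gap after the fourth sector. */
    uint64_t sectorsize = 4096, csum_size = 4;
    struct sector_sum sums[6] = {
        {0x10000}, {0x11000}, {0x12000}, {0x13000}, {0x20000}, {0x21000},
    };
    uint64_t sums_len = 6 * sectorsize;
    uint64_t contig = sum_bytes_left(&sums[0], 0, sums_len, sectorsize);
    uint64_t extend_nr = contig / sectorsize;   /* sectors we could add */
    uint32_t free_space = 100;                  /* bytes left in the leaf */
    uint32_t diff = (uint32_t)(extend_nr * csum_size);

    if (diff > free_space)
        diff = free_space;
    diff = diff / csum_size * csum_size;        /* whole csums only */

    printf("contiguous bytes: %llu, extend item by %u csum bytes\n",
           (unsigned long long)contig, diff);
    return 0;
}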
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index aeb84469d2c4..9f67e623206d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -30,11 +30,11 @@
30#include <linux/statfs.h> 30#include <linux/statfs.h>
31#include <linux/compat.h> 31#include <linux/compat.h>
32#include <linux/slab.h> 32#include <linux/slab.h>
33#include <linux/btrfs.h>
33#include "ctree.h" 34#include "ctree.h"
34#include "disk-io.h" 35#include "disk-io.h"
35#include "transaction.h" 36#include "transaction.h"
36#include "btrfs_inode.h" 37#include "btrfs_inode.h"
37#include "ioctl.h"
38#include "print-tree.h" 38#include "print-tree.h"
39#include "tree-log.h" 39#include "tree-log.h"
40#include "locking.h" 40#include "locking.h"
@@ -1544,7 +1544,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1544 * although we have opened a file as writable, we have 1544 * although we have opened a file as writable, we have
1545 * to stop this write operation to ensure FS consistency. 1545 * to stop this write operation to ensure FS consistency.
1546 */ 1546 */
1547 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 1547 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) {
1548 mutex_unlock(&inode->i_mutex); 1548 mutex_unlock(&inode->i_mutex);
1549 err = -EROFS; 1549 err = -EROFS;
1550 goto out; 1550 goto out;
@@ -1627,7 +1627,20 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
1627 */ 1627 */
1628 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, 1628 if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
1629 &BTRFS_I(inode)->runtime_flags)) { 1629 &BTRFS_I(inode)->runtime_flags)) {
1630 btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); 1630 struct btrfs_trans_handle *trans;
1631 struct btrfs_root *root = BTRFS_I(inode)->root;
1632
1633 /*
1634 * We need to block on a committing transaction to keep us from
 1635 * throwing an ordered operation onto the list and causing
1636 * something like sync to deadlock trying to flush out this
1637 * inode.
1638 */
1639 trans = btrfs_start_transaction(root, 0);
1640 if (IS_ERR(trans))
1641 return PTR_ERR(trans);
1642 btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
1643 btrfs_end_transaction(trans, root);
1631 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) 1644 if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
1632 filemap_flush(inode->i_mapping); 1645 filemap_flush(inode->i_mapping);
1633 } 1646 }
@@ -1654,16 +1667,21 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1654 struct btrfs_root *root = BTRFS_I(inode)->root; 1667 struct btrfs_root *root = BTRFS_I(inode)->root;
1655 int ret = 0; 1668 int ret = 0;
1656 struct btrfs_trans_handle *trans; 1669 struct btrfs_trans_handle *trans;
1670 bool full_sync = 0;
1657 1671
1658 trace_btrfs_sync_file(file, datasync); 1672 trace_btrfs_sync_file(file, datasync);
1659 1673
1660 /* 1674 /*
1661 * We write the dirty pages in the range and wait until they complete 1675 * We write the dirty pages in the range and wait until they complete
 1662 * outside of the ->i_mutex, so multiple tasks can flush dirty pages 1676 * outside of the ->i_mutex, so multiple tasks can flush dirty pages
 1663 * concurrently and improve performance. 1677 * concurrently and improve performance. See
1678 * btrfs_wait_ordered_range for an explanation of the ASYNC check.
1664 */ 1679 */
1665 atomic_inc(&BTRFS_I(inode)->sync_writers); 1680 atomic_inc(&BTRFS_I(inode)->sync_writers);
1666 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 1681 ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
1682 if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
1683 &BTRFS_I(inode)->runtime_flags))
1684 ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
1667 atomic_dec(&BTRFS_I(inode)->sync_writers); 1685 atomic_dec(&BTRFS_I(inode)->sync_writers);
1668 if (ret) 1686 if (ret)
1669 return ret; 1687 return ret;
@@ -1675,7 +1693,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1675 * range being left. 1693 * range being left.
1676 */ 1694 */
1677 atomic_inc(&root->log_batch); 1695 atomic_inc(&root->log_batch);
1678 btrfs_wait_ordered_range(inode, start, end - start + 1); 1696 full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
1697 &BTRFS_I(inode)->runtime_flags);
1698 if (full_sync)
1699 btrfs_wait_ordered_range(inode, start, end - start + 1);
1679 atomic_inc(&root->log_batch); 1700 atomic_inc(&root->log_batch);
1680 1701
1681 /* 1702 /*
@@ -1742,13 +1763,25 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
1742 1763
1743 if (ret != BTRFS_NO_LOG_SYNC) { 1764 if (ret != BTRFS_NO_LOG_SYNC) {
1744 if (ret > 0) { 1765 if (ret > 0) {
1766 /*
 1767 * If we didn't already wait for ordered extents, we need
1768 * to do that now.
1769 */
1770 if (!full_sync)
1771 btrfs_wait_ordered_range(inode, start,
1772 end - start + 1);
1745 ret = btrfs_commit_transaction(trans, root); 1773 ret = btrfs_commit_transaction(trans, root);
1746 } else { 1774 } else {
1747 ret = btrfs_sync_log(trans, root); 1775 ret = btrfs_sync_log(trans, root);
1748 if (ret == 0) 1776 if (ret == 0) {
1749 ret = btrfs_end_transaction(trans, root); 1777 ret = btrfs_end_transaction(trans, root);
1750 else 1778 } else {
1779 if (!full_sync)
1780 btrfs_wait_ordered_range(inode, start,
1781 end -
1782 start + 1);
1751 ret = btrfs_commit_transaction(trans, root); 1783 ret = btrfs_commit_transaction(trans, root);
1784 }
1752 } 1785 }
1753 } else { 1786 } else {
1754 ret = btrfs_end_transaction(trans, root); 1787 ret = btrfs_end_transaction(trans, root);
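
The full_sync dance above is easier to see in isolation: with a full sync the ordered extents are waited on before logging; with a fast sync the wait is deferred and only performed on the paths that fall back to a transaction commit. A hedged model of just that decision (the function name and encoding are ours, not btrfs's):

#include <stdio.h>
#include <stdbool.h>

/* Model of the fsync ordering rule in this hunk: full_sync waits for
 * ordered extents up front; a fast sync defers the wait and only does it
 * when the log cannot be used and we must commit. Purely illustrative. */
static bool must_wait_before_commit(bool full_sync, int log_ret)
{
    if (full_sync)
        return false;           /* already waited up front */
    /* log_ret > 0: can't use the tree log; log_ret < 0: log sync failed.
     * Both fall back to btrfs_commit_transaction() and must wait first;
     * log_ret == 0 ends the transaction without committing. */
    return log_ret != 0;
}

int main(void)
{
    struct { bool full; int ret; } cases[] = {
        { true,  1 }, { false, 1 }, { false, 0 }, { false, -5 },
    };
    for (unsigned i = 0; i < sizeof(cases) / sizeof(cases[0]); i++)
        printf("full_sync=%d log_ret=%2d -> wait now: %d\n",
               cases[i].full, cases[i].ret,
               must_wait_before_commit(cases[i].full, cases[i].ret));
    return 0;
}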
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0be7a8742a43..c8090f18c217 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1356,6 +1356,8 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
1356 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit; 1356 u64 bytes_per_bg = BITS_PER_BITMAP * ctl->unit;
1357 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg); 1357 int max_bitmaps = div64_u64(size + bytes_per_bg - 1, bytes_per_bg);
1358 1358
1359 max_bitmaps = max(max_bitmaps, 1);
1360
1359 BUG_ON(ctl->total_bitmaps > max_bitmaps); 1361 BUG_ON(ctl->total_bitmaps > max_bitmaps);
1360 1362
1361 /* 1363 /*
@@ -1636,10 +1638,14 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl,
1636 } 1638 }
1637 1639
1638 /* 1640 /*
1639 * some block groups are so tiny they can't be enveloped by a bitmap, so 1641 * The original block groups from mkfs can be really small, like 8
1640 * don't even bother to create a bitmap for this 1642 * megabytes, so don't bother with a bitmap for those entries. However
1643 * some block groups can be smaller than what a bitmap would cover but
1644 * are still large enough that they could overflow the 32k memory limit,
 1645 * so still allow those block groups to have a bitmap
 1646 * entry.
1641 */ 1647 */
1642 if (BITS_PER_BITMAP * ctl->unit > block_group->key.offset) 1648 if (((BITS_PER_BITMAP * ctl->unit) >> 1) > block_group->key.offset)
1643 return false; 1649 return false;
1644 1650
1645 return true; 1651 return true;
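
To put numbers on the halved threshold above: assuming a 4 KiB ctl->unit and one-page bitmaps (BITS_PER_BITMAP = 32768 is our assumption; the kernel derives it from PAGE_CACHE_SIZE), one bitmap spans 128 MiB, so the old test refused bitmap entries for any block group under 128 MiB, while the new test admits groups down to 64 MiB:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* Assumed geometry: 4 KiB units, one-page (32768-bit) bitmaps. */
    uint64_t unit = 4096;
    uint64_t bits_per_bitmap = 4096 * 8;
    uint64_t coverage = bits_per_bitmap * unit;     /* 128 MiB per bitmap */
    uint64_t group_sizes[] = { 8ULL << 20, 64ULL << 20, 100ULL << 20 };

    for (unsigned i = 0; i < 3; i++) {
        uint64_t sz = group_sizes[i];
        int old_ok = !(coverage > sz);          /* pre-patch use_bitmap() */
        int new_ok = !((coverage >> 1) > sz);   /* post-patch use_bitmap() */
        printf("%4llu MiB group: bitmap allowed old=%d new=%d\n",
               (unsigned long long)(sz >> 20), old_ok, new_ok);
    }
    return 0;
}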
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index cc93b23ca352..1aa98be54ce0 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -39,12 +39,12 @@
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/ratelimit.h> 40#include <linux/ratelimit.h>
41#include <linux/mount.h> 41#include <linux/mount.h>
42#include <linux/btrfs.h>
42#include "compat.h" 43#include "compat.h"
43#include "ctree.h" 44#include "ctree.h"
44#include "disk-io.h" 45#include "disk-io.h"
45#include "transaction.h" 46#include "transaction.h"
46#include "btrfs_inode.h" 47#include "btrfs_inode.h"
47#include "ioctl.h"
48#include "print-tree.h" 48#include "print-tree.h"
49#include "ordered-data.h" 49#include "ordered-data.h"
50#include "xattr.h" 50#include "xattr.h"
@@ -608,7 +608,7 @@ static noinline int submit_compressed_extents(struct inode *inode,
608 if (list_empty(&async_cow->extents)) 608 if (list_empty(&async_cow->extents))
609 return 0; 609 return 0;
610 610
611 611again:
612 while (!list_empty(&async_cow->extents)) { 612 while (!list_empty(&async_cow->extents)) {
613 async_extent = list_entry(async_cow->extents.next, 613 async_extent = list_entry(async_cow->extents.next,
614 struct async_extent, list); 614 struct async_extent, list);
@@ -648,6 +648,8 @@ retry:
648 async_extent->ram_size - 1, 648 async_extent->ram_size - 1,
649 btrfs_get_extent, 649 btrfs_get_extent,
650 WB_SYNC_ALL); 650 WB_SYNC_ALL);
651 else if (ret)
652 unlock_page(async_cow->locked_page);
651 kfree(async_extent); 653 kfree(async_extent);
652 cond_resched(); 654 cond_resched();
653 continue; 655 continue;
@@ -672,6 +674,7 @@ retry:
672 674
673 if (ret) { 675 if (ret) {
674 int i; 676 int i;
677
675 for (i = 0; i < async_extent->nr_pages; i++) { 678 for (i = 0; i < async_extent->nr_pages; i++) {
676 WARN_ON(async_extent->pages[i]->mapping); 679 WARN_ON(async_extent->pages[i]->mapping);
677 page_cache_release(async_extent->pages[i]); 680 page_cache_release(async_extent->pages[i]);
@@ -679,12 +682,10 @@ retry:
679 kfree(async_extent->pages); 682 kfree(async_extent->pages);
680 async_extent->nr_pages = 0; 683 async_extent->nr_pages = 0;
681 async_extent->pages = NULL; 684 async_extent->pages = NULL;
682 unlock_extent(io_tree, async_extent->start, 685
683 async_extent->start +
684 async_extent->ram_size - 1);
685 if (ret == -ENOSPC) 686 if (ret == -ENOSPC)
686 goto retry; 687 goto retry;
687 goto out_free; /* JDM: Requeue? */ 688 goto out_free;
688 } 689 }
689 690
690 /* 691 /*
@@ -696,10 +697,13 @@ retry:
696 async_extent->ram_size - 1, 0); 697 async_extent->ram_size - 1, 0);
697 698
698 em = alloc_extent_map(); 699 em = alloc_extent_map();
699 BUG_ON(!em); /* -ENOMEM */ 700 if (!em)
701 goto out_free_reserve;
700 em->start = async_extent->start; 702 em->start = async_extent->start;
701 em->len = async_extent->ram_size; 703 em->len = async_extent->ram_size;
702 em->orig_start = em->start; 704 em->orig_start = em->start;
705 em->mod_start = em->start;
706 em->mod_len = em->len;
703 707
704 em->block_start = ins.objectid; 708 em->block_start = ins.objectid;
705 em->block_len = ins.offset; 709 em->block_len = ins.offset;
@@ -726,6 +730,9 @@ retry:
726 async_extent->ram_size - 1, 0); 730 async_extent->ram_size - 1, 0);
727 } 731 }
728 732
733 if (ret)
734 goto out_free_reserve;
735
729 ret = btrfs_add_ordered_extent_compress(inode, 736 ret = btrfs_add_ordered_extent_compress(inode,
730 async_extent->start, 737 async_extent->start,
731 ins.objectid, 738 ins.objectid,
@@ -733,7 +740,8 @@ retry:
733 ins.offset, 740 ins.offset,
734 BTRFS_ORDERED_COMPRESSED, 741 BTRFS_ORDERED_COMPRESSED,
735 async_extent->compress_type); 742 async_extent->compress_type);
736 BUG_ON(ret); /* -ENOMEM */ 743 if (ret)
744 goto out_free_reserve;
737 745
738 /* 746 /*
739 * clear dirty, set writeback and unlock the pages. 747 * clear dirty, set writeback and unlock the pages.
@@ -754,18 +762,30 @@ retry:
754 ins.objectid, 762 ins.objectid,
755 ins.offset, async_extent->pages, 763 ins.offset, async_extent->pages,
756 async_extent->nr_pages); 764 async_extent->nr_pages);
757
758 BUG_ON(ret); /* -ENOMEM */
759 alloc_hint = ins.objectid + ins.offset; 765 alloc_hint = ins.objectid + ins.offset;
760 kfree(async_extent); 766 kfree(async_extent);
767 if (ret)
768 goto out;
761 cond_resched(); 769 cond_resched();
762 } 770 }
763 ret = 0; 771 ret = 0;
764out: 772out:
765 return ret; 773 return ret;
774out_free_reserve:
775 btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
766out_free: 776out_free:
777 extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
778 async_extent->start,
779 async_extent->start +
780 async_extent->ram_size - 1,
781 NULL, EXTENT_CLEAR_UNLOCK_PAGE |
782 EXTENT_CLEAR_UNLOCK |
783 EXTENT_CLEAR_DELALLOC |
784 EXTENT_CLEAR_DIRTY |
785 EXTENT_SET_WRITEBACK |
786 EXTENT_END_WRITEBACK);
767 kfree(async_extent); 787 kfree(async_extent);
768 goto out; 788 goto again;
769} 789}
770 790
771static u64 get_extent_allocation_hint(struct inode *inode, u64 start, 791static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
@@ -892,6 +912,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
892 em->orig_start = em->start; 912 em->orig_start = em->start;
893 ram_size = ins.offset; 913 ram_size = ins.offset;
894 em->len = ins.offset; 914 em->len = ins.offset;
915 em->mod_start = em->start;
916 em->mod_len = em->len;
895 917
896 em->block_start = ins.objectid; 918 em->block_start = ins.objectid;
897 em->block_len = ins.offset; 919 em->block_len = ins.offset;
@@ -1338,6 +1360,8 @@ out_check:
1338 em->block_start = disk_bytenr; 1360 em->block_start = disk_bytenr;
1339 em->orig_block_len = disk_num_bytes; 1361 em->orig_block_len = disk_num_bytes;
1340 em->bdev = root->fs_info->fs_devices->latest_bdev; 1362 em->bdev = root->fs_info->fs_devices->latest_bdev;
1363 em->mod_start = em->start;
1364 em->mod_len = em->len;
1341 set_bit(EXTENT_FLAG_PINNED, &em->flags); 1365 set_bit(EXTENT_FLAG_PINNED, &em->flags);
1342 set_bit(EXTENT_FLAG_FILLING, &em->flags); 1366 set_bit(EXTENT_FLAG_FILLING, &em->flags);
1343 em->generation = -1; 1367 em->generation = -1;
@@ -1508,14 +1532,22 @@ static void btrfs_set_bit_hook(struct inode *inode,
1508 spin_unlock(&BTRFS_I(inode)->lock); 1532 spin_unlock(&BTRFS_I(inode)->lock);
1509 } 1533 }
1510 1534
1511 spin_lock(&root->fs_info->delalloc_lock); 1535 __percpu_counter_add(&root->fs_info->delalloc_bytes, len,
1536 root->fs_info->delalloc_batch);
1537 spin_lock(&BTRFS_I(inode)->lock);
1512 BTRFS_I(inode)->delalloc_bytes += len; 1538 BTRFS_I(inode)->delalloc_bytes += len;
1513 root->fs_info->delalloc_bytes += len; 1539 if (do_list && !test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1514 if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1540 &BTRFS_I(inode)->runtime_flags)) {
1515 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1541 spin_lock(&root->fs_info->delalloc_lock);
1516 &root->fs_info->delalloc_inodes); 1542 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1543 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1544 &root->fs_info->delalloc_inodes);
1545 set_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1546 &BTRFS_I(inode)->runtime_flags);
1547 }
1548 spin_unlock(&root->fs_info->delalloc_lock);
1517 } 1549 }
1518 spin_unlock(&root->fs_info->delalloc_lock); 1550 spin_unlock(&BTRFS_I(inode)->lock);
1519 } 1551 }
1520} 1552}
1521 1553
@@ -1550,15 +1582,22 @@ static void btrfs_clear_bit_hook(struct inode *inode,
1550 && do_list) 1582 && do_list)
1551 btrfs_free_reserved_data_space(inode, len); 1583 btrfs_free_reserved_data_space(inode, len);
1552 1584
1553 spin_lock(&root->fs_info->delalloc_lock); 1585 __percpu_counter_add(&root->fs_info->delalloc_bytes, -len,
1554 root->fs_info->delalloc_bytes -= len; 1586 root->fs_info->delalloc_batch);
1587 spin_lock(&BTRFS_I(inode)->lock);
1555 BTRFS_I(inode)->delalloc_bytes -= len; 1588 BTRFS_I(inode)->delalloc_bytes -= len;
1556
1557 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 && 1589 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1558 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1590 test_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1559 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1591 &BTRFS_I(inode)->runtime_flags)) {
1592 spin_lock(&root->fs_info->delalloc_lock);
1593 if (!list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1594 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1595 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
1596 &BTRFS_I(inode)->runtime_flags);
1597 }
1598 spin_unlock(&root->fs_info->delalloc_lock);
1560 } 1599 }
1561 spin_unlock(&root->fs_info->delalloc_lock); 1600 spin_unlock(&BTRFS_I(inode)->lock);
1562 } 1601 }
1563} 1602}
1564 1603
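
The switch from a spinlock-protected u64 to __percpu_counter_add() with a delalloc_batch is the interesting part of this hunk. Below is a toy userspace model of a batched per-cpu counter, under the assumption that per-cpu deltas fold into the shared total only once they exceed the batch (the real percpu_counter API differs in detail):

#include <stdio.h>
#include <stdint.h>

#define NCPUS 4

/* Toy batched per-cpu counter: the hot path touches only its own slot,
 * and the shared total (which would be lock-protected) is updated rarely.
 * This is a model, not the kernel's percpu_counter. */
struct pc_counter {
    int64_t total;          /* shared, approximate */
    int64_t local[NCPUS];   /* per-cpu unfolded deltas */
    int64_t batch;
};

static void pc_add(struct pc_counter *c, int cpu, int64_t delta)
{
    c->local[cpu] += delta;
    if (c->local[cpu] >= c->batch || c->local[cpu] <= -c->batch) {
        c->total += c->local[cpu];  /* fold under the shared lock */
        c->local[cpu] = 0;
    }
}

static int64_t pc_sum(const struct pc_counter *c)
{
    int64_t sum = c->total;         /* exact sum walks every cpu slot */
    for (int cpu = 0; cpu < NCPUS; cpu++)
        sum += c->local[cpu];
    return sum;
}

int main(void)
{
    struct pc_counter delalloc = { .batch = 1 << 20 };  /* 1 MiB batch */

    for (int i = 0; i < 1000; i++)
        pc_add(&delalloc, i % NCPUS, 4096);   /* set_bit_hook path */
    for (int i = 0; i < 400; i++)
        pc_add(&delalloc, i % NCPUS, -4096);  /* clear_bit_hook path */

    printf("approx total %lld, exact sum %lld\n",
           (long long)delalloc.total, (long long)pc_sum(&delalloc));
    return 0;
}

The output shows the approximate total lagging the exact sum by less than batch * NCPUS, which is the price paid for keeping the fs-wide lock off the delalloc hot path.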
@@ -2001,11 +2040,23 @@ out:
2001 if (trans) 2040 if (trans)
2002 btrfs_end_transaction(trans, root); 2041 btrfs_end_transaction(trans, root);
2003 2042
2004 if (ret) 2043 if (ret) {
2005 clear_extent_uptodate(io_tree, ordered_extent->file_offset, 2044 clear_extent_uptodate(io_tree, ordered_extent->file_offset,
2006 ordered_extent->file_offset + 2045 ordered_extent->file_offset +
2007 ordered_extent->len - 1, NULL, GFP_NOFS); 2046 ordered_extent->len - 1, NULL, GFP_NOFS);
2008 2047
2048 /*
2049 * If the ordered extent had an IOERR or something else went
 2050 * wrong, we need to return the space for this ordered extent
2051 * back to the allocator.
2052 */
2053 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
2054 !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
2055 btrfs_free_reserved_extent(root, ordered_extent->start,
2056 ordered_extent->disk_len);
2057 }
2058
2059
2009 /* 2060 /*
2010 * This needs to be done to make sure anybody waiting knows we are done 2061 * This needs to be done to make sure anybody waiting knows we are done
2011 * updating everything for this ordered extent. 2062 * updating everything for this ordered extent.
@@ -2062,7 +2113,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
2062static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, 2113static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
2063 struct extent_state *state, int mirror) 2114 struct extent_state *state, int mirror)
2064{ 2115{
2065 size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); 2116 size_t offset = start - page_offset(page);
2066 struct inode *inode = page->mapping->host; 2117 struct inode *inode = page->mapping->host;
2067 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2118 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2068 char *kaddr; 2119 char *kaddr;
@@ -2167,11 +2218,6 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root)
2167 } 2218 }
2168} 2219}
2169 2220
2170enum btrfs_orphan_cleanup_state {
2171 ORPHAN_CLEANUP_STARTED = 1,
2172 ORPHAN_CLEANUP_DONE = 2,
2173};
2174
2175/* 2221/*
 2176 * This is called at transaction commit time. If there are no orphan 2222 * This is called at transaction commit time. If there are no orphan
 2177 * files in the subvolume, it removes the orphan item and frees block_rsv 2223 * files in the subvolume, it removes the orphan item and frees block_rsv
@@ -2469,6 +2515,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2469 */ 2515 */
2470 set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, 2516 set_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
2471 &BTRFS_I(inode)->runtime_flags); 2517 &BTRFS_I(inode)->runtime_flags);
2518 atomic_inc(&root->orphan_inodes);
2472 2519
2473 /* if we have links, this was a truncate, lets do that */ 2520 /* if we have links, this was a truncate, lets do that */
2474 if (inode->i_nlink) { 2521 if (inode->i_nlink) {
@@ -2491,6 +2538,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2491 goto out; 2538 goto out;
2492 2539
2493 ret = btrfs_truncate(inode); 2540 ret = btrfs_truncate(inode);
2541 if (ret)
2542 btrfs_orphan_del(NULL, inode);
2494 } else { 2543 } else {
2495 nr_unlink++; 2544 nr_unlink++;
2496 } 2545 }
@@ -2709,34 +2758,41 @@ static void fill_inode_item(struct btrfs_trans_handle *trans,
2709 struct btrfs_inode_item *item, 2758 struct btrfs_inode_item *item,
2710 struct inode *inode) 2759 struct inode *inode)
2711{ 2760{
2712 btrfs_set_inode_uid(leaf, item, i_uid_read(inode)); 2761 struct btrfs_map_token token;
2713 btrfs_set_inode_gid(leaf, item, i_gid_read(inode)); 2762
2714 btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); 2763 btrfs_init_map_token(&token);
2715 btrfs_set_inode_mode(leaf, item, inode->i_mode);
2716 btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
2717 2764
2718 btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item), 2765 btrfs_set_token_inode_uid(leaf, item, i_uid_read(inode), &token);
2719 inode->i_atime.tv_sec); 2766 btrfs_set_token_inode_gid(leaf, item, i_gid_read(inode), &token);
2720 btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item), 2767 btrfs_set_token_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size,
2721 inode->i_atime.tv_nsec); 2768 &token);
2769 btrfs_set_token_inode_mode(leaf, item, inode->i_mode, &token);
2770 btrfs_set_token_inode_nlink(leaf, item, inode->i_nlink, &token);
2722 2771
2723 btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item), 2772 btrfs_set_token_timespec_sec(leaf, btrfs_inode_atime(item),
2724 inode->i_mtime.tv_sec); 2773 inode->i_atime.tv_sec, &token);
2725 btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item), 2774 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_atime(item),
2726 inode->i_mtime.tv_nsec); 2775 inode->i_atime.tv_nsec, &token);
2727 2776
2728 btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item), 2777 btrfs_set_token_timespec_sec(leaf, btrfs_inode_mtime(item),
2729 inode->i_ctime.tv_sec); 2778 inode->i_mtime.tv_sec, &token);
2730 btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item), 2779 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_mtime(item),
2731 inode->i_ctime.tv_nsec); 2780 inode->i_mtime.tv_nsec, &token);
2732 2781
2733 btrfs_set_inode_nbytes(leaf, item, inode_get_bytes(inode)); 2782 btrfs_set_token_timespec_sec(leaf, btrfs_inode_ctime(item),
2734 btrfs_set_inode_generation(leaf, item, BTRFS_I(inode)->generation); 2783 inode->i_ctime.tv_sec, &token);
2735 btrfs_set_inode_sequence(leaf, item, inode->i_version); 2784 btrfs_set_token_timespec_nsec(leaf, btrfs_inode_ctime(item),
2736 btrfs_set_inode_transid(leaf, item, trans->transid); 2785 inode->i_ctime.tv_nsec, &token);
2737 btrfs_set_inode_rdev(leaf, item, inode->i_rdev); 2786
2738 btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); 2787 btrfs_set_token_inode_nbytes(leaf, item, inode_get_bytes(inode),
2739 btrfs_set_inode_block_group(leaf, item, 0); 2788 &token);
2789 btrfs_set_token_inode_generation(leaf, item, BTRFS_I(inode)->generation,
2790 &token);
2791 btrfs_set_token_inode_sequence(leaf, item, inode->i_version, &token);
2792 btrfs_set_token_inode_transid(leaf, item, trans->transid, &token);
2793 btrfs_set_token_inode_rdev(leaf, item, inode->i_rdev, &token);
2794 btrfs_set_token_inode_flags(leaf, item, BTRFS_I(inode)->flags, &token);
2795 btrfs_set_token_inode_block_group(leaf, item, 0, &token);
2740} 2796}
2741 2797
2742/* 2798/*
@@ -3832,6 +3888,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
3832 3888
3833 /* we don't support swapfiles, so vmtruncate shouldn't fail */ 3889 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3834 truncate_setsize(inode, newsize); 3890 truncate_setsize(inode, newsize);
3891
 3892 /* Disable nonlocked read DIO to avoid the endless truncate */
3893 btrfs_inode_block_unlocked_dio(inode);
3894 inode_dio_wait(inode);
3895 btrfs_inode_resume_unlocked_dio(inode);
3896
3835 ret = btrfs_truncate(inode); 3897 ret = btrfs_truncate(inode);
3836 if (ret && inode->i_nlink) 3898 if (ret && inode->i_nlink)
3837 btrfs_orphan_del(NULL, inode); 3899 btrfs_orphan_del(NULL, inode);
@@ -3904,6 +3966,12 @@ void btrfs_evict_inode(struct inode *inode)
3904 goto no_delete; 3966 goto no_delete;
3905 } 3967 }
3906 3968
3969 ret = btrfs_commit_inode_delayed_inode(inode);
3970 if (ret) {
3971 btrfs_orphan_del(NULL, inode);
3972 goto no_delete;
3973 }
3974
3907 rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP); 3975 rsv = btrfs_alloc_block_rsv(root, BTRFS_BLOCK_RSV_TEMP);
3908 if (!rsv) { 3976 if (!rsv) {
3909 btrfs_orphan_del(NULL, inode); 3977 btrfs_orphan_del(NULL, inode);
@@ -3941,7 +4009,7 @@ void btrfs_evict_inode(struct inode *inode)
3941 goto no_delete; 4009 goto no_delete;
3942 } 4010 }
3943 4011
3944 trans = btrfs_start_transaction_lflush(root, 1); 4012 trans = btrfs_join_transaction(root);
3945 if (IS_ERR(trans)) { 4013 if (IS_ERR(trans)) {
3946 btrfs_orphan_del(NULL, inode); 4014 btrfs_orphan_del(NULL, inode);
3947 btrfs_free_block_rsv(root, rsv); 4015 btrfs_free_block_rsv(root, rsv);
@@ -3955,9 +4023,6 @@ void btrfs_evict_inode(struct inode *inode)
3955 break; 4023 break;
3956 4024
3957 trans->block_rsv = &root->fs_info->trans_block_rsv; 4025 trans->block_rsv = &root->fs_info->trans_block_rsv;
3958 ret = btrfs_update_inode(trans, root, inode);
3959 BUG_ON(ret);
3960
3961 btrfs_end_transaction(trans, root); 4026 btrfs_end_transaction(trans, root);
3962 trans = NULL; 4027 trans = NULL;
3963 btrfs_btree_balance_dirty(root); 4028 btrfs_btree_balance_dirty(root);
@@ -5006,12 +5071,6 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
5006 goto out_unlock; 5071 goto out_unlock;
5007 } 5072 }
5008 5073
5009 err = btrfs_update_inode(trans, root, inode);
5010 if (err) {
5011 drop_inode = 1;
5012 goto out_unlock;
5013 }
5014
5015 /* 5074 /*
5016 * If the active LSM wants to access the inode during 5075 * If the active LSM wants to access the inode during
5017 * d_instantiate it needs these. Smack checks to see 5076 * d_instantiate it needs these. Smack checks to see
@@ -5949,6 +6008,8 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
5949 6008
5950 em->start = start; 6009 em->start = start;
5951 em->orig_start = orig_start; 6010 em->orig_start = orig_start;
6011 em->mod_start = start;
6012 em->mod_len = len;
5952 em->len = len; 6013 em->len = len;
5953 em->block_len = block_len; 6014 em->block_len = block_len;
5954 em->block_start = block_start; 6015 em->block_start = block_start;
@@ -5990,16 +6051,15 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5990 u64 len = bh_result->b_size; 6051 u64 len = bh_result->b_size;
5991 struct btrfs_trans_handle *trans; 6052 struct btrfs_trans_handle *trans;
5992 int unlock_bits = EXTENT_LOCKED; 6053 int unlock_bits = EXTENT_LOCKED;
5993 int ret; 6054 int ret = 0;
5994 6055
5995 if (create) { 6056 if (create) {
5996 ret = btrfs_delalloc_reserve_space(inode, len); 6057 spin_lock(&BTRFS_I(inode)->lock);
5997 if (ret) 6058 BTRFS_I(inode)->outstanding_extents++;
5998 return ret; 6059 spin_unlock(&BTRFS_I(inode)->lock);
5999 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY; 6060 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
6000 } else { 6061 } else
6001 len = min_t(u64, len, root->sectorsize); 6062 len = min_t(u64, len, root->sectorsize);
6002 }
6003 6063
6004 lockstart = start; 6064 lockstart = start;
6005 lockend = start + len - 1; 6065 lockend = start + len - 1;
@@ -6011,14 +6071,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6011 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create)) 6071 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
6012 return -ENOTBLK; 6072 return -ENOTBLK;
6013 6073
6014 if (create) {
6015 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6016 lockend, EXTENT_DELALLOC, NULL,
6017 &cached_state, GFP_NOFS);
6018 if (ret)
6019 goto unlock_err;
6020 }
6021
6022 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 6074 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
6023 if (IS_ERR(em)) { 6075 if (IS_ERR(em)) {
6024 ret = PTR_ERR(em); 6076 ret = PTR_ERR(em);
@@ -6050,7 +6102,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
6050 if (!create && (em->block_start == EXTENT_MAP_HOLE || 6102 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
6051 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 6103 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
6052 free_extent_map(em); 6104 free_extent_map(em);
6053 ret = 0;
6054 goto unlock_err; 6105 goto unlock_err;
6055 } 6106 }
6056 6107
@@ -6148,6 +6199,11 @@ unlock:
6148 */ 6199 */
6149 if (start + len > i_size_read(inode)) 6200 if (start + len > i_size_read(inode))
6150 i_size_write(inode, start + len); 6201 i_size_write(inode, start + len);
6202
6203 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6204 lockstart + len - 1, EXTENT_DELALLOC, NULL,
6205 &cached_state, GFP_NOFS);
6206 BUG_ON(ret);
6151 } 6207 }
6152 6208
6153 /* 6209 /*
@@ -6156,24 +6212,9 @@ unlock:
6156 * aren't using if there is any left over space. 6212 * aren't using if there is any left over space.
6157 */ 6213 */
6158 if (lockstart < lockend) { 6214 if (lockstart < lockend) {
6159 if (create && len < lockend - lockstart) { 6215 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6160 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, 6216 lockend, unlock_bits, 1, 0,
6161 lockstart + len - 1, 6217 &cached_state, GFP_NOFS);
6162 unlock_bits | EXTENT_DEFRAG, 1, 0,
6163 &cached_state, GFP_NOFS);
6164 /*
6165 * Beside unlock, we also need to cleanup reserved space
6166 * for the left range by attaching EXTENT_DO_ACCOUNTING.
6167 */
6168 clear_extent_bit(&BTRFS_I(inode)->io_tree,
6169 lockstart + len, lockend,
6170 unlock_bits | EXTENT_DO_ACCOUNTING |
6171 EXTENT_DEFRAG, 1, 0, NULL, GFP_NOFS);
6172 } else {
6173 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6174 lockend, unlock_bits, 1, 0,
6175 &cached_state, GFP_NOFS);
6176 }
6177 } else { 6218 } else {
6178 free_extent_state(cached_state); 6219 free_extent_state(cached_state);
6179 } 6220 }
@@ -6183,9 +6224,6 @@ unlock:
6183 return 0; 6224 return 0;
6184 6225
6185unlock_err: 6226unlock_err:
6186 if (create)
6187 unlock_bits |= EXTENT_DO_ACCOUNTING;
6188
6189 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, 6227 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6190 unlock_bits, 1, 0, &cached_state, GFP_NOFS); 6228 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
6191 return ret; 6229 return ret;
@@ -6623,15 +6661,63 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6623{ 6661{
6624 struct file *file = iocb->ki_filp; 6662 struct file *file = iocb->ki_filp;
6625 struct inode *inode = file->f_mapping->host; 6663 struct inode *inode = file->f_mapping->host;
6664 size_t count = 0;
6665 int flags = 0;
6666 bool wakeup = true;
6667 bool relock = false;
6668 ssize_t ret;
6626 6669
6627 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, 6670 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
6628 offset, nr_segs)) 6671 offset, nr_segs))
6629 return 0; 6672 return 0;
6630 6673
6631 return __blockdev_direct_IO(rw, iocb, inode, 6674 atomic_inc(&inode->i_dio_count);
6632 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 6675 smp_mb__after_atomic_inc();
6633 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, 6676
6634 btrfs_submit_direct, 0); 6677 if (rw & WRITE) {
6678 count = iov_length(iov, nr_segs);
6679 /*
 6680 * If the write DIO is beyond the EOF, we need to update
 6681 * the isize, but it is protected by i_mutex. So we
 6682 * cannot unlock the i_mutex in this case.
6683 */
6684 if (offset + count <= inode->i_size) {
6685 mutex_unlock(&inode->i_mutex);
6686 relock = true;
6687 }
6688 ret = btrfs_delalloc_reserve_space(inode, count);
6689 if (ret)
6690 goto out;
6691 } else if (unlikely(test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
6692 &BTRFS_I(inode)->runtime_flags))) {
6693 inode_dio_done(inode);
6694 flags = DIO_LOCKING | DIO_SKIP_HOLES;
6695 wakeup = false;
6696 }
6697
6698 ret = __blockdev_direct_IO(rw, iocb, inode,
6699 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
6700 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
6701 btrfs_submit_direct, flags);
6702 if (rw & WRITE) {
6703 if (ret < 0 && ret != -EIOCBQUEUED)
6704 btrfs_delalloc_release_space(inode, count);
6705 else if (ret > 0 && (size_t)ret < count) {
6706 spin_lock(&BTRFS_I(inode)->lock);
6707 BTRFS_I(inode)->outstanding_extents++;
6708 spin_unlock(&BTRFS_I(inode)->lock);
6709 btrfs_delalloc_release_space(inode,
6710 count - (size_t)ret);
6711 }
6712 btrfs_delalloc_release_metadata(inode, 0);
6713 }
6714out:
6715 if (wakeup)
6716 inode_dio_done(inode);
6717 if (relock)
6718 mutex_lock(&inode->i_mutex);
6719
6720 return ret;
6635} 6721}
6636 6722
6637#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) 6723#define BTRFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC)
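
The new write path above reserves delalloc space for the whole iovec up front and settles the books afterwards: a failed write releases everything, a short write releases only the unwritten tail (while charging one more outstanding extent for the split). A minimal model of that accounting, with an invented stand-in constant for -EIOCBQUEUED:

#include <stdio.h>
#include <stdint.h>

#define EIOCBQUEUED_MODEL 529   /* stand-in for the kernel's EIOCBQUEUED */

/* Model of the direct-IO space accounting in this hunk: reserve for the
 * whole write before submitting, then settle afterwards. Illustrative
 * only; no kernel types or APIs here. */
static int64_t reserved;

static void dio_write_settle(int64_t count, int64_t ret)
{
    if (ret < 0 && ret != -EIOCBQUEUED_MODEL)
        reserved -= count;          /* failed outright: release it all */
    else if (ret > 0 && ret < count)
        reserved -= count - ret;    /* short write: release the tail */
    /* ret == count, or queued asynchronously: the written range keeps
     * its reservation until the ordered extent completes. */
}

int main(void)
{
    int64_t count = 1 << 20;        /* a 1 MiB DIO write */

    reserved = count;               /* btrfs_delalloc_reserve_space() */
    dio_write_settle(count, 256 << 10);  /* only 256 KiB made it out */
    printf("still reserved: %lld bytes\n", (long long)reserved);
    return 0;
}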
@@ -6735,8 +6821,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
6735 return; 6821 return;
6736 } 6822 }
6737 lock_extent_bits(tree, page_start, page_end, 0, &cached_state); 6823 lock_extent_bits(tree, page_start, page_end, 0, &cached_state);
6738 ordered = btrfs_lookup_ordered_extent(inode, 6824 ordered = btrfs_lookup_ordered_extent(inode, page_offset(page));
6739 page_offset(page));
6740 if (ordered) { 6825 if (ordered) {
6741 /* 6826 /*
6742 * IO on this page will never be started, so we need 6827 * IO on this page will never be started, so we need
@@ -7216,8 +7301,9 @@ int btrfs_drop_inode(struct inode *inode)
7216{ 7301{
7217 struct btrfs_root *root = BTRFS_I(inode)->root; 7302 struct btrfs_root *root = BTRFS_I(inode)->root;
7218 7303
 7304 /* the snap/subvol tree is being deleted */
7219 if (btrfs_root_refs(&root->root_item) == 0 && 7305 if (btrfs_root_refs(&root->root_item) == 0 &&
7220 !btrfs_is_free_space_inode(inode)) 7306 root != root->fs_info->tree_root)
7221 return 1; 7307 return 1;
7222 else 7308 else
7223 return generic_drop_inode(inode); 7309 return generic_drop_inode(inode);
@@ -7299,14 +7385,19 @@ fail:
7299static int btrfs_getattr(struct vfsmount *mnt, 7385static int btrfs_getattr(struct vfsmount *mnt,
7300 struct dentry *dentry, struct kstat *stat) 7386 struct dentry *dentry, struct kstat *stat)
7301{ 7387{
7388 u64 delalloc_bytes;
7302 struct inode *inode = dentry->d_inode; 7389 struct inode *inode = dentry->d_inode;
7303 u32 blocksize = inode->i_sb->s_blocksize; 7390 u32 blocksize = inode->i_sb->s_blocksize;
7304 7391
7305 generic_fillattr(inode, stat); 7392 generic_fillattr(inode, stat);
7306 stat->dev = BTRFS_I(inode)->root->anon_dev; 7393 stat->dev = BTRFS_I(inode)->root->anon_dev;
7307 stat->blksize = PAGE_CACHE_SIZE; 7394 stat->blksize = PAGE_CACHE_SIZE;
7395
7396 spin_lock(&BTRFS_I(inode)->lock);
7397 delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
7398 spin_unlock(&BTRFS_I(inode)->lock);
7308 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) + 7399 stat->blocks = (ALIGN(inode_get_bytes(inode), blocksize) +
7309 ALIGN(BTRFS_I(inode)->delalloc_bytes, blocksize)) >> 9; 7400 ALIGN(delalloc_bytes, blocksize)) >> 9;
7310 return 0; 7401 return 0;
7311} 7402}
7312 7403
@@ -7583,7 +7674,7 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7583 7674
7584 INIT_LIST_HEAD(&works); 7675 INIT_LIST_HEAD(&works);
7585 INIT_LIST_HEAD(&splice); 7676 INIT_LIST_HEAD(&splice);
7586again: 7677
7587 spin_lock(&root->fs_info->delalloc_lock); 7678 spin_lock(&root->fs_info->delalloc_lock);
7588 list_splice_init(&root->fs_info->delalloc_inodes, &splice); 7679 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
7589 while (!list_empty(&splice)) { 7680 while (!list_empty(&splice)) {
@@ -7593,8 +7684,11 @@ again:
7593 list_del_init(&binode->delalloc_inodes); 7684 list_del_init(&binode->delalloc_inodes);
7594 7685
7595 inode = igrab(&binode->vfs_inode); 7686 inode = igrab(&binode->vfs_inode);
7596 if (!inode) 7687 if (!inode) {
7688 clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
7689 &binode->runtime_flags);
7597 continue; 7690 continue;
7691 }
7598 7692
7599 list_add_tail(&binode->delalloc_inodes, 7693 list_add_tail(&binode->delalloc_inodes,
7600 &root->fs_info->delalloc_inodes); 7694 &root->fs_info->delalloc_inodes);
@@ -7619,13 +7713,6 @@ again:
7619 btrfs_wait_and_free_delalloc_work(work); 7713 btrfs_wait_and_free_delalloc_work(work);
7620 } 7714 }
7621 7715
7622 spin_lock(&root->fs_info->delalloc_lock);
7623 if (!list_empty(&root->fs_info->delalloc_inodes)) {
7624 spin_unlock(&root->fs_info->delalloc_lock);
7625 goto again;
7626 }
7627 spin_unlock(&root->fs_info->delalloc_lock);
7628
7629 /* the filemap_flush will queue IO into the worker threads, but 7716 /* the filemap_flush will queue IO into the worker threads, but
7630 * we have to make sure the IO is actually started and that 7717 * we have to make sure the IO is actually started and that
7631 * ordered extents get created before we return 7718 * ordered extents get created before we return
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 338f2597bf7f..059546aa8fdf 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -42,12 +42,12 @@
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/blkdev.h> 43#include <linux/blkdev.h>
44#include <linux/uuid.h> 44#include <linux/uuid.h>
45#include <linux/btrfs.h>
45#include "compat.h" 46#include "compat.h"
46#include "ctree.h" 47#include "ctree.h"
47#include "disk-io.h" 48#include "disk-io.h"
48#include "transaction.h" 49#include "transaction.h"
49#include "btrfs_inode.h" 50#include "btrfs_inode.h"
50#include "ioctl.h"
51#include "print-tree.h" 51#include "print-tree.h"
52#include "volumes.h" 52#include "volumes.h"
53#include "locking.h" 53#include "locking.h"
@@ -367,7 +367,7 @@ static noinline int create_subvol(struct btrfs_root *root,
367 struct dentry *dentry, 367 struct dentry *dentry,
368 char *name, int namelen, 368 char *name, int namelen,
369 u64 *async_transid, 369 u64 *async_transid,
370 struct btrfs_qgroup_inherit **inherit) 370 struct btrfs_qgroup_inherit *inherit)
371{ 371{
372 struct btrfs_trans_handle *trans; 372 struct btrfs_trans_handle *trans;
373 struct btrfs_key key; 373 struct btrfs_key key;
@@ -401,8 +401,7 @@ static noinline int create_subvol(struct btrfs_root *root,
401 if (IS_ERR(trans)) 401 if (IS_ERR(trans))
402 return PTR_ERR(trans); 402 return PTR_ERR(trans);
403 403
404 ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, 404 ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, inherit);
405 inherit ? *inherit : NULL);
406 if (ret) 405 if (ret)
407 goto fail; 406 goto fail;
408 407
@@ -533,7 +532,7 @@ fail:
533 532
534static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, 533static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
535 char *name, int namelen, u64 *async_transid, 534 char *name, int namelen, u64 *async_transid,
536 bool readonly, struct btrfs_qgroup_inherit **inherit) 535 bool readonly, struct btrfs_qgroup_inherit *inherit)
537{ 536{
538 struct inode *inode; 537 struct inode *inode;
539 struct btrfs_pending_snapshot *pending_snapshot; 538 struct btrfs_pending_snapshot *pending_snapshot;
@@ -552,10 +551,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
552 pending_snapshot->dentry = dentry; 551 pending_snapshot->dentry = dentry;
553 pending_snapshot->root = root; 552 pending_snapshot->root = root;
554 pending_snapshot->readonly = readonly; 553 pending_snapshot->readonly = readonly;
555 if (inherit) { 554 pending_snapshot->inherit = inherit;
556 pending_snapshot->inherit = *inherit;
557 *inherit = NULL; /* take responsibility to free it */
558 }
559 555
560 trans = btrfs_start_transaction(root->fs_info->extent_root, 6); 556 trans = btrfs_start_transaction(root->fs_info->extent_root, 6);
561 if (IS_ERR(trans)) { 557 if (IS_ERR(trans)) {
@@ -695,7 +691,7 @@ static noinline int btrfs_mksubvol(struct path *parent,
695 char *name, int namelen, 691 char *name, int namelen,
696 struct btrfs_root *snap_src, 692 struct btrfs_root *snap_src,
697 u64 *async_transid, bool readonly, 693 u64 *async_transid, bool readonly,
698 struct btrfs_qgroup_inherit **inherit) 694 struct btrfs_qgroup_inherit *inherit)
699{ 695{
700 struct inode *dir = parent->dentry->d_inode; 696 struct inode *dir = parent->dentry->d_inode;
701 struct dentry *dentry; 697 struct dentry *dentry;
@@ -818,7 +814,7 @@ static int find_new_extents(struct btrfs_root *root,
818 814
819 while(1) { 815 while(1) {
820 ret = btrfs_search_forward(root, &min_key, &max_key, 816 ret = btrfs_search_forward(root, &min_key, &max_key,
821 path, 0, newer_than); 817 path, newer_than);
822 if (ret != 0) 818 if (ret != 0)
823 goto none; 819 goto none;
824 if (min_key.objectid != ino) 820 if (min_key.objectid != ino)
@@ -1206,6 +1202,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
1206 if (!(inode->i_sb->s_flags & MS_ACTIVE)) 1202 if (!(inode->i_sb->s_flags & MS_ACTIVE))
1207 break; 1203 break;
1208 1204
1205 if (btrfs_defrag_cancelled(root->fs_info)) {
1206 printk(KERN_DEBUG "btrfs: defrag_file cancelled\n");
1207 ret = -EAGAIN;
1208 break;
1209 }
1210
1209 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1211 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
1210 extent_thresh, &last_len, &skip, 1212 extent_thresh, &last_len, &skip,
1211 &defrag_end, range->flags & 1213 &defrag_end, range->flags &
@@ -1329,9 +1331,6 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1329 int ret = 0; 1331 int ret = 0;
1330 int mod = 0; 1332 int mod = 0;
1331 1333
1332 if (root->fs_info->sb->s_flags & MS_RDONLY)
1333 return -EROFS;
1334
1335 if (!capable(CAP_SYS_ADMIN)) 1334 if (!capable(CAP_SYS_ADMIN))
1336 return -EPERM; 1335 return -EPERM;
1337 1336
@@ -1363,6 +1362,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1363 *devstr = '\0'; 1362 *devstr = '\0';
1364 devstr = vol_args->name; 1363 devstr = vol_args->name;
1365 devid = simple_strtoull(devstr, &end, 10); 1364 devid = simple_strtoull(devstr, &end, 10);
1365 if (!devid) {
1366 ret = -EINVAL;
1367 goto out_free;
1368 }
1366 printk(KERN_INFO "btrfs: resizing devid %llu\n", 1369 printk(KERN_INFO "btrfs: resizing devid %llu\n",
1367 (unsigned long long)devid); 1370 (unsigned long long)devid);
1368 } 1371 }
@@ -1371,7 +1374,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1371 if (!device) { 1374 if (!device) {
1372 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1375 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
1373 (unsigned long long)devid); 1376 (unsigned long long)devid);
1374 ret = -EINVAL; 1377 ret = -ENODEV;
1375 goto out_free; 1378 goto out_free;
1376 } 1379 }
1377 1380
@@ -1379,7 +1382,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1379 printk(KERN_INFO "btrfs: resizer unable to apply on " 1382 printk(KERN_INFO "btrfs: resizer unable to apply on "
1380 "readonly device %llu\n", 1383 "readonly device %llu\n",
1381 (unsigned long long)devid); 1384 (unsigned long long)devid);
1382 ret = -EINVAL; 1385 ret = -EPERM;
1383 goto out_free; 1386 goto out_free;
1384 } 1387 }
1385 1388
@@ -1401,7 +1404,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1401 } 1404 }
1402 1405
1403 if (device->is_tgtdev_for_dev_replace) { 1406 if (device->is_tgtdev_for_dev_replace) {
1404 ret = -EINVAL; 1407 ret = -EPERM;
1405 goto out_free; 1408 goto out_free;
1406 } 1409 }
1407 1410
@@ -1457,7 +1460,7 @@ out:
1457static noinline int btrfs_ioctl_snap_create_transid(struct file *file, 1460static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
1458 char *name, unsigned long fd, int subvol, 1461 char *name, unsigned long fd, int subvol,
1459 u64 *transid, bool readonly, 1462 u64 *transid, bool readonly,
1460 struct btrfs_qgroup_inherit **inherit) 1463 struct btrfs_qgroup_inherit *inherit)
1461{ 1464{
1462 int namelen; 1465 int namelen;
1463 int ret = 0; 1466 int ret = 0;
@@ -1566,7 +1569,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
1566 1569
1567 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1570 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
1568 vol_args->fd, subvol, ptr, 1571 vol_args->fd, subvol, ptr,
1569 readonly, &inherit); 1572 readonly, inherit);
1570 1573
1571 if (ret == 0 && ptr && 1574 if (ret == 0 && ptr &&
1572 copy_to_user(arg + 1575 copy_to_user(arg +
@@ -1863,7 +1866,7 @@ static noinline int search_ioctl(struct inode *inode,
1863 path->keep_locks = 1; 1866 path->keep_locks = 1;
1864 1867
1865 while(1) { 1868 while(1) {
1866 ret = btrfs_search_forward(root, &key, &max_key, path, 0, 1869 ret = btrfs_search_forward(root, &key, &max_key, path,
1867 sk->min_transid); 1870 sk->min_transid);
1868 if (ret != 0) { 1871 if (ret != 0) {
1869 if (ret > 0) 1872 if (ret > 0)
@@ -2171,6 +2174,12 @@ out_unlock:
2171 shrink_dcache_sb(root->fs_info->sb); 2174 shrink_dcache_sb(root->fs_info->sb);
2172 btrfs_invalidate_inodes(dest); 2175 btrfs_invalidate_inodes(dest);
2173 d_delete(dentry); 2176 d_delete(dentry);
2177
2178 /* the last ref */
2179 if (dest->cache_inode) {
2180 iput(dest->cache_inode);
2181 dest->cache_inode = NULL;
2182 }
2174 } 2183 }
2175out_dput: 2184out_dput:
2176 dput(dentry); 2185 dput(dentry);
@@ -2211,10 +2220,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2211 ret = -EPERM; 2220 ret = -EPERM;
2212 goto out; 2221 goto out;
2213 } 2222 }
2214 ret = btrfs_defrag_root(root, 0); 2223 ret = btrfs_defrag_root(root);
2215 if (ret) 2224 if (ret)
2216 goto out; 2225 goto out;
2217 ret = btrfs_defrag_root(root->fs_info->extent_root, 0); 2226 ret = btrfs_defrag_root(root->fs_info->extent_root);
2218 break; 2227 break;
2219 case S_IFREG: 2228 case S_IFREG:
2220 if (!(file->f_mode & FMODE_WRITE)) { 2229 if (!(file->f_mode & FMODE_WRITE)) {
@@ -3111,7 +3120,7 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
3111 u64 transid; 3120 u64 transid;
3112 int ret; 3121 int ret;
3113 3122
3114 trans = btrfs_attach_transaction(root); 3123 trans = btrfs_attach_transaction_barrier(root);
3115 if (IS_ERR(trans)) { 3124 if (IS_ERR(trans)) {
3116 if (PTR_ERR(trans) != -ENOENT) 3125 if (PTR_ERR(trans) != -ENOENT)
3117 return PTR_ERR(trans); 3126 return PTR_ERR(trans);
@@ -3289,7 +3298,7 @@ static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
3289 struct inode_fs_paths *ipath = NULL; 3298 struct inode_fs_paths *ipath = NULL;
3290 struct btrfs_path *path; 3299 struct btrfs_path *path;
3291 3300
3292 if (!capable(CAP_SYS_ADMIN)) 3301 if (!capable(CAP_DAC_READ_SEARCH))
3293 return -EPERM; 3302 return -EPERM;
3294 3303
3295 path = btrfs_alloc_path(); 3304 path = btrfs_alloc_path();
@@ -3914,6 +3923,65 @@ out:
3914 return ret; 3923 return ret;
3915} 3924}
3916 3925
3926static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
3927{
3928 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3929 const char *label = root->fs_info->super_copy->label;
3930 size_t len = strnlen(label, BTRFS_LABEL_SIZE);
3931 int ret;
3932
3933 if (len == BTRFS_LABEL_SIZE) {
 3934 pr_warn("btrfs: label is too long, returning the first %zu bytes\n",
3935 --len);
3936 }
3937
3938 mutex_lock(&root->fs_info->volume_mutex);
3939 ret = copy_to_user(arg, label, len);
3940 mutex_unlock(&root->fs_info->volume_mutex);
3941
3942 return ret ? -EFAULT : 0;
3943}
3944
3945static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
3946{
3947 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
3948 struct btrfs_super_block *super_block = root->fs_info->super_copy;
3949 struct btrfs_trans_handle *trans;
3950 char label[BTRFS_LABEL_SIZE];
3951 int ret;
3952
3953 if (!capable(CAP_SYS_ADMIN))
3954 return -EPERM;
3955
3956 if (copy_from_user(label, arg, sizeof(label)))
3957 return -EFAULT;
3958
3959 if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
3960 pr_err("btrfs: unable to set label with more than %d bytes\n",
3961 BTRFS_LABEL_SIZE - 1);
3962 return -EINVAL;
3963 }
3964
3965 ret = mnt_want_write_file(file);
3966 if (ret)
3967 return ret;
3968
3969 mutex_lock(&root->fs_info->volume_mutex);
3970 trans = btrfs_start_transaction(root, 0);
3971 if (IS_ERR(trans)) {
3972 ret = PTR_ERR(trans);
3973 goto out_unlock;
3974 }
3975
3976 strcpy(super_block->label, label);
3977 ret = btrfs_end_transaction(trans, root);
3978
3979out_unlock:
3980 mutex_unlock(&root->fs_info->volume_mutex);
3981 mnt_drop_write_file(file);
3982 return ret;
3983}
3984
3917long btrfs_ioctl(struct file *file, unsigned int 3985long btrfs_ioctl(struct file *file, unsigned int
3918 cmd, unsigned long arg) 3986 cmd, unsigned long arg)
3919{ 3987{
@@ -4014,6 +4082,10 @@ long btrfs_ioctl(struct file *file, unsigned int
4014 return btrfs_ioctl_qgroup_limit(file, argp); 4082 return btrfs_ioctl_qgroup_limit(file, argp);
4015 case BTRFS_IOC_DEV_REPLACE: 4083 case BTRFS_IOC_DEV_REPLACE:
4016 return btrfs_ioctl_dev_replace(root, argp); 4084 return btrfs_ioctl_dev_replace(root, argp);
4085 case BTRFS_IOC_GET_FSLABEL:
4086 return btrfs_ioctl_get_fslabel(file, argp);
4087 case BTRFS_IOC_SET_FSLABEL:
4088 return btrfs_ioctl_set_fslabel(file, argp);
4017 } 4089 }
4018 4090
4019 return -ENOTTY; 4091 return -ENOTTY;
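
As a usage sketch, userspace would drive the two new ioctls roughly as below. BTRFS_IOC_GET_FSLABEL, BTRFS_IOC_SET_FSLABEL, and BTRFS_LABEL_SIZE are assumed to come from the <linux/btrfs.h> header introduced by this series; error handling is trimmed:

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>    /* BTRFS_IOC_*_FSLABEL, BTRFS_LABEL_SIZE */

int main(int argc, char **argv)
{
    char label[BTRFS_LABEL_SIZE] = "";  /* zero-filled, so always terminated */
    int fd;

    if (argc < 2)
        return 1;
    fd = open(argv[1], O_RDONLY);       /* any path on the filesystem */
    if (fd < 0)
        return 1;
    if (ioctl(fd, BTRFS_IOC_GET_FSLABEL, label) == 0)
        printf("label: %s\n", label);
    if (argc > 2) {                     /* optional: set a new label */
        memset(label, 0, sizeof(label));
        strncpy(label, argv[2], BTRFS_LABEL_SIZE - 1);
        if (ioctl(fd, BTRFS_IOC_SET_FSLABEL, label) != 0)
            perror("BTRFS_IOC_SET_FSLABEL");
    }
    close(fd);
    return 0;
}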
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 2a1762c66041..e95df435d897 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -113,11 +113,10 @@ again:
113 read_unlock(&eb->lock); 113 read_unlock(&eb->lock);
114 return; 114 return;
115 } 115 }
116 read_unlock(&eb->lock);
117 wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0);
118 read_lock(&eb->lock);
119 if (atomic_read(&eb->blocking_writers)) { 116 if (atomic_read(&eb->blocking_writers)) {
120 read_unlock(&eb->lock); 117 read_unlock(&eb->lock);
118 wait_event(eb->write_lock_wq,
119 atomic_read(&eb->blocking_writers) == 0);
121 goto again; 120 goto again;
122 } 121 }
123 atomic_inc(&eb->read_locks); 122 atomic_inc(&eb->read_locks);
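
The fix above replaces an unconditional unlock/wait/relock cycle with a recheck performed while the read lock is still held, dropping the lock only when a blocking writer is actually present. A pthread sketch of the same check-then-sleep-then-retry shape, with a mutex and condvar standing in for the extent_buffer lock machinery (our model, not the kernel's):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t writers_gone = PTHREAD_COND_INITIALIZER;
static int blocking_writers;
static int read_locks;

static void tree_read_lock(void)
{
    pthread_mutex_lock(&lock);
    while (blocking_writers) {
        /* Only now do we give up the lock and sleep; cond_wait drops it
         * atomically and retakes it before rechecking, i.e. goto again. */
        pthread_cond_wait(&writers_gone, &lock);
    }
    read_locks++;               /* safe: no blocking writers remain */
    pthread_mutex_unlock(&lock);
}

static void writer_done(void)
{
    pthread_mutex_lock(&lock);
    if (--blocking_writers == 0)
        pthread_cond_broadcast(&writers_gone);
    pthread_mutex_unlock(&lock);
}

int main(void)
{
    blocking_writers = 1;
    writer_done();      /* the last writer leaves... */
    tree_read_lock();   /* ...so the reader proceeds without sleeping */
    printf("read_locks=%d blocking_writers=%d\n",
           read_locks, blocking_writers);
    return 0;
}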
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e5ed56729607..dc08d77b717e 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -196,6 +196,9 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
196 entry->file_offset = file_offset; 196 entry->file_offset = file_offset;
197 entry->start = start; 197 entry->start = start;
198 entry->len = len; 198 entry->len = len;
199 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) &&
200 !(type == BTRFS_ORDERED_NOCOW))
201 entry->csum_bytes_left = disk_len;
199 entry->disk_len = disk_len; 202 entry->disk_len = disk_len;
200 entry->bytes_left = len; 203 entry->bytes_left = len;
201 entry->inode = igrab(inode); 204 entry->inode = igrab(inode);
@@ -213,6 +216,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
213 INIT_LIST_HEAD(&entry->root_extent_list); 216 INIT_LIST_HEAD(&entry->root_extent_list);
214 INIT_LIST_HEAD(&entry->work_list); 217 INIT_LIST_HEAD(&entry->work_list);
215 init_completion(&entry->completion); 218 init_completion(&entry->completion);
219 INIT_LIST_HEAD(&entry->log_list);
216 220
217 trace_btrfs_ordered_extent_add(inode, entry); 221 trace_btrfs_ordered_extent_add(inode, entry);
218 222
@@ -270,6 +274,10 @@ void btrfs_add_ordered_sum(struct inode *inode,
270 tree = &BTRFS_I(inode)->ordered_tree; 274 tree = &BTRFS_I(inode)->ordered_tree;
271 spin_lock_irq(&tree->lock); 275 spin_lock_irq(&tree->lock);
272 list_add_tail(&sum->list, &entry->list); 276 list_add_tail(&sum->list, &entry->list);
277 WARN_ON(entry->csum_bytes_left < sum->len);
278 entry->csum_bytes_left -= sum->len;
279 if (entry->csum_bytes_left == 0)
280 wake_up(&entry->wait);
273 spin_unlock_irq(&tree->lock); 281 spin_unlock_irq(&tree->lock);
274} 282}
275 283
@@ -405,6 +413,66 @@ out:
405 return ret == 0; 413 return ret == 0;
406} 414}
407 415
 416/* Needs to be called under either a log transaction or the log_mutex */
417void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode)
418{
419 struct btrfs_ordered_inode_tree *tree;
420 struct btrfs_ordered_extent *ordered;
421 struct rb_node *n;
422 int index = log->log_transid % 2;
423
424 tree = &BTRFS_I(inode)->ordered_tree;
425 spin_lock_irq(&tree->lock);
426 for (n = rb_first(&tree->tree); n; n = rb_next(n)) {
427 ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node);
428 spin_lock(&log->log_extents_lock[index]);
429 if (list_empty(&ordered->log_list)) {
430 list_add_tail(&ordered->log_list, &log->logged_list[index]);
431 atomic_inc(&ordered->refs);
432 }
433 spin_unlock(&log->log_extents_lock[index]);
434 }
435 spin_unlock_irq(&tree->lock);
436}
437
438void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid)
439{
440 struct btrfs_ordered_extent *ordered;
441 int index = transid % 2;
442
443 spin_lock_irq(&log->log_extents_lock[index]);
444 while (!list_empty(&log->logged_list[index])) {
445 ordered = list_first_entry(&log->logged_list[index],
446 struct btrfs_ordered_extent,
447 log_list);
448 list_del_init(&ordered->log_list);
449 spin_unlock_irq(&log->log_extents_lock[index]);
450 wait_event(ordered->wait, test_bit(BTRFS_ORDERED_IO_DONE,
451 &ordered->flags));
452 btrfs_put_ordered_extent(ordered);
453 spin_lock_irq(&log->log_extents_lock[index]);
454 }
455 spin_unlock_irq(&log->log_extents_lock[index]);
456}
457
458void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid)
459{
460 struct btrfs_ordered_extent *ordered;
461 int index = transid % 2;
462
463 spin_lock_irq(&log->log_extents_lock[index]);
464 while (!list_empty(&log->logged_list[index])) {
465 ordered = list_first_entry(&log->logged_list[index],
466 struct btrfs_ordered_extent,
467 log_list);
468 list_del_init(&ordered->log_list);
469 spin_unlock_irq(&log->log_extents_lock[index]);
470 btrfs_put_ordered_extent(ordered);
471 spin_lock_irq(&log->log_extents_lock[index]);
472 }
473 spin_unlock_irq(&log->log_extents_lock[index]);
474}
475
408/* 476/*
409 * used to drop a reference on an ordered extent. This will free 477 * used to drop a reference on an ordered extent. This will free
410 * the extent if the last reference is dropped 478 * the extent if the last reference is dropped
@@ -544,10 +612,12 @@ void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput)
544 * extra check to make sure the ordered operation list really is empty 612 * extra check to make sure the ordered operation list really is empty
545 * before we return 613 * before we return
546 */ 614 */
547int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) 615int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
616 struct btrfs_root *root, int wait)
548{ 617{
549 struct btrfs_inode *btrfs_inode; 618 struct btrfs_inode *btrfs_inode;
550 struct inode *inode; 619 struct inode *inode;
620 struct btrfs_transaction *cur_trans = trans->transaction;
551 struct list_head splice; 621 struct list_head splice;
552 struct list_head works; 622 struct list_head works;
553 struct btrfs_delalloc_work *work, *next; 623 struct btrfs_delalloc_work *work, *next;
@@ -558,14 +628,10 @@ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait)
558 628
559 mutex_lock(&root->fs_info->ordered_operations_mutex); 629 mutex_lock(&root->fs_info->ordered_operations_mutex);
560 spin_lock(&root->fs_info->ordered_extent_lock); 630 spin_lock(&root->fs_info->ordered_extent_lock);
561again: 631 list_splice_init(&cur_trans->ordered_operations, &splice);
562 list_splice_init(&root->fs_info->ordered_operations, &splice);
563
564 while (!list_empty(&splice)) { 632 while (!list_empty(&splice)) {
565
566 btrfs_inode = list_entry(splice.next, struct btrfs_inode, 633 btrfs_inode = list_entry(splice.next, struct btrfs_inode,
567 ordered_operations); 634 ordered_operations);
568
569 inode = &btrfs_inode->vfs_inode; 635 inode = &btrfs_inode->vfs_inode;
570 636
571 list_del_init(&btrfs_inode->ordered_operations); 637 list_del_init(&btrfs_inode->ordered_operations);
@@ -574,24 +640,22 @@ again:
574 * the inode may be getting freed (in sys_unlink path). 640 * the inode may be getting freed (in sys_unlink path).
575 */ 641 */
576 inode = igrab(inode); 642 inode = igrab(inode);
577
578 if (!wait && inode) {
579 list_add_tail(&BTRFS_I(inode)->ordered_operations,
580 &root->fs_info->ordered_operations);
581 }
582
583 if (!inode) 643 if (!inode)
584 continue; 644 continue;
645
646 if (!wait)
647 list_add_tail(&BTRFS_I(inode)->ordered_operations,
648 &cur_trans->ordered_operations);
585 spin_unlock(&root->fs_info->ordered_extent_lock); 649 spin_unlock(&root->fs_info->ordered_extent_lock);
586 650
587 work = btrfs_alloc_delalloc_work(inode, wait, 1); 651 work = btrfs_alloc_delalloc_work(inode, wait, 1);
588 if (!work) { 652 if (!work) {
653 spin_lock(&root->fs_info->ordered_extent_lock);
589 if (list_empty(&BTRFS_I(inode)->ordered_operations)) 654 if (list_empty(&BTRFS_I(inode)->ordered_operations))
590 list_add_tail(&btrfs_inode->ordered_operations, 655 list_add_tail(&btrfs_inode->ordered_operations,
591 &splice); 656 &splice);
592 spin_lock(&root->fs_info->ordered_extent_lock);
593 list_splice_tail(&splice, 657 list_splice_tail(&splice,
594 &root->fs_info->ordered_operations); 658 &cur_trans->ordered_operations);
595 spin_unlock(&root->fs_info->ordered_extent_lock); 659 spin_unlock(&root->fs_info->ordered_extent_lock);
596 ret = -ENOMEM; 660 ret = -ENOMEM;
597 goto out; 661 goto out;
@@ -603,9 +667,6 @@ again:
603 cond_resched(); 667 cond_resched();
604 spin_lock(&root->fs_info->ordered_extent_lock); 668 spin_lock(&root->fs_info->ordered_extent_lock);
605 } 669 }
606 if (wait && !list_empty(&root->fs_info->ordered_operations))
607 goto again;
608
609 spin_unlock(&root->fs_info->ordered_extent_lock); 670 spin_unlock(&root->fs_info->ordered_extent_lock);
610out: 671out:
611 list_for_each_entry_safe(work, next, &works, list) { 672 list_for_each_entry_safe(work, next, &works, list) {
@@ -974,6 +1035,7 @@ out:
974void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 1035void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
975 struct btrfs_root *root, struct inode *inode) 1036 struct btrfs_root *root, struct inode *inode)
976{ 1037{
1038 struct btrfs_transaction *cur_trans = trans->transaction;
977 u64 last_mod; 1039 u64 last_mod;
978 1040
979 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); 1041 last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
@@ -988,7 +1050,7 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
988 spin_lock(&root->fs_info->ordered_extent_lock); 1050 spin_lock(&root->fs_info->ordered_extent_lock);
989 if (list_empty(&BTRFS_I(inode)->ordered_operations)) { 1051 if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
990 list_add_tail(&BTRFS_I(inode)->ordered_operations, 1052 list_add_tail(&BTRFS_I(inode)->ordered_operations,
991 &root->fs_info->ordered_operations); 1053 &cur_trans->ordered_operations);
992 } 1054 }
993 spin_unlock(&root->fs_info->ordered_extent_lock); 1055 spin_unlock(&root->fs_info->ordered_extent_lock);
994} 1056}
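With this hunk the ordered_operations list lives on the transaction rather than on fs_info, so enqueue and drain are scoped to a single commit. A rough sketch of the pairing, assuming the caller holds a transaction handle:

	trans = btrfs_join_transaction(root);
	/* writes queue the inode on trans->transaction */
	btrfs_add_ordered_operation(trans, root, inode);

	/* btrfs_commit_transaction() later drains exactly that list */
	ret = btrfs_run_ordered_operations(trans, root, 1);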
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f29d4bf5fbe7..8eadfe406cdd 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -79,6 +79,8 @@ struct btrfs_ordered_sum {
79#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent 79#define BTRFS_ORDERED_UPDATED_ISIZE 7 /* indicates whether this ordered extent
80 * has done its due diligence in updating 80 * has done its due diligence in updating
81 * the isize. */ 81 * the isize. */
 82#define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this
 83 ordered extent */
82 84
83struct btrfs_ordered_extent { 85struct btrfs_ordered_extent {
84 /* logical offset in the file */ 86 /* logical offset in the file */
@@ -96,6 +98,9 @@ struct btrfs_ordered_extent {
96 /* number of bytes that still need writing */ 98 /* number of bytes that still need writing */
97 u64 bytes_left; 99 u64 bytes_left;
98 100
101 /* number of bytes that still need csumming */
102 u64 csum_bytes_left;
103
99 /* 104 /*
100 * the end of the ordered extent which is behind it but 105 * the end of the ordered extent which is behind it but
101 * didn't update disk_i_size. Please see the comment of 106 * didn't update disk_i_size. Please see the comment of
@@ -118,6 +123,9 @@ struct btrfs_ordered_extent {
118 /* list of checksums for insertion when the extent io is done */ 123 /* list of checksums for insertion when the extent io is done */
119 struct list_head list; 124 struct list_head list;
120 125
126 /* If we need to wait on this to be done */
127 struct list_head log_list;
128
121 /* used to wait for the BTRFS_ORDERED_COMPLETE bit */ 129 /* used to wait for the BTRFS_ORDERED_COMPLETE bit */
122 wait_queue_head_t wait; 130 wait_queue_head_t wait;
123 131
@@ -189,11 +197,15 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode,
189int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, 197int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
190 struct btrfs_ordered_extent *ordered); 198 struct btrfs_ordered_extent *ordered);
191int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); 199int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
192int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); 200int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
201 struct btrfs_root *root, int wait);
193void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, 202void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
194 struct btrfs_root *root, 203 struct btrfs_root *root,
195 struct inode *inode); 204 struct inode *inode);
196void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput); 205void btrfs_wait_ordered_extents(struct btrfs_root *root, int delay_iput);
206void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode);
207void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid);
208void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid);
197int __init ordered_data_init(void); 209int __init ordered_data_init(void);
198void ordered_data_exit(void); 210void ordered_data_exit(void);
199#endif 211#endif
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 50d95fd190a5..920957ecb27e 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -294,6 +294,7 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
294 btrfs_dev_extent_chunk_offset(l, dev_extent), 294 btrfs_dev_extent_chunk_offset(l, dev_extent),
295 (unsigned long long) 295 (unsigned long long)
296 btrfs_dev_extent_length(l, dev_extent)); 296 btrfs_dev_extent_length(l, dev_extent));
297 break;
297 case BTRFS_DEV_STATS_KEY: 298 case BTRFS_DEV_STATS_KEY:
298 printk(KERN_INFO "\t\tdevice stats\n"); 299 printk(KERN_INFO "\t\tdevice stats\n");
299 break; 300 break;
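The added break is a real bug fix: without it the BTRFS_DEV_EXTENT_KEY case fell through, so every dev extent line was followed by a spurious "device stats" line. The hazard in miniature (KEY_A/KEY_B are placeholders):

	/* Sketch: without the break, case KEY_A falls through into KEY_B. */
	switch (type) {
	case KEY_A:
		printk(KERN_INFO "\t\tdev extent\n");
		break;			/* the fix: stop here */
	case KEY_B:
		printk(KERN_INFO "\t\tdevice stats\n");
		break;
	}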
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a5c856234323..88ab785bbd73 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -23,13 +23,13 @@
23#include <linux/rbtree.h> 23#include <linux/rbtree.h>
24#include <linux/slab.h> 24#include <linux/slab.h>
25#include <linux/workqueue.h> 25#include <linux/workqueue.h>
26#include <linux/btrfs.h>
26 27
27#include "ctree.h" 28#include "ctree.h"
28#include "transaction.h" 29#include "transaction.h"
29#include "disk-io.h" 30#include "disk-io.h"
30#include "locking.h" 31#include "locking.h"
31#include "ulist.h" 32#include "ulist.h"
32#include "ioctl.h"
33#include "backref.h" 33#include "backref.h"
34 34
35/* TODO XXX FIXME 35/* TODO XXX FIXME
@@ -847,6 +847,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
847 int ret = 0; 847 int ret = 0;
848 848
849 spin_lock(&fs_info->qgroup_lock); 849 spin_lock(&fs_info->qgroup_lock);
850 if (!fs_info->quota_root) {
851 spin_unlock(&fs_info->qgroup_lock);
852 return 0;
853 }
850 fs_info->quota_enabled = 0; 854 fs_info->quota_enabled = 0;
851 fs_info->pending_quota_state = 0; 855 fs_info->pending_quota_state = 0;
852 quota_root = fs_info->quota_root; 856 quota_root = fs_info->quota_root;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 300e09ac3659..ba5a3210da9a 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3017,7 +3017,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
3017 } 3017 }
3018 } 3018 }
3019 3019
3020 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 3020 page_start = page_offset(page);
3021 page_end = page_start + PAGE_CACHE_SIZE - 1; 3021 page_end = page_start + PAGE_CACHE_SIZE - 1;
3022 3022
3023 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); 3023 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 67783e03d121..c78b2a3fc335 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -2708,7 +2708,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx,
2708 int ret; 2708 int ret;
2709 struct btrfs_root *root = sctx->dev_root; 2709 struct btrfs_root *root = sctx->dev_root;
2710 2710
2711 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 2711 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
2712 return -EIO; 2712 return -EIO;
2713 2713
2714 gen = root->fs_info->last_trans_committed; 2714 gen = root->fs_info->last_trans_committed;
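Here, and in the super.c and transaction.c hunks below, fs_state changes from a flag word tested with a bitwise AND into a bitmap accessed through set_bit()/test_bit(), which are atomic with respect to concurrent updaters. The conversion in isolation:

	unsigned long fs_state = 0;

	/* was: fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; */
	set_bit(BTRFS_FS_STATE_ERROR, &fs_state);

	/* was: if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) */
	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_state))
		return -EIO;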
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 321b7fb4e441..68da757615ae 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -85,6 +85,7 @@ struct send_ctx {
85 u32 send_max_size; 85 u32 send_max_size;
86 u64 total_send_size; 86 u64 total_send_size;
87 u64 cmd_send_size[BTRFS_SEND_C_MAX + 1]; 87 u64 cmd_send_size[BTRFS_SEND_C_MAX + 1];
88 u64 flags; /* 'flags' member of btrfs_ioctl_send_args is u64 */
88 89
89 struct vfsmount *mnt; 90 struct vfsmount *mnt;
90 91
@@ -3709,6 +3710,39 @@ out:
3709 return ret; 3710 return ret;
3710} 3711}
3711 3712
3713/*
3714 * Send an update extent command to user space.
3715 */
3716static int send_update_extent(struct send_ctx *sctx,
3717 u64 offset, u32 len)
3718{
3719 int ret = 0;
3720 struct fs_path *p;
3721
3722 p = fs_path_alloc(sctx);
3723 if (!p)
3724 return -ENOMEM;
3725
3726 ret = begin_cmd(sctx, BTRFS_SEND_C_UPDATE_EXTENT);
3727 if (ret < 0)
3728 goto out;
3729
3730 ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
3731 if (ret < 0)
3732 goto out;
3733
3734 TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
3735 TLV_PUT_U64(sctx, BTRFS_SEND_A_FILE_OFFSET, offset);
3736 TLV_PUT_U64(sctx, BTRFS_SEND_A_SIZE, len);
3737
3738 ret = send_cmd(sctx);
3739
3740tlv_put_failure:
3741out:
3742 fs_path_free(sctx, p);
3743 return ret;
3744}
3745
3712static int send_write_or_clone(struct send_ctx *sctx, 3746static int send_write_or_clone(struct send_ctx *sctx,
3713 struct btrfs_path *path, 3747 struct btrfs_path *path,
3714 struct btrfs_key *key, 3748 struct btrfs_key *key,
@@ -3744,7 +3778,11 @@ static int send_write_or_clone(struct send_ctx *sctx,
3744 goto out; 3778 goto out;
3745 } 3779 }
3746 3780
3747 if (!clone_root) { 3781 if (clone_root) {
3782 ret = send_clone(sctx, offset, len, clone_root);
3783 } else if (sctx->flags & BTRFS_SEND_FLAG_NO_FILE_DATA) {
3784 ret = send_update_extent(sctx, offset, len);
3785 } else {
3748 while (pos < len) { 3786 while (pos < len) {
3749 l = len - pos; 3787 l = len - pos;
3750 if (l > BTRFS_SEND_READ_SIZE) 3788 if (l > BTRFS_SEND_READ_SIZE)
@@ -3757,10 +3795,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
3757 pos += ret; 3795 pos += ret;
3758 } 3796 }
3759 ret = 0; 3797 ret = 0;
3760 } else {
3761 ret = send_clone(sctx, offset, len, clone_root);
3762 } 3798 }
3763
3764out: 3799out:
3765 return ret; 3800 return ret;
3766} 3801}
@@ -4536,7 +4571,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4536 struct btrfs_fs_info *fs_info; 4571 struct btrfs_fs_info *fs_info;
4537 struct btrfs_ioctl_send_args *arg = NULL; 4572 struct btrfs_ioctl_send_args *arg = NULL;
4538 struct btrfs_key key; 4573 struct btrfs_key key;
4539 struct file *filp = NULL;
4540 struct send_ctx *sctx = NULL; 4574 struct send_ctx *sctx = NULL;
4541 u32 i; 4575 u32 i;
4542 u64 *clone_sources_tmp = NULL; 4576 u64 *clone_sources_tmp = NULL;
@@ -4561,6 +4595,11 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4561 goto out; 4595 goto out;
4562 } 4596 }
4563 4597
4598 if (arg->flags & ~BTRFS_SEND_FLAG_NO_FILE_DATA) {
4599 ret = -EINVAL;
4600 goto out;
4601 }
4602
4564 sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS); 4603 sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
4565 if (!sctx) { 4604 if (!sctx) {
4566 ret = -ENOMEM; 4605 ret = -ENOMEM;
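With flags validated, userspace can now request a metadata-only stream. A hedged userspace sketch, assuming the UAPI layout from the renamed include/uapi/linux/btrfs.h and trimming error handling:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/* Ask for a send stream that carries no file data. */
	static int send_no_data(int subvol_fd, int out_fd)
	{
		struct btrfs_ioctl_send_args args;

		memset(&args, 0, sizeof(args));
		args.send_fd = out_fd;
		args.flags = BTRFS_SEND_FLAG_NO_FILE_DATA;
		return ioctl(subvol_fd, BTRFS_IOC_SEND, &args);
	}

A receiver of such a stream sees BTRFS_SEND_C_UPDATE_EXTENT commands in place of the usual write commands.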
@@ -4572,6 +4611,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4572 INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS); 4611 INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
4573 INIT_LIST_HEAD(&sctx->name_cache_list); 4612 INIT_LIST_HEAD(&sctx->name_cache_list);
4574 4613
4614 sctx->flags = arg->flags;
4615
4575 sctx->send_filp = fget(arg->send_fd); 4616 sctx->send_filp = fget(arg->send_fd);
4576 if (IS_ERR(sctx->send_filp)) { 4617 if (IS_ERR(sctx->send_filp)) {
4577 ret = PTR_ERR(sctx->send_filp); 4618 ret = PTR_ERR(sctx->send_filp);
@@ -4673,8 +4714,6 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
4673 goto out; 4714 goto out;
4674 4715
4675out: 4716out:
4676 if (filp)
4677 fput(filp);
4678 kfree(arg); 4717 kfree(arg);
4679 vfree(clone_sources_tmp); 4718 vfree(clone_sources_tmp);
4680 4719
diff --git a/fs/btrfs/send.h b/fs/btrfs/send.h
index 1bf4f32fd4ef..8bb18f7ccaa6 100644
--- a/fs/btrfs/send.h
+++ b/fs/btrfs/send.h
@@ -86,6 +86,7 @@ enum btrfs_send_cmd {
86 BTRFS_SEND_C_UTIMES, 86 BTRFS_SEND_C_UTIMES,
87 87
88 BTRFS_SEND_C_END, 88 BTRFS_SEND_C_END,
89 BTRFS_SEND_C_UPDATE_EXTENT,
89 __BTRFS_SEND_C_MAX, 90 __BTRFS_SEND_C_MAX,
90}; 91};
91#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1) 92#define BTRFS_SEND_C_MAX (__BTRFS_SEND_C_MAX - 1)
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d8982e9601d3..db1ba9a2ed64 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -41,13 +41,13 @@
41#include <linux/slab.h> 41#include <linux/slab.h>
42#include <linux/cleancache.h> 42#include <linux/cleancache.h>
43#include <linux/ratelimit.h> 43#include <linux/ratelimit.h>
44#include <linux/btrfs.h>
44#include "compat.h" 45#include "compat.h"
45#include "delayed-inode.h" 46#include "delayed-inode.h"
46#include "ctree.h" 47#include "ctree.h"
47#include "disk-io.h" 48#include "disk-io.h"
48#include "transaction.h" 49#include "transaction.h"
49#include "btrfs_inode.h" 50#include "btrfs_inode.h"
50#include "ioctl.h"
51#include "print-tree.h" 51#include "print-tree.h"
52#include "xattr.h" 52#include "xattr.h"
53#include "volumes.h" 53#include "volumes.h"
@@ -63,8 +63,7 @@
63static const struct super_operations btrfs_super_ops; 63static const struct super_operations btrfs_super_ops;
64static struct file_system_type btrfs_fs_type; 64static struct file_system_type btrfs_fs_type;
65 65
66static const char *btrfs_decode_error(struct btrfs_fs_info *fs_info, int errno, 66static const char *btrfs_decode_error(int errno, char nbuf[16])
67 char nbuf[16])
68{ 67{
69 char *errstr = NULL; 68 char *errstr = NULL;
70 69
@@ -98,7 +97,7 @@ static void __save_error_info(struct btrfs_fs_info *fs_info)
98 * today we only save the error info into ram. Long term we'll 97 * today we only save the error info into ram. Long term we'll
99 * also send it down to the disk 98 * also send it down to the disk
100 */ 99 */
101 fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR; 100 set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
102} 101}
103 102
104static void save_error_info(struct btrfs_fs_info *fs_info) 103static void save_error_info(struct btrfs_fs_info *fs_info)
@@ -114,7 +113,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
114 if (sb->s_flags & MS_RDONLY) 113 if (sb->s_flags & MS_RDONLY)
115 return; 114 return;
116 115
117 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 116 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
118 sb->s_flags |= MS_RDONLY; 117 sb->s_flags |= MS_RDONLY;
119 printk(KERN_INFO "btrfs is forced readonly\n"); 118 printk(KERN_INFO "btrfs is forced readonly\n");
120 /* 119 /*
@@ -142,8 +141,6 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
142 struct super_block *sb = fs_info->sb; 141 struct super_block *sb = fs_info->sb;
143 char nbuf[16]; 142 char nbuf[16];
144 const char *errstr; 143 const char *errstr;
145 va_list args;
146 va_start(args, fmt);
147 144
148 /* 145 /*
149 * Special case: if the error is EROFS, and we're already 146 * Special case: if the error is EROFS, and we're already
@@ -152,15 +149,18 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
152 if (errno == -EROFS && (sb->s_flags & MS_RDONLY)) 149 if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
153 return; 150 return;
154 151
155 errstr = btrfs_decode_error(fs_info, errno, nbuf); 152 errstr = btrfs_decode_error(errno, nbuf);
156 if (fmt) { 153 if (fmt) {
157 struct va_format vaf = { 154 struct va_format vaf;
158 .fmt = fmt, 155 va_list args;
159 .va = &args, 156
160 }; 157 va_start(args, fmt);
158 vaf.fmt = fmt;
159 vaf.va = &args;
161 160
162 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n", 161 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s (%pV)\n",
163 sb->s_id, function, line, errstr, &vaf); 162 sb->s_id, function, line, errstr, &vaf);
163 va_end(args);
164 } else { 164 } else {
165 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n", 165 printk(KERN_CRIT "BTRFS error (device %s) in %s:%d: %s\n",
166 sb->s_id, function, line, errstr); 166 sb->s_id, function, line, errstr);
@@ -171,7 +171,6 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
171 save_error_info(fs_info); 171 save_error_info(fs_info);
172 btrfs_handle_error(fs_info); 172 btrfs_handle_error(fs_info);
173 } 173 }
174 va_end(args);
175} 174}
176 175
177static const char * const logtypes[] = { 176static const char * const logtypes[] = {
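Beyond tidying, the hunk above fixes the va_list handling: va_start()/va_end() must tightly bracket the single traversal of the arguments, and the old code ran va_start() even on the early EROFS return. The corrected shape, reduced to essentials (a sketch using the kernel's %pV/struct va_format convention):

	void report(const char *fmt, ...)
	{
		if (fmt) {
			struct va_format vaf;
			va_list args;

			va_start(args, fmt);
			vaf.fmt = fmt;
			vaf.va = &args;
			printk(KERN_CRIT "error: %pV\n", &vaf);
			va_end(args);
		}
	}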
@@ -261,7 +260,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
261 char nbuf[16]; 260 char nbuf[16];
262 const char *errstr; 261 const char *errstr;
263 262
264 errstr = btrfs_decode_error(root->fs_info, errno, nbuf); 263 errstr = btrfs_decode_error(errno, nbuf);
265 btrfs_printk(root->fs_info, 264 btrfs_printk(root->fs_info,
266 "%s:%d: Aborting unused transaction(%s).\n", 265 "%s:%d: Aborting unused transaction(%s).\n",
267 function, line, errstr); 266 function, line, errstr);
@@ -289,8 +288,8 @@ void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
289 va_start(args, fmt); 288 va_start(args, fmt);
290 vaf.va = &args; 289 vaf.va = &args;
291 290
292 errstr = btrfs_decode_error(fs_info, errno, nbuf); 291 errstr = btrfs_decode_error(errno, nbuf);
293 if (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR) 292 if (fs_info && (fs_info->mount_opt & BTRFS_MOUNT_PANIC_ON_FATAL_ERROR))
294 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n", 293 panic(KERN_CRIT "BTRFS panic (device %s) in %s:%d: %pV (%s)\n",
295 s_id, function, line, &vaf, errstr); 294 s_id, function, line, &vaf, errstr);
296 295
@@ -438,6 +437,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
438 case Opt_compress_force: 437 case Opt_compress_force:
439 case Opt_compress_force_type: 438 case Opt_compress_force_type:
440 compress_force = true; 439 compress_force = true;
440 /* Fallthrough */
441 case Opt_compress: 441 case Opt_compress:
442 case Opt_compress_type: 442 case Opt_compress_type:
443 if (token == Opt_compress || 443 if (token == Opt_compress ||
@@ -519,7 +519,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
519 case Opt_alloc_start: 519 case Opt_alloc_start:
520 num = match_strdup(&args[0]); 520 num = match_strdup(&args[0]);
521 if (num) { 521 if (num) {
522 mutex_lock(&info->chunk_mutex);
522 info->alloc_start = memparse(num, NULL); 523 info->alloc_start = memparse(num, NULL);
524 mutex_unlock(&info->chunk_mutex);
523 kfree(num); 525 kfree(num);
524 printk(KERN_INFO 526 printk(KERN_INFO
525 "btrfs: allocations start at %llu\n", 527 "btrfs: allocations start at %llu\n",
@@ -876,7 +878,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
876 878
877 btrfs_wait_ordered_extents(root, 0); 879 btrfs_wait_ordered_extents(root, 0);
878 880
879 trans = btrfs_attach_transaction(root); 881 trans = btrfs_attach_transaction_barrier(root);
880 if (IS_ERR(trans)) { 882 if (IS_ERR(trans)) {
881 /* no transaction, don't bother */ 883 /* no transaction, don't bother */
882 if (PTR_ERR(trans) == -ENOENT) 884 if (PTR_ERR(trans) == -ENOENT)
@@ -1289,7 +1291,9 @@ restore:
1289 fs_info->mount_opt = old_opts; 1291 fs_info->mount_opt = old_opts;
1290 fs_info->compress_type = old_compress_type; 1292 fs_info->compress_type = old_compress_type;
1291 fs_info->max_inline = old_max_inline; 1293 fs_info->max_inline = old_max_inline;
1294 mutex_lock(&fs_info->chunk_mutex);
1292 fs_info->alloc_start = old_alloc_start; 1295 fs_info->alloc_start = old_alloc_start;
1296 mutex_unlock(&fs_info->chunk_mutex);
1293 btrfs_resize_thread_pool(fs_info, 1297 btrfs_resize_thread_pool(fs_info,
1294 old_thread_pool_size, fs_info->thread_pool_size); 1298 old_thread_pool_size, fs_info->thread_pool_size);
1295 fs_info->metadata_ratio = old_metadata_ratio; 1299 fs_info->metadata_ratio = old_metadata_ratio;
@@ -1559,7 +1563,7 @@ static int btrfs_freeze(struct super_block *sb)
1559 struct btrfs_trans_handle *trans; 1563 struct btrfs_trans_handle *trans;
1560 struct btrfs_root *root = btrfs_sb(sb)->tree_root; 1564 struct btrfs_root *root = btrfs_sb(sb)->tree_root;
1561 1565
1562 trans = btrfs_attach_transaction(root); 1566 trans = btrfs_attach_transaction_barrier(root);
1563 if (IS_ERR(trans)) { 1567 if (IS_ERR(trans)) {
1564 /* no transaction, don't bother */ 1568 /* no transaction, don't bother */
1565 if (PTR_ERR(trans) == -ENOENT) 1569 if (PTR_ERR(trans) == -ENOENT)
@@ -1684,10 +1688,14 @@ static int __init init_btrfs_fs(void)
1684 if (err) 1688 if (err)
1685 goto free_delayed_inode; 1689 goto free_delayed_inode;
1686 1690
1687 err = btrfs_interface_init(); 1691 err = btrfs_delayed_ref_init();
1688 if (err) 1692 if (err)
1689 goto free_auto_defrag; 1693 goto free_auto_defrag;
1690 1694
1695 err = btrfs_interface_init();
1696 if (err)
1697 goto free_delayed_ref;
1698
1691 err = register_filesystem(&btrfs_fs_type); 1699 err = register_filesystem(&btrfs_fs_type);
1692 if (err) 1700 if (err)
1693 goto unregister_ioctl; 1701 goto unregister_ioctl;
@@ -1699,6 +1707,8 @@ static int __init init_btrfs_fs(void)
1699 1707
1700unregister_ioctl: 1708unregister_ioctl:
1701 btrfs_interface_exit(); 1709 btrfs_interface_exit();
1710free_delayed_ref:
1711 btrfs_delayed_ref_exit();
1702free_auto_defrag: 1712free_auto_defrag:
1703 btrfs_auto_defrag_exit(); 1713 btrfs_auto_defrag_exit();
1704free_delayed_inode: 1714free_delayed_inode:
@@ -1720,6 +1730,7 @@ free_compress:
1720static void __exit exit_btrfs_fs(void) 1730static void __exit exit_btrfs_fs(void)
1721{ 1731{
1722 btrfs_destroy_cachep(); 1732 btrfs_destroy_cachep();
1733 btrfs_delayed_ref_exit();
1723 btrfs_auto_defrag_exit(); 1734 btrfs_auto_defrag_exit();
1724 btrfs_delayed_inode_exit(); 1735 btrfs_delayed_inode_exit();
1725 ordered_data_exit(); 1736 ordered_data_exit();
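btrfs_delayed_ref_init() slots in between the auto-defrag and ioctl-interface steps, and the goto ladder unwinds in exact reverse order on failure, with the matching exit call also added to module teardown. The idiom in miniature, with placeholder subsystem names:

	static int __init init_example(void)
	{
		int err;

		err = subsys_a_init();		/* placeholders */
		if (err)
			return err;
		err = subsys_b_init();
		if (err)
			goto free_a;
		err = subsys_c_init();
		if (err)
			goto free_b;
		return 0;
	free_b:
		subsys_b_exit();
	free_a:
		subsys_a_exit();
		return err;
	}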
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index fc03aa60b684..955204ca0447 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -40,7 +40,6 @@ void put_transaction(struct btrfs_transaction *transaction)
40 if (atomic_dec_and_test(&transaction->use_count)) { 40 if (atomic_dec_and_test(&transaction->use_count)) {
41 BUG_ON(!list_empty(&transaction->list)); 41 BUG_ON(!list_empty(&transaction->list));
42 WARN_ON(transaction->delayed_refs.root.rb_node); 42 WARN_ON(transaction->delayed_refs.root.rb_node);
43 memset(transaction, 0, sizeof(*transaction));
44 kmem_cache_free(btrfs_transaction_cachep, transaction); 43 kmem_cache_free(btrfs_transaction_cachep, transaction);
45 } 44 }
46} 45}
@@ -51,6 +50,14 @@ static noinline void switch_commit_root(struct btrfs_root *root)
51 root->commit_root = btrfs_root_node(root); 50 root->commit_root = btrfs_root_node(root);
52} 51}
53 52
53static inline int can_join_transaction(struct btrfs_transaction *trans,
54 int type)
55{
56 return !(trans->in_commit &&
57 type != TRANS_JOIN &&
58 type != TRANS_JOIN_NOLOCK);
59}
60
54/* 61/*
55 * either allocate a new transaction or hop into the existing one 62 * either allocate a new transaction or hop into the existing one
56 */ 63 */
@@ -62,7 +69,7 @@ static noinline int join_transaction(struct btrfs_root *root, int type)
62 spin_lock(&fs_info->trans_lock); 69 spin_lock(&fs_info->trans_lock);
63loop: 70loop:
64 /* The file system has been taken offline. No new transactions. */ 71 /* The file system has been taken offline. No new transactions. */
65 if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 72 if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
66 spin_unlock(&fs_info->trans_lock); 73 spin_unlock(&fs_info->trans_lock);
67 return -EROFS; 74 return -EROFS;
68 } 75 }
@@ -86,6 +93,10 @@ loop:
86 spin_unlock(&fs_info->trans_lock); 93 spin_unlock(&fs_info->trans_lock);
87 return cur_trans->aborted; 94 return cur_trans->aborted;
88 } 95 }
96 if (!can_join_transaction(cur_trans, type)) {
97 spin_unlock(&fs_info->trans_lock);
98 return -EBUSY;
99 }
89 atomic_inc(&cur_trans->use_count); 100 atomic_inc(&cur_trans->use_count);
90 atomic_inc(&cur_trans->num_writers); 101 atomic_inc(&cur_trans->num_writers);
91 cur_trans->num_joined++; 102 cur_trans->num_joined++;
@@ -114,7 +125,7 @@ loop:
114 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 125 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
115 cur_trans = fs_info->running_transaction; 126 cur_trans = fs_info->running_transaction;
116 goto loop; 127 goto loop;
117 } else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 128 } else if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
118 spin_unlock(&fs_info->trans_lock); 129 spin_unlock(&fs_info->trans_lock);
119 kmem_cache_free(btrfs_transaction_cachep, cur_trans); 130 kmem_cache_free(btrfs_transaction_cachep, cur_trans);
120 return -EROFS; 131 return -EROFS;
@@ -158,6 +169,7 @@ loop:
158 spin_lock_init(&cur_trans->delayed_refs.lock); 169 spin_lock_init(&cur_trans->delayed_refs.lock);
159 170
160 INIT_LIST_HEAD(&cur_trans->pending_snapshots); 171 INIT_LIST_HEAD(&cur_trans->pending_snapshots);
172 INIT_LIST_HEAD(&cur_trans->ordered_operations);
161 list_add_tail(&cur_trans->list, &fs_info->trans_list); 173 list_add_tail(&cur_trans->list, &fs_info->trans_list);
162 extent_io_tree_init(&cur_trans->dirty_pages, 174 extent_io_tree_init(&cur_trans->dirty_pages,
163 fs_info->btree_inode->i_mapping); 175 fs_info->btree_inode->i_mapping);
@@ -302,7 +314,7 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
302 int ret; 314 int ret;
303 u64 qgroup_reserved = 0; 315 u64 qgroup_reserved = 0;
304 316
305 if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) 317 if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
306 return ERR_PTR(-EROFS); 318 return ERR_PTR(-EROFS);
307 319
308 if (current->journal_info) { 320 if (current->journal_info) {
@@ -360,8 +372,11 @@ again:
360 372
361 do { 373 do {
362 ret = join_transaction(root, type); 374 ret = join_transaction(root, type);
363 if (ret == -EBUSY) 375 if (ret == -EBUSY) {
364 wait_current_trans(root); 376 wait_current_trans(root);
377 if (unlikely(type == TRANS_ATTACH))
378 ret = -ENOENT;
379 }
365 } while (ret == -EBUSY); 380 } while (ret == -EBUSY);
366 381
367 if (ret < 0) { 382 if (ret < 0) {
@@ -383,9 +398,10 @@ again:
383 h->block_rsv = NULL; 398 h->block_rsv = NULL;
384 h->orig_rsv = NULL; 399 h->orig_rsv = NULL;
385 h->aborted = 0; 400 h->aborted = 0;
386 h->qgroup_reserved = qgroup_reserved; 401 h->qgroup_reserved = 0;
387 h->delayed_ref_elem.seq = 0; 402 h->delayed_ref_elem.seq = 0;
388 h->type = type; 403 h->type = type;
404 h->allocating_chunk = false;
389 INIT_LIST_HEAD(&h->qgroup_ref_list); 405 INIT_LIST_HEAD(&h->qgroup_ref_list);
390 INIT_LIST_HEAD(&h->new_bgs); 406 INIT_LIST_HEAD(&h->new_bgs);
391 407
@@ -401,6 +417,7 @@ again:
401 h->block_rsv = &root->fs_info->trans_block_rsv; 417 h->block_rsv = &root->fs_info->trans_block_rsv;
402 h->bytes_reserved = num_bytes; 418 h->bytes_reserved = num_bytes;
403 } 419 }
420 h->qgroup_reserved = qgroup_reserved;
404 421
405got_it: 422got_it:
406 btrfs_record_root_in_trans(h, root); 423 btrfs_record_root_in_trans(h, root);
@@ -452,11 +469,43 @@ struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root
452 return start_transaction(root, 0, TRANS_USERSPACE, 0); 469 return start_transaction(root, 0, TRANS_USERSPACE, 0);
453} 470}
454 471
472/*
473 * btrfs_attach_transaction() - catch the running transaction
474 *
 475 * It is used when we want to commit the current transaction, but
476 * don't want to start a new one.
477 *
 478 * Note: If this function returns -ENOENT, it just means there is no
 479 * running transaction. But it is possible that an inactive transaction
 480 * is still in memory, not fully on disk. If you need to be sure there is no
 481 * inactive transaction in the fs when -ENOENT is returned, you should
482 * invoke
483 * btrfs_attach_transaction_barrier()
484 */
455struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root) 485struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root)
456{ 486{
457 return start_transaction(root, 0, TRANS_ATTACH, 0); 487 return start_transaction(root, 0, TRANS_ATTACH, 0);
458} 488}
459 489
490/*
 491 * btrfs_attach_transaction_barrier() - catch the running transaction
 492 *
 493 * It is similar to the above function; the difference is that this one
 494 * will wait for all the inactive transactions until they fully
 495 * complete.
496 */
497struct btrfs_trans_handle *
498btrfs_attach_transaction_barrier(struct btrfs_root *root)
499{
500 struct btrfs_trans_handle *trans;
501
502 trans = start_transaction(root, 0, TRANS_ATTACH, 0);
503 if (IS_ERR(trans) && PTR_ERR(trans) == -ENOENT)
504 btrfs_wait_for_commit(root, 0);
505
506 return trans;
507}
508
460/* wait for a transaction commit to be fully complete */ 509/* wait for a transaction commit to be fully complete */
461static noinline void wait_for_commit(struct btrfs_root *root, 510static noinline void wait_for_commit(struct btrfs_root *root,
462 struct btrfs_transaction *commit) 511 struct btrfs_transaction *commit)
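The barrier variant differs only on the -ENOENT path: after failing to attach, it waits for any transaction still committing to fully reach disk, which is exactly what the btrfs_sync_fs() and btrfs_freeze() callers converted elsewhere in this merge need. A hedged usage sketch:

	trans = btrfs_attach_transaction_barrier(root);
	if (IS_ERR(trans)) {
		/* nothing running, and nothing still landing on disk */
		if (PTR_ERR(trans) == -ENOENT)
			return 0;
		return PTR_ERR(trans);
	}
	return btrfs_commit_transaction(trans, root);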
@@ -645,12 +694,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
645 btrfs_run_delayed_iputs(root); 694 btrfs_run_delayed_iputs(root);
646 695
647 if (trans->aborted || 696 if (trans->aborted ||
648 root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { 697 test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
649 err = -EIO; 698 err = -EIO;
650 }
651 assert_qgroups_uptodate(trans); 699 assert_qgroups_uptodate(trans);
652 700
653 memset(trans, 0, sizeof(*trans));
654 kmem_cache_free(btrfs_trans_handle_cachep, trans); 701 kmem_cache_free(btrfs_trans_handle_cachep, trans);
655 return err; 702 return err;
656} 703}
@@ -961,10 +1008,10 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
961} 1008}
962 1009
963/* 1010/*
964 * defrag a given btree. If cacheonly == 1, this won't read from the disk, 1011 * defrag a given btree.
965 * otherwise every leaf in the btree is read and defragged. 1012 * Every leaf in the btree is read and defragged.
966 */ 1013 */
967int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) 1014int btrfs_defrag_root(struct btrfs_root *root)
968{ 1015{
969 struct btrfs_fs_info *info = root->fs_info; 1016 struct btrfs_fs_info *info = root->fs_info;
970 struct btrfs_trans_handle *trans; 1017 struct btrfs_trans_handle *trans;
@@ -978,7 +1025,7 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
978 if (IS_ERR(trans)) 1025 if (IS_ERR(trans))
979 return PTR_ERR(trans); 1026 return PTR_ERR(trans);
980 1027
981 ret = btrfs_defrag_leaves(trans, root, cacheonly); 1028 ret = btrfs_defrag_leaves(trans, root);
982 1029
983 btrfs_end_transaction(trans, root); 1030 btrfs_end_transaction(trans, root);
984 btrfs_btree_balance_dirty(info->tree_root); 1031 btrfs_btree_balance_dirty(info->tree_root);
@@ -986,6 +1033,12 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
986 1033
987 if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN) 1034 if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
988 break; 1035 break;
1036
1037 if (btrfs_defrag_cancelled(root->fs_info)) {
1038 printk(KERN_DEBUG "btrfs: defrag_root cancelled\n");
1039 ret = -EAGAIN;
1040 break;
1041 }
989 } 1042 }
990 root->defrag_running = 0; 1043 root->defrag_running = 0;
991 return ret; 1044 return ret;
@@ -1307,13 +1360,13 @@ static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1307struct btrfs_async_commit { 1360struct btrfs_async_commit {
1308 struct btrfs_trans_handle *newtrans; 1361 struct btrfs_trans_handle *newtrans;
1309 struct btrfs_root *root; 1362 struct btrfs_root *root;
1310 struct delayed_work work; 1363 struct work_struct work;
1311}; 1364};
1312 1365
1313static void do_async_commit(struct work_struct *work) 1366static void do_async_commit(struct work_struct *work)
1314{ 1367{
1315 struct btrfs_async_commit *ac = 1368 struct btrfs_async_commit *ac =
1316 container_of(work, struct btrfs_async_commit, work.work); 1369 container_of(work, struct btrfs_async_commit, work);
1317 1370
1318 /* 1371 /*
1319 * We've got freeze protection passed with the transaction. 1372 * We've got freeze protection passed with the transaction.
@@ -1341,7 +1394,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1341 if (!ac) 1394 if (!ac)
1342 return -ENOMEM; 1395 return -ENOMEM;
1343 1396
1344 INIT_DELAYED_WORK(&ac->work, do_async_commit); 1397 INIT_WORK(&ac->work, do_async_commit);
1345 ac->root = root; 1398 ac->root = root;
1346 ac->newtrans = btrfs_join_transaction(root); 1399 ac->newtrans = btrfs_join_transaction(root);
1347 if (IS_ERR(ac->newtrans)) { 1400 if (IS_ERR(ac->newtrans)) {
@@ -1365,7 +1418,7 @@ int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1365 &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1], 1418 &root->fs_info->sb->s_writers.lock_map[SB_FREEZE_FS-1],
1366 1, _THIS_IP_); 1419 1, _THIS_IP_);
1367 1420
1368 schedule_delayed_work(&ac->work, 0); 1421 schedule_work(&ac->work);
1369 1422
1370 /* wait for transaction to start and unblock */ 1423 /* wait for transaction to start and unblock */
1371 if (wait_for_unblock) 1424 if (wait_for_unblock)
@@ -1428,7 +1481,9 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
1428 } 1481 }
1429 1482
1430 if (flush_on_commit || snap_pending) { 1483 if (flush_on_commit || snap_pending) {
1431 btrfs_start_delalloc_inodes(root, 1); 1484 ret = btrfs_start_delalloc_inodes(root, 1);
1485 if (ret)
1486 return ret;
1432 btrfs_wait_ordered_extents(root, 1); 1487 btrfs_wait_ordered_extents(root, 1);
1433 } 1488 }
1434 1489
@@ -1450,9 +1505,9 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
 1450 * it here and know for sure that nothing new will be added 1505 * it here and know for sure that nothing new will be added
1451 * to the list 1506 * to the list
1452 */ 1507 */
1453 btrfs_run_ordered_operations(root, 1); 1508 ret = btrfs_run_ordered_operations(trans, root, 1);
1454 1509
1455 return 0; 1510 return ret;
1456} 1511}
1457 1512
1458/* 1513/*
@@ -1473,27 +1528,35 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1473 int should_grow = 0; 1528 int should_grow = 0;
1474 unsigned long now = get_seconds(); 1529 unsigned long now = get_seconds();
1475 1530
1476 ret = btrfs_run_ordered_operations(root, 0); 1531 ret = btrfs_run_ordered_operations(trans, root, 0);
1477 if (ret) { 1532 if (ret) {
1478 btrfs_abort_transaction(trans, root, ret); 1533 btrfs_abort_transaction(trans, root, ret);
1479 goto cleanup_transaction; 1534 btrfs_end_transaction(trans, root);
1535 return ret;
1480 } 1536 }
1481 1537
1482 /* Stop the commit early if ->aborted is set */ 1538 /* Stop the commit early if ->aborted is set */
1483 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { 1539 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1484 ret = cur_trans->aborted; 1540 ret = cur_trans->aborted;
1485 goto cleanup_transaction; 1541 btrfs_end_transaction(trans, root);
1542 return ret;
1486 } 1543 }
1487 1544
1488 /* make a pass through all the delayed refs we have so far 1545 /* make a pass through all the delayed refs we have so far
1489 * any runnings procs may add more while we are here 1546 * any runnings procs may add more while we are here
1490 */ 1547 */
1491 ret = btrfs_run_delayed_refs(trans, root, 0); 1548 ret = btrfs_run_delayed_refs(trans, root, 0);
1492 if (ret) 1549 if (ret) {
1493 goto cleanup_transaction; 1550 btrfs_end_transaction(trans, root);
1551 return ret;
1552 }
1494 1553
1495 btrfs_trans_release_metadata(trans, root); 1554 btrfs_trans_release_metadata(trans, root);
1496 trans->block_rsv = NULL; 1555 trans->block_rsv = NULL;
1556 if (trans->qgroup_reserved) {
1557 btrfs_qgroup_free(root, trans->qgroup_reserved);
1558 trans->qgroup_reserved = 0;
1559 }
1497 1560
1498 cur_trans = trans->transaction; 1561 cur_trans = trans->transaction;
1499 1562
@@ -1507,8 +1570,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1507 btrfs_create_pending_block_groups(trans, root); 1570 btrfs_create_pending_block_groups(trans, root);
1508 1571
1509 ret = btrfs_run_delayed_refs(trans, root, 0); 1572 ret = btrfs_run_delayed_refs(trans, root, 0);
1510 if (ret) 1573 if (ret) {
1511 goto cleanup_transaction; 1574 btrfs_end_transaction(trans, root);
1575 return ret;
1576 }
1512 1577
1513 spin_lock(&cur_trans->commit_lock); 1578 spin_lock(&cur_trans->commit_lock);
1514 if (cur_trans->in_commit) { 1579 if (cur_trans->in_commit) {
@@ -1772,6 +1837,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1772cleanup_transaction: 1837cleanup_transaction:
1773 btrfs_trans_release_metadata(trans, root); 1838 btrfs_trans_release_metadata(trans, root);
1774 trans->block_rsv = NULL; 1839 trans->block_rsv = NULL;
1840 if (trans->qgroup_reserved) {
1841 btrfs_qgroup_free(root, trans->qgroup_reserved);
1842 trans->qgroup_reserved = 0;
1843 }
1775 btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n"); 1844 btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
1776// WARN_ON(1); 1845// WARN_ON(1);
1777 if (current->journal_info == trans) 1846 if (current->journal_info == trans)
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 0e8aa1e6c287..5afd7b1dceac 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -43,6 +43,7 @@ struct btrfs_transaction {
43 wait_queue_head_t writer_wait; 43 wait_queue_head_t writer_wait;
44 wait_queue_head_t commit_wait; 44 wait_queue_head_t commit_wait;
45 struct list_head pending_snapshots; 45 struct list_head pending_snapshots;
46 struct list_head ordered_operations;
46 struct btrfs_delayed_ref_root delayed_refs; 47 struct btrfs_delayed_ref_root delayed_refs;
47 int aborted; 48 int aborted;
48}; 49};
@@ -68,6 +69,7 @@ struct btrfs_trans_handle {
68 struct btrfs_block_rsv *orig_rsv; 69 struct btrfs_block_rsv *orig_rsv;
69 short aborted; 70 short aborted;
70 short adding_csums; 71 short adding_csums;
72 bool allocating_chunk;
71 enum btrfs_trans_type type; 73 enum btrfs_trans_type type;
72 /* 74 /*
73 * this root is only needed to validate that the root passed to 75 * this root is only needed to validate that the root passed to
@@ -110,13 +112,15 @@ struct btrfs_trans_handle *btrfs_start_transaction_lflush(
110struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); 112struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
111struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); 113struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
112struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); 114struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
115struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
116 struct btrfs_root *root);
113struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root); 117struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root);
114int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid); 118int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
115int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 119int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
116 struct btrfs_root *root); 120 struct btrfs_root *root);
117 121
118int btrfs_add_dead_root(struct btrfs_root *root); 122int btrfs_add_dead_root(struct btrfs_root *root);
119int btrfs_defrag_root(struct btrfs_root *root, int cacheonly); 123int btrfs_defrag_root(struct btrfs_root *root);
120int btrfs_clean_old_snapshots(struct btrfs_root *root); 124int btrfs_clean_old_snapshots(struct btrfs_root *root);
121int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 125int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
122 struct btrfs_root *root); 126 struct btrfs_root *root);
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index 3b580ee8ab1d..94e05c1f118a 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -23,13 +23,14 @@
23#include "transaction.h" 23#include "transaction.h"
24#include "locking.h" 24#include "locking.h"
25 25
26/* defrag all the leaves in a given btree. If cache_only == 1, don't read 26/*
27 * things from disk, otherwise read all the leaves and try to get key order to 27 * Defrag all the leaves in a given btree.
28 * Read all the leaves and try to get key order to
28 * better reflect disk order 29 * better reflect disk order
29 */ 30 */
30 31
31int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, 32int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
32 struct btrfs_root *root, int cache_only) 33 struct btrfs_root *root)
33{ 34{
34 struct btrfs_path *path = NULL; 35 struct btrfs_path *path = NULL;
35 struct btrfs_key key; 36 struct btrfs_key key;
@@ -41,9 +42,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
41 u64 last_ret = 0; 42 u64 last_ret = 0;
42 u64 min_trans = 0; 43 u64 min_trans = 0;
43 44
44 if (cache_only)
45 goto out;
46
47 if (root->fs_info->extent_root == root) { 45 if (root->fs_info->extent_root == root) {
48 /* 46 /*
49 * there's recursion here right now in the tree locking, 47 * there's recursion here right now in the tree locking,
@@ -86,11 +84,8 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
86 } 84 }
87 85
88 path->keep_locks = 1; 86 path->keep_locks = 1;
89 if (cache_only)
90 min_trans = root->defrag_trans_start;
91 87
92 ret = btrfs_search_forward(root, &key, NULL, path, 88 ret = btrfs_search_forward(root, &key, NULL, path, min_trans);
93 cache_only, min_trans);
94 if (ret < 0) 89 if (ret < 0)
95 goto out; 90 goto out;
96 if (ret > 0) { 91 if (ret > 0) {
@@ -109,11 +104,11 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
109 goto out; 104 goto out;
110 } 105 }
111 path->slots[1] = btrfs_header_nritems(path->nodes[1]); 106 path->slots[1] = btrfs_header_nritems(path->nodes[1]);
112 next_key_ret = btrfs_find_next_key(root, path, &key, 1, cache_only, 107 next_key_ret = btrfs_find_next_key(root, path, &key, 1,
113 min_trans); 108 min_trans);
114 ret = btrfs_realloc_node(trans, root, 109 ret = btrfs_realloc_node(trans, root,
115 path->nodes[1], 0, 110 path->nodes[1], 0,
116 cache_only, &last_ret, 111 &last_ret,
117 &root->defrag_progress); 112 &root->defrag_progress);
118 if (ret) { 113 if (ret) {
119 WARN_ON(ret == -EAGAIN); 114 WARN_ON(ret == -EAGAIN);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 9027bb1e7466..1a79087c4575 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -278,8 +278,7 @@ static int process_one_buffer(struct btrfs_root *log,
278 struct walk_control *wc, u64 gen) 278 struct walk_control *wc, u64 gen)
279{ 279{
280 if (wc->pin) 280 if (wc->pin)
281 btrfs_pin_extent_for_log_replay(wc->trans, 281 btrfs_pin_extent_for_log_replay(log->fs_info->extent_root,
282 log->fs_info->extent_root,
283 eb->start, eb->len); 282 eb->start, eb->len);
284 283
285 if (btrfs_buffer_uptodate(eb, gen, 0)) { 284 if (btrfs_buffer_uptodate(eb, gen, 0)) {
@@ -2281,6 +2280,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2281 unsigned long log_transid = 0; 2280 unsigned long log_transid = 0;
2282 2281
2283 mutex_lock(&root->log_mutex); 2282 mutex_lock(&root->log_mutex);
2283 log_transid = root->log_transid;
2284 index1 = root->log_transid % 2; 2284 index1 = root->log_transid % 2;
2285 if (atomic_read(&root->log_commit[index1])) { 2285 if (atomic_read(&root->log_commit[index1])) {
2286 wait_log_commit(trans, root, root->log_transid); 2286 wait_log_commit(trans, root, root->log_transid);
@@ -2308,11 +2308,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2308 /* bail out if we need to do a full commit */ 2308 /* bail out if we need to do a full commit */
2309 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2309 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2310 ret = -EAGAIN; 2310 ret = -EAGAIN;
2311 btrfs_free_logged_extents(log, log_transid);
2311 mutex_unlock(&root->log_mutex); 2312 mutex_unlock(&root->log_mutex);
2312 goto out; 2313 goto out;
2313 } 2314 }
2314 2315
2315 log_transid = root->log_transid;
2316 if (log_transid % 2 == 0) 2316 if (log_transid % 2 == 0)
2317 mark = EXTENT_DIRTY; 2317 mark = EXTENT_DIRTY;
2318 else 2318 else
@@ -2324,6 +2324,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2324 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark); 2324 ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
2325 if (ret) { 2325 if (ret) {
2326 btrfs_abort_transaction(trans, root, ret); 2326 btrfs_abort_transaction(trans, root, ret);
2327 btrfs_free_logged_extents(log, log_transid);
2327 mutex_unlock(&root->log_mutex); 2328 mutex_unlock(&root->log_mutex);
2328 goto out; 2329 goto out;
2329 } 2330 }
@@ -2363,6 +2364,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2363 } 2364 }
2364 root->fs_info->last_trans_log_full_commit = trans->transid; 2365 root->fs_info->last_trans_log_full_commit = trans->transid;
2365 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2366 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2367 btrfs_free_logged_extents(log, log_transid);
2366 mutex_unlock(&log_root_tree->log_mutex); 2368 mutex_unlock(&log_root_tree->log_mutex);
2367 ret = -EAGAIN; 2369 ret = -EAGAIN;
2368 goto out; 2370 goto out;
@@ -2373,6 +2375,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2373 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2375 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2374 wait_log_commit(trans, log_root_tree, 2376 wait_log_commit(trans, log_root_tree,
2375 log_root_tree->log_transid); 2377 log_root_tree->log_transid);
2378 btrfs_free_logged_extents(log, log_transid);
2376 mutex_unlock(&log_root_tree->log_mutex); 2379 mutex_unlock(&log_root_tree->log_mutex);
2377 ret = 0; 2380 ret = 0;
2378 goto out; 2381 goto out;
@@ -2392,6 +2395,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2392 */ 2395 */
2393 if (root->fs_info->last_trans_log_full_commit == trans->transid) { 2396 if (root->fs_info->last_trans_log_full_commit == trans->transid) {
2394 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2397 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2398 btrfs_free_logged_extents(log, log_transid);
2395 mutex_unlock(&log_root_tree->log_mutex); 2399 mutex_unlock(&log_root_tree->log_mutex);
2396 ret = -EAGAIN; 2400 ret = -EAGAIN;
2397 goto out_wake_log_root; 2401 goto out_wake_log_root;
@@ -2402,10 +2406,12 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
2402 EXTENT_DIRTY | EXTENT_NEW); 2406 EXTENT_DIRTY | EXTENT_NEW);
2403 if (ret) { 2407 if (ret) {
2404 btrfs_abort_transaction(trans, root, ret); 2408 btrfs_abort_transaction(trans, root, ret);
2409 btrfs_free_logged_extents(log, log_transid);
2405 mutex_unlock(&log_root_tree->log_mutex); 2410 mutex_unlock(&log_root_tree->log_mutex);
2406 goto out_wake_log_root; 2411 goto out_wake_log_root;
2407 } 2412 }
2408 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); 2413 btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
2414 btrfs_wait_logged_extents(log, log_transid);
2409 2415
2410 btrfs_set_super_log_root(root->fs_info->super_for_commit, 2416 btrfs_set_super_log_root(root->fs_info->super_for_commit,
2411 log_root_tree->node->start); 2417 log_root_tree->node->start);
@@ -2475,6 +2481,14 @@ static void free_log_tree(struct btrfs_trans_handle *trans,
2475 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); 2481 EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
2476 } 2482 }
2477 2483
2484 /*
2485 * We may have short-circuited the log tree with the full commit logic
2486 * and left ordered extents on our list, so clear these out to keep us
2487 * from leaking inodes and memory.
2488 */
2489 btrfs_free_logged_extents(log, 0);
2490 btrfs_free_logged_extents(log, 1);
2491
2478 free_extent_buffer(log->node); 2492 free_extent_buffer(log->node);
2479 kfree(log); 2493 kfree(log);
2480} 2494}
@@ -2724,7 +2738,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
2724 path->keep_locks = 1; 2738 path->keep_locks = 1;
2725 2739
2726 ret = btrfs_search_forward(root, &min_key, &max_key, 2740 ret = btrfs_search_forward(root, &min_key, &max_key,
2727 path, 0, trans->transid); 2741 path, trans->transid);
2728 2742
2729 /* 2743 /*
2730 * we didn't find anything from this transaction, see if there 2744 * we didn't find anything from this transaction, see if there
@@ -3271,14 +3285,18 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3271 struct btrfs_root *log = root->log_root; 3285 struct btrfs_root *log = root->log_root;
3272 struct btrfs_file_extent_item *fi; 3286 struct btrfs_file_extent_item *fi;
3273 struct extent_buffer *leaf; 3287 struct extent_buffer *leaf;
3288 struct btrfs_ordered_extent *ordered;
3274 struct list_head ordered_sums; 3289 struct list_head ordered_sums;
3275 struct btrfs_map_token token; 3290 struct btrfs_map_token token;
3276 struct btrfs_key key; 3291 struct btrfs_key key;
3277 u64 csum_offset = em->mod_start - em->start; 3292 u64 mod_start = em->mod_start;
3278 u64 csum_len = em->mod_len; 3293 u64 mod_len = em->mod_len;
3294 u64 csum_offset;
3295 u64 csum_len;
3279 u64 extent_offset = em->start - em->orig_start; 3296 u64 extent_offset = em->start - em->orig_start;
3280 u64 block_len; 3297 u64 block_len;
3281 int ret; 3298 int ret;
3299 int index = log->log_transid % 2;
3282 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3300 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3283 3301
3284 INIT_LIST_HEAD(&ordered_sums); 3302 INIT_LIST_HEAD(&ordered_sums);
@@ -3362,6 +3380,92 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3362 csum_len = block_len; 3380 csum_len = block_len;
3363 } 3381 }
3364 3382
3383 /*
3384 * First check and see if our csums are on our outstanding ordered
3385 * extents.
3386 */
3387again:
3388 spin_lock_irq(&log->log_extents_lock[index]);
3389 list_for_each_entry(ordered, &log->logged_list[index], log_list) {
3390 struct btrfs_ordered_sum *sum;
3391
3392 if (!mod_len)
3393 break;
3394
3395 if (ordered->inode != inode)
3396 continue;
3397
3398 if (ordered->file_offset + ordered->len <= mod_start ||
3399 mod_start + mod_len <= ordered->file_offset)
3400 continue;
3401
3402 /*
3403 * We are going to copy all the csums on this ordered extent, so
3404 * go ahead and adjust mod_start and mod_len in case this
3405 * ordered extent has already been logged.
3406 */
3407 if (ordered->file_offset > mod_start) {
3408 if (ordered->file_offset + ordered->len >=
3409 mod_start + mod_len)
3410 mod_len = ordered->file_offset - mod_start;
3411 /*
3412 * If we have this case
3413 *
3414 * |--------- logged extent ---------|
3415 * |----- ordered extent ----|
3416 *
3417 * Just don't mess with mod_start and mod_len, we'll
3418 * just end up logging more csums than we need and it
3419 * will be ok.
3420 */
3421 } else {
3422 if (ordered->file_offset + ordered->len <
3423 mod_start + mod_len) {
3424 mod_len = (mod_start + mod_len) -
3425 (ordered->file_offset + ordered->len);
3426 mod_start = ordered->file_offset +
3427 ordered->len;
3428 } else {
3429 mod_len = 0;
3430 }
3431 }
3432
3433 /*
3434 * To keep us from looping for the above case of an ordered
3435 * extent that falls inside of the logged extent.
3436 */
3437 if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM,
3438 &ordered->flags))
3439 continue;
3440 atomic_inc(&ordered->refs);
3441 spin_unlock_irq(&log->log_extents_lock[index]);
3442 /*
 3443 * we've dropped the lock; we must either break or
 3444 * start over after this.
3445 */
3446
3447 wait_event(ordered->wait, ordered->csum_bytes_left == 0);
3448
3449 list_for_each_entry(sum, &ordered->list, list) {
3450 ret = btrfs_csum_file_blocks(trans, log, sum);
3451 if (ret) {
3452 btrfs_put_ordered_extent(ordered);
3453 goto unlocked;
3454 }
3455 }
3456 btrfs_put_ordered_extent(ordered);
3457 goto again;
3458
3459 }
3460 spin_unlock_irq(&log->log_extents_lock[index]);
3461unlocked:
3462
3463 if (!mod_len || ret)
3464 return ret;
3465
3466 csum_offset = mod_start - em->start;
3467 csum_len = mod_len;
3468
3365 /* block start is already adjusted for the file extent offset. */ 3469 /* block start is already adjusted for the file extent offset. */
3366 ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 3470 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3367 em->block_start + csum_offset, 3471 em->block_start + csum_offset,
@@ -3393,6 +3497,7 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3393 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree; 3497 struct extent_map_tree *tree = &BTRFS_I(inode)->extent_tree;
3394 u64 test_gen; 3498 u64 test_gen;
3395 int ret = 0; 3499 int ret = 0;
3500 int num = 0;
3396 3501
3397 INIT_LIST_HEAD(&extents); 3502 INIT_LIST_HEAD(&extents);
3398 3503
@@ -3401,16 +3506,31 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3401 3506
3402 list_for_each_entry_safe(em, n, &tree->modified_extents, list) { 3507 list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
3403 list_del_init(&em->list); 3508 list_del_init(&em->list);
3509
3510 /*
 3511 * Just an arbitrary cap: this loop can get really CPU intensive
 3512 * once we are tracking a lot of extents, and once we have that
 3513 * many extents we are better off just committing, since it will
 3514 * be faster.
3515 */
3516 if (++num > 32768) {
3517 list_del_init(&tree->modified_extents);
3518 ret = -EFBIG;
3519 goto process;
3520 }
3521
3404 if (em->generation <= test_gen) 3522 if (em->generation <= test_gen)
3405 continue; 3523 continue;
3406 /* Need a ref to keep it from getting evicted from cache */ 3524 /* Need a ref to keep it from getting evicted from cache */
3407 atomic_inc(&em->refs); 3525 atomic_inc(&em->refs);
3408 set_bit(EXTENT_FLAG_LOGGING, &em->flags); 3526 set_bit(EXTENT_FLAG_LOGGING, &em->flags);
3409 list_add_tail(&em->list, &extents); 3527 list_add_tail(&em->list, &extents);
3528 num++;
3410 } 3529 }
3411 3530
3412 list_sort(NULL, &extents, extent_cmp); 3531 list_sort(NULL, &extents, extent_cmp);
3413 3532
3533process:
3414 while (!list_empty(&extents)) { 3534 while (!list_empty(&extents)) {
3415 em = list_entry(extents.next, struct extent_map, list); 3535 em = list_entry(extents.next, struct extent_map, list);
3416 3536
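The 32768 cap above follows a common cap-and-bail shape; a minimal sketch under the same assumption (threshold and names are illustrative, not kernel API):

    #include <errno.h>

    #define MAX_LOG_EXTENTS 32768	/* arbitrary cap, as in the patch */

    /* Collect at most 'cap' items; past that, return -EFBIG so the
     * caller can fall back to the cheaper bulk path (for the log tree,
     * a full transaction commit).
     */
    static int collect_capped(int nitems, int cap)
    {
        int num = 0, i;

        for (i = 0; i < nitems; i++) {
            if (++num > cap)
                return -EFBIG;
            /* ... take a reference and queue item i ... */
        }
        return 0;
    }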
@@ -3513,6 +3633,8 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3513 3633
3514 mutex_lock(&BTRFS_I(inode)->log_mutex); 3634 mutex_lock(&BTRFS_I(inode)->log_mutex);
3515 3635
3636 btrfs_get_logged_extents(log, inode);
3637
3516 /* 3638 /*
3517 * a brute force approach to making sure we get the most uptodate 3639 * a brute force approach to making sure we get the most uptodate
3518 * copies of everything. 3640 * copies of everything.
@@ -3558,7 +3680,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
3558 while (1) { 3680 while (1) {
3559 ins_nr = 0; 3681 ins_nr = 0;
3560 ret = btrfs_search_forward(root, &min_key, &max_key, 3682 ret = btrfs_search_forward(root, &min_key, &max_key,
3561 path, 0, trans->transid); 3683 path, trans->transid);
3562 if (ret != 0) 3684 if (ret != 0)
3563 break; 3685 break;
3564again: 3686again:
@@ -3656,6 +3778,8 @@ log_extents:
3656 BTRFS_I(inode)->logged_trans = trans->transid; 3778 BTRFS_I(inode)->logged_trans = trans->transid;
3657 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; 3779 BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans;
3658out_unlock: 3780out_unlock:
3781 if (err)
3782 btrfs_free_logged_extents(log, log->log_transid);
3659 mutex_unlock(&BTRFS_I(inode)->log_mutex); 3783 mutex_unlock(&BTRFS_I(inode)->log_mutex);
3660 3784
3661 btrfs_free_path(path); 3785 btrfs_free_path(path);
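The new out_unlock handling releases what was pinned earlier under the same mutex when we bail out with an error. A small hedged sketch of that shape in userspace C (stubs stand in for the btrfs helpers):

    #include <pthread.h>

    static pthread_mutex_t log_mutex = PTHREAD_MUTEX_INITIALIZER;

    static void take_refs(void) { /* e.g. btrfs_get_logged_extents() */ }
    static void drop_refs(void) { /* e.g. btrfs_free_logged_extents() */ }
    static int do_log(void) { return 0; }

    static int log_inode_shape(void)
    {
        int err;

        pthread_mutex_lock(&log_mutex);
        take_refs();
        err = do_log();
        if (err)
            drop_refs();	/* release what we pinned on failure */
        pthread_mutex_unlock(&log_mutex);
        return err;
    }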
@@ -3822,7 +3946,6 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
3822end_trans: 3946end_trans:
3823 dput(old_parent); 3947 dput(old_parent);
3824 if (ret < 0) { 3948 if (ret < 0) {
3825 WARN_ON(ret != -ENOSPC);
3826 root->fs_info->last_trans_log_full_commit = trans->transid; 3949 root->fs_info->last_trans_log_full_commit = trans->transid;
3827 ret = 1; 3950 ret = 1;
3828 } 3951 }
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5cbb7f4b1672..72b1cf1b2b5e 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -792,26 +792,76 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
792 return ret; 792 return ret;
793} 793}
794 794
795/*
796 * Look for a btrfs signature on a device. This may be called outside of
797 * the mount path, and we are not allowed to call set_blocksize during the
798 * scan. The superblock is read via the pagecache.
799 */
795int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, 800int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
796 struct btrfs_fs_devices **fs_devices_ret) 801 struct btrfs_fs_devices **fs_devices_ret)
797{ 802{
798 struct btrfs_super_block *disk_super; 803 struct btrfs_super_block *disk_super;
799 struct block_device *bdev; 804 struct block_device *bdev;
800 struct buffer_head *bh; 805 struct page *page;
801 int ret; 806 void *p;
807 int ret = -EINVAL;
802 u64 devid; 808 u64 devid;
803 u64 transid; 809 u64 transid;
804 u64 total_devices; 810 u64 total_devices;
811 u64 bytenr;
812 pgoff_t index;
805 813
814 /*
815	 * We would like to check all the supers, but that would make
816	 * a btrfs mount succeed after a mkfs from a different FS, so
817	 * for now only the first super is scanned. Checking the later
818	 * copies (up to BTRFS_SUPER_MIRROR_MAX) would need a special mount option.
819 */
820 bytenr = btrfs_sb_offset(0);
806 flags |= FMODE_EXCL; 821 flags |= FMODE_EXCL;
807 mutex_lock(&uuid_mutex); 822 mutex_lock(&uuid_mutex);
808 ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh); 823
809 if (ret) 824 bdev = blkdev_get_by_path(path, flags, holder);
825
826 if (IS_ERR(bdev)) {
827 ret = PTR_ERR(bdev);
828 printk(KERN_INFO "btrfs: open %s failed\n", path);
810 goto error; 829 goto error;
811 disk_super = (struct btrfs_super_block *)bh->b_data; 830 }
831
832 /* make sure our super fits in the device */
833 if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode))
834 goto error_bdev_put;
835
836 /* make sure our super fits in the page */
837 if (sizeof(*disk_super) > PAGE_CACHE_SIZE)
838 goto error_bdev_put;
839
840 /* make sure our super doesn't straddle pages on disk */
841 index = bytenr >> PAGE_CACHE_SHIFT;
842 if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index)
843 goto error_bdev_put;
844
845 /* pull in the page with our super */
846 page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
847 index, GFP_NOFS);
848
849 if (IS_ERR_OR_NULL(page))
850 goto error_bdev_put;
851
852 p = kmap(page);
853
854	/* point at the super block's offset within the mapped page */
855 disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
856
857 if (btrfs_super_bytenr(disk_super) != bytenr ||
858 disk_super->magic != cpu_to_le64(BTRFS_MAGIC))
859 goto error_unmap;
860
812 devid = btrfs_stack_device_id(&disk_super->dev_item); 861 devid = btrfs_stack_device_id(&disk_super->dev_item);
813 transid = btrfs_super_generation(disk_super); 862 transid = btrfs_super_generation(disk_super);
814 total_devices = btrfs_super_num_devices(disk_super); 863 total_devices = btrfs_super_num_devices(disk_super);
864
815 if (disk_super->label[0]) { 865 if (disk_super->label[0]) {
816 if (disk_super->label[BTRFS_LABEL_SIZE - 1]) 866 if (disk_super->label[BTRFS_LABEL_SIZE - 1])
817 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; 867 disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
@@ -819,12 +869,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
819 } else { 869 } else {
820 printk(KERN_INFO "device fsid %pU ", disk_super->fsid); 870 printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
821 } 871 }
872
822 printk(KERN_CONT "devid %llu transid %llu %s\n", 873 printk(KERN_CONT "devid %llu transid %llu %s\n",
823 (unsigned long long)devid, (unsigned long long)transid, path); 874 (unsigned long long)devid, (unsigned long long)transid, path);
875
824 ret = device_list_add(path, disk_super, devid, fs_devices_ret); 876 ret = device_list_add(path, disk_super, devid, fs_devices_ret);
825 if (!ret && fs_devices_ret) 877 if (!ret && fs_devices_ret)
826 (*fs_devices_ret)->total_devices = total_devices; 878 (*fs_devices_ret)->total_devices = total_devices;
827 brelse(bh); 879
880error_unmap:
881 kunmap(page);
882 page_cache_release(page);
883
884error_bdev_put:
828 blkdev_put(bdev, flags); 885 blkdev_put(bdev, flags);
829error: 886error:
830 mutex_unlock(&uuid_mutex); 887 mutex_unlock(&uuid_mutex);
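A hedged userspace illustration of the page arithmetic used above; the 4K page size and the 64K primary super offset are assumptions of the sketch, not taken from the patch:

    #include <stdint.h>
    #include <stdio.h>

    #define SB_SHIFT     12			/* assume 4K pages */
    #define SB_PAGE_SIZE (1UL << SB_SHIFT)

    int main(void)
    {
        uint64_t bytenr = 64 * 1024;	/* btrfs_sb_offset(0) */
        uint64_t super_size = 4096;		/* stand-in for sizeof(*disk_super) */
        uint64_t index = bytenr >> SB_SHIFT;
        uint64_t off = bytenr & (SB_PAGE_SIZE - 1);

        /* same straddle test as the patch */
        if ((bytenr + super_size - 1) >> SB_SHIFT != index)
            fprintf(stderr, "super would straddle a page boundary\n");

        printf("page index %llu, offset in page %llu\n",
               (unsigned long long)index, (unsigned long long)off);
        return 0;
    }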
@@ -1372,14 +1429,19 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1372 u64 devid; 1429 u64 devid;
1373 u64 num_devices; 1430 u64 num_devices;
1374 u8 *dev_uuid; 1431 u8 *dev_uuid;
1432 unsigned seq;
1375 int ret = 0; 1433 int ret = 0;
1376 bool clear_super = false; 1434 bool clear_super = false;
1377 1435
1378 mutex_lock(&uuid_mutex); 1436 mutex_lock(&uuid_mutex);
1379 1437
1380 all_avail = root->fs_info->avail_data_alloc_bits | 1438 do {
1381 root->fs_info->avail_system_alloc_bits | 1439 seq = read_seqbegin(&root->fs_info->profiles_lock);
1382 root->fs_info->avail_metadata_alloc_bits; 1440
1441 all_avail = root->fs_info->avail_data_alloc_bits |
1442 root->fs_info->avail_system_alloc_bits |
1443 root->fs_info->avail_metadata_alloc_bits;
1444 } while (read_seqretry(&root->fs_info->profiles_lock, seq));
1383 1445
1384 num_devices = root->fs_info->fs_devices->num_devices; 1446 num_devices = root->fs_info->fs_devices->num_devices;
1385 btrfs_dev_replace_lock(&root->fs_info->dev_replace); 1447 btrfs_dev_replace_lock(&root->fs_info->dev_replace);
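The do/while above is the standard seqlock read-side pattern (reused in btrfs_balance() further down): retry the snapshot until no writer raced with the reads. A minimal kernel-context sketch, with parameters standing in for the fs_info fields:

    #include <linux/seqlock.h>
    #include <linux/types.h>

    /* Snapshot three profile bitmasks consistently; read_seqretry()
     * reports whether a writer holding write_seqlock() intervened.
     */
    static u64 snapshot_avail_bits(seqlock_t *profiles_lock,
                                   const u64 *data, const u64 *sys,
                                   const u64 *meta)
    {
        u64 all_avail;
        unsigned seq;

        do {
            seq = read_seqbegin(profiles_lock);
            all_avail = *data | *sys | *meta;
        } while (read_seqretry(profiles_lock, seq));

        return all_avail;
    }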
@@ -2616,7 +2678,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
2616 chunk_used = btrfs_block_group_used(&cache->item); 2678 chunk_used = btrfs_block_group_used(&cache->item);
2617 2679
2618 if (bargs->usage == 0) 2680 if (bargs->usage == 0)
2619 user_thresh = 0; 2681 user_thresh = 1;
2620 else if (bargs->usage > 100) 2682 else if (bargs->usage > 100)
2621 user_thresh = cache->key.offset; 2683 user_thresh = cache->key.offset;
2622 else 2684 else
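The usage==0 change above is subtle: the filter relocates a chunk when chunk_used falls below user_thresh, so a threshold of 0 could never match, while a threshold of 1 matches exactly the completely empty chunks. A one-function sketch of the comparison:

    #include <stdbool.h>
    #include <stdint.h>

    /* usage filter core: balance the chunk iff its used bytes are below
     * the threshold; with user_thresh == 1, only chunk_used == 0 passes.
     */
    static bool usage_filter_hits(uint64_t chunk_used, uint64_t user_thresh)
    {
        return chunk_used < user_thresh;
    }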
@@ -2985,6 +3047,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
2985 int mixed = 0; 3047 int mixed = 0;
2986 int ret; 3048 int ret;
2987 u64 num_devices; 3049 u64 num_devices;
3050 unsigned seq;
2988 3051
2989 if (btrfs_fs_closing(fs_info) || 3052 if (btrfs_fs_closing(fs_info) ||
2990 atomic_read(&fs_info->balance_pause_req) || 3053 atomic_read(&fs_info->balance_pause_req) ||
@@ -3068,22 +3131,26 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3068 /* allow to reduce meta or sys integrity only if force set */ 3131 /* allow to reduce meta or sys integrity only if force set */
3069 allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | 3132 allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
3070 BTRFS_BLOCK_GROUP_RAID10; 3133 BTRFS_BLOCK_GROUP_RAID10;
3071 if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3134 do {
3072 (fs_info->avail_system_alloc_bits & allowed) && 3135 seq = read_seqbegin(&fs_info->profiles_lock);
3073 !(bctl->sys.target & allowed)) || 3136
3074 ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && 3137 if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3075 (fs_info->avail_metadata_alloc_bits & allowed) && 3138 (fs_info->avail_system_alloc_bits & allowed) &&
3076 !(bctl->meta.target & allowed))) { 3139 !(bctl->sys.target & allowed)) ||
3077 if (bctl->flags & BTRFS_BALANCE_FORCE) { 3140 ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3078 printk(KERN_INFO "btrfs: force reducing metadata " 3141 (fs_info->avail_metadata_alloc_bits & allowed) &&
3079 "integrity\n"); 3142 !(bctl->meta.target & allowed))) {
3080 } else { 3143 if (bctl->flags & BTRFS_BALANCE_FORCE) {
3081 printk(KERN_ERR "btrfs: balance will reduce metadata " 3144 printk(KERN_INFO "btrfs: force reducing metadata "
3082 "integrity, use force if you want this\n"); 3145 "integrity\n");
3083 ret = -EINVAL; 3146 } else {
3084 goto out; 3147 printk(KERN_ERR "btrfs: balance will reduce metadata "
3148 "integrity, use force if you want this\n");
3149 ret = -EINVAL;
3150 goto out;
3151 }
3085 } 3152 }
3086 } 3153 } while (read_seqretry(&fs_info->profiles_lock, seq));
3087 3154
3088 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { 3155 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3089 int num_tolerated_disk_barrier_failures; 3156 int num_tolerated_disk_barrier_failures;
@@ -3127,6 +3194,11 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3127 mutex_lock(&fs_info->balance_mutex); 3194 mutex_lock(&fs_info->balance_mutex);
3128 atomic_dec(&fs_info->balance_running); 3195 atomic_dec(&fs_info->balance_running);
3129 3196
3197 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3198 fs_info->num_tolerated_disk_barrier_failures =
3199 btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
3200 }
3201
3130 if (bargs) { 3202 if (bargs) {
3131 memset(bargs, 0, sizeof(*bargs)); 3203 memset(bargs, 0, sizeof(*bargs));
3132 update_ioctl_balance_args(fs_info, 0, bargs); 3204 update_ioctl_balance_args(fs_info, 0, bargs);
@@ -3137,11 +3209,6 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3137 __cancel_balance(fs_info); 3209 __cancel_balance(fs_info);
3138 } 3210 }
3139 3211
3140 if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
3141 fs_info->num_tolerated_disk_barrier_failures =
3142 btrfs_calc_num_tolerated_disk_barrier_failures(fs_info);
3143 }
3144
3145 wake_up(&fs_info->balance_wait_q); 3212 wake_up(&fs_info->balance_wait_q);
3146 3213
3147 return ret; 3214 return ret;
@@ -3504,13 +3571,48 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
3504} 3571}
3505 3572
3506struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = { 3573struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
3507 { 2, 1, 0, 4, 2, 2 /* raid10 */ }, 3574 [BTRFS_RAID_RAID10] = {
3508 { 1, 1, 2, 2, 2, 2 /* raid1 */ }, 3575 .sub_stripes = 2,
3509 { 1, 2, 1, 1, 1, 2 /* dup */ }, 3576 .dev_stripes = 1,
3510 { 1, 1, 0, 2, 1, 1 /* raid0 */ }, 3577 .devs_max = 0, /* 0 == as many as possible */
3511 { 1, 1, 1, 1, 1, 1 /* single */ }, 3578 .devs_min = 4,
3579 .devs_increment = 2,
3580 .ncopies = 2,
3581 },
3582 [BTRFS_RAID_RAID1] = {
3583 .sub_stripes = 1,
3584 .dev_stripes = 1,
3585 .devs_max = 2,
3586 .devs_min = 2,
3587 .devs_increment = 2,
3588 .ncopies = 2,
3589 },
3590 [BTRFS_RAID_DUP] = {
3591 .sub_stripes = 1,
3592 .dev_stripes = 2,
3593 .devs_max = 1,
3594 .devs_min = 1,
3595 .devs_increment = 1,
3596 .ncopies = 2,
3597 },
3598 [BTRFS_RAID_RAID0] = {
3599 .sub_stripes = 1,
3600 .dev_stripes = 1,
3601 .devs_max = 0,
3602 .devs_min = 2,
3603 .devs_increment = 1,
3604 .ncopies = 1,
3605 },
3606 [BTRFS_RAID_SINGLE] = {
3607 .sub_stripes = 1,
3608 .dev_stripes = 1,
3609 .devs_max = 1,
3610 .devs_min = 1,
3611 .devs_increment = 1,
3612 .ncopies = 1,
3613 },
3512}; 3614};
3513 3615
3514static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 3616static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3515 struct btrfs_root *extent_root, 3617 struct btrfs_root *extent_root,
3516 struct map_lookup **map_ret, 3618 struct map_lookup **map_ret,
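The table conversion above swaps positional initializers for designated ones, tying each row to its BTRFS_RAID_* index instead of to declaration order. A standalone sketch of the idiom, using a couple of fields and values from the patch:

    enum raid_index { R_RAID10, R_RAID1, R_DUP, R_RAID0, R_SINGLE, R_NR };

    struct raid_attr {
        int devs_min;
        int ncopies;
    };

    /* Reordering the enum no longer silently misassigns rows. */
    static const struct raid_attr raid_array[R_NR] = {
        [R_RAID10] = { .devs_min = 4, .ncopies = 2 },
        [R_RAID1]  = { .devs_min = 2, .ncopies = 2 },
        [R_DUP]    = { .devs_min = 1, .ncopies = 2 },
        [R_RAID0]  = { .devs_min = 2, .ncopies = 1 },
        [R_SINGLE] = { .devs_min = 1, .ncopies = 1 },
    };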
@@ -3631,12 +3733,16 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3631 if (max_avail < BTRFS_STRIPE_LEN * dev_stripes) 3733 if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
3632 continue; 3734 continue;
3633 3735
3736 if (ndevs == fs_devices->rw_devices) {
3737 WARN(1, "%s: found more than %llu devices\n",
3738 __func__, fs_devices->rw_devices);
3739 break;
3740 }
3634 devices_info[ndevs].dev_offset = dev_offset; 3741 devices_info[ndevs].dev_offset = dev_offset;
3635 devices_info[ndevs].max_avail = max_avail; 3742 devices_info[ndevs].max_avail = max_avail;
3636 devices_info[ndevs].total_avail = total_avail; 3743 devices_info[ndevs].total_avail = total_avail;
3637 devices_info[ndevs].dev = device; 3744 devices_info[ndevs].dev = device;
3638 ++ndevs; 3745 ++ndevs;
3639 WARN_ON(ndevs > fs_devices->rw_devices);
3640 } 3746 }
3641 3747
3642 /* 3748 /*
@@ -3718,15 +3824,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3718 write_lock(&em_tree->lock); 3824 write_lock(&em_tree->lock);
3719 ret = add_extent_mapping(em_tree, em); 3825 ret = add_extent_mapping(em_tree, em);
3720 write_unlock(&em_tree->lock); 3826 write_unlock(&em_tree->lock);
3721 free_extent_map(em); 3827 if (ret) {
3722 if (ret) 3828 free_extent_map(em);
3723 goto error;
3724
3725 ret = btrfs_make_block_group(trans, extent_root, 0, type,
3726 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
3727 start, num_bytes);
3728 if (ret)
3729 goto error; 3829 goto error;
3830 }
3730 3831
3731 for (i = 0; i < map->num_stripes; ++i) { 3832 for (i = 0; i < map->num_stripes; ++i) {
3732 struct btrfs_device *device; 3833 struct btrfs_device *device;
@@ -3739,15 +3840,42 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
3739 info->chunk_root->root_key.objectid, 3840 info->chunk_root->root_key.objectid,
3740 BTRFS_FIRST_CHUNK_TREE_OBJECTID, 3841 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
3741 start, dev_offset, stripe_size); 3842 start, dev_offset, stripe_size);
3742 if (ret) { 3843 if (ret)
3743 btrfs_abort_transaction(trans, extent_root, ret); 3844 goto error_dev_extent;
3744 goto error;
3745 }
3746 } 3845 }
3747 3846
3847 ret = btrfs_make_block_group(trans, extent_root, 0, type,
3848 BTRFS_FIRST_CHUNK_TREE_OBJECTID,
3849 start, num_bytes);
3850 if (ret) {
3851 i = map->num_stripes - 1;
3852 goto error_dev_extent;
3853 }
3854
3855 free_extent_map(em);
3748 kfree(devices_info); 3856 kfree(devices_info);
3749 return 0; 3857 return 0;
3750 3858
3859error_dev_extent:
3860 for (; i >= 0; i--) {
3861 struct btrfs_device *device;
3862 int err;
3863
3864 device = map->stripes[i].dev;
3865 err = btrfs_free_dev_extent(trans, device, start);
3866 if (err) {
3867 btrfs_abort_transaction(trans, extent_root, err);
3868 break;
3869 }
3870 }
3871 write_lock(&em_tree->lock);
3872 remove_extent_mapping(em_tree, em);
3873 write_unlock(&em_tree->lock);
3874
3875 /* One for our allocation */
3876 free_extent_map(em);
3877 /* One for the tree reference */
3878 free_extent_map(em);
3751error: 3879error:
3752 kfree(map); 3880 kfree(map);
3753 kfree(devices_info); 3881 kfree(devices_info);
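The error_dev_extent label above is a partial unwind: free the dev extents created so far, then drop both extent-map references. A generic hedged sketch of the shape (stubs stand in for the btrfs helpers):

    static int alloc_one(int i) { (void)i; return 0; }	/* stub */
    static void free_one(int i) { (void)i; }		/* stub */

    static int alloc_all(int n)
    {
        int i, ret;

        for (i = 0; i < n; i++) {
            ret = alloc_one(i);
            if (ret)
                goto unwind;
        }
        return 0;

    unwind:
        /* free everything successfully allocated before the failure */
        while (--i >= 0)
            free_one(i);
        return ret;
    }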
@@ -3887,10 +4015,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
3887 if (ret) 4015 if (ret)
3888 return ret; 4016 return ret;
3889 4017
3890 alloc_profile = BTRFS_BLOCK_GROUP_METADATA | 4018 alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
3891 fs_info->avail_metadata_alloc_bits;
3892 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
3893
3894 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size, 4019 ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
3895 &stripe_size, chunk_offset, alloc_profile); 4020 &stripe_size, chunk_offset, alloc_profile);
3896 if (ret) 4021 if (ret)
@@ -3898,10 +4023,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
3898 4023
3899 sys_chunk_offset = chunk_offset + chunk_size; 4024 sys_chunk_offset = chunk_offset + chunk_size;
3900 4025
3901 alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM | 4026 alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
3902 fs_info->avail_system_alloc_bits;
3903 alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
3904
3905 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map, 4027 ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
3906 &sys_chunk_size, &sys_stripe_size, 4028 &sys_chunk_size, &sys_stripe_size,
3907 sys_chunk_offset, alloc_profile); 4029 sys_chunk_offset, alloc_profile);
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index d3c3939ac751..12bb84166a5f 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -21,8 +21,8 @@
21 21
22#include <linux/bio.h> 22#include <linux/bio.h>
23#include <linux/sort.h> 23#include <linux/sort.h>
24#include <linux/btrfs.h>
24#include "async-thread.h" 25#include "async-thread.h"
25#include "ioctl.h"
26 26
27#define BTRFS_STRIPE_LEN (64 * 1024) 27#define BTRFS_STRIPE_LEN (64 * 1024)
28 28
diff --git a/include/linux/btrfs.h b/include/linux/btrfs.h
new file mode 100644
index 000000000000..22d799147db2
--- /dev/null
+++ b/include/linux/btrfs.h
@@ -0,0 +1,6 @@
1#ifndef _LINUX_BTRFS_H
2#define _LINUX_BTRFS_H
3
4#include <uapi/linux/btrfs.h>
5
6#endif /* _LINUX_BTRFS_H */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 4e67194fd2c3..5c8a1d25e21c 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -68,6 +68,7 @@ header-y += blkpg.h
68header-y += blktrace_api.h 68header-y += blktrace_api.h
69header-y += bpqether.h 69header-y += bpqether.h
70header-y += bsg.h 70header-y += bsg.h
71header-y += btrfs.h
71header-y += can.h 72header-y += can.h
72header-y += capability.h 73header-y += capability.h
73header-y += capi.h 74header-y += capi.h
diff --git a/fs/btrfs/ioctl.h b/include/uapi/linux/btrfs.h
index dabca9cc8c2e..fa3a5f9338fc 100644
--- a/fs/btrfs/ioctl.h
+++ b/include/uapi/linux/btrfs.h
@@ -16,8 +16,9 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#ifndef __IOCTL_ 19#ifndef _UAPI_LINUX_BTRFS_H
20#define __IOCTL_ 20#define _UAPI_LINUX_BTRFS_H
21#include <linux/types.h>
21#include <linux/ioctl.h> 22#include <linux/ioctl.h>
22 23
23#define BTRFS_IOCTL_MAGIC 0x94 24#define BTRFS_IOCTL_MAGIC 0x94
@@ -406,6 +407,13 @@ struct btrfs_ioctl_received_subvol_args {
406 __u64 reserved[16]; /* in */ 407 __u64 reserved[16]; /* in */
407}; 408};
408 409
410/*
411 * Caller doesn't want file data in the send stream, even if the
412 * search of clone sources doesn't find an extent. UPDATE_EXTENT
413 * commands will be sent instead of WRITE commands.
414 */
415#define BTRFS_SEND_FLAG_NO_FILE_DATA 0x1
416
409struct btrfs_ioctl_send_args { 417struct btrfs_ioctl_send_args {
410 __s64 send_fd; /* in */ 418 __s64 send_fd; /* in */
411 __u64 clone_sources_count; /* in */ 419 __u64 clone_sources_count; /* in */
@@ -494,9 +502,13 @@ struct btrfs_ioctl_send_args {
494 struct btrfs_ioctl_qgroup_create_args) 502 struct btrfs_ioctl_qgroup_create_args)
495#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \ 503#define BTRFS_IOC_QGROUP_LIMIT _IOR(BTRFS_IOCTL_MAGIC, 43, \
496 struct btrfs_ioctl_qgroup_limit_args) 504 struct btrfs_ioctl_qgroup_limit_args)
505#define BTRFS_IOC_GET_FSLABEL _IOR(BTRFS_IOCTL_MAGIC, 49, \
506 char[BTRFS_LABEL_SIZE])
507#define BTRFS_IOC_SET_FSLABEL _IOW(BTRFS_IOCTL_MAGIC, 50, \
508 char[BTRFS_LABEL_SIZE])
497#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \ 509#define BTRFS_IOC_GET_DEV_STATS _IOWR(BTRFS_IOCTL_MAGIC, 52, \
498 struct btrfs_ioctl_get_dev_stats) 510 struct btrfs_ioctl_get_dev_stats)
499#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \ 511#define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
500 struct btrfs_ioctl_dev_replace_args) 512 struct btrfs_ioctl_dev_replace_args)
501 513
502#endif 514#endif /* _UAPI_LINUX_BTRFS_H */
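With the header exported to uapi, userspace can reach these ioctls directly. A hedged example against the BTRFS_IOC_GET_FSLABEL definition added above; BTRFS_LABEL_SIZE (256 in this era) may still live in a kernel-internal header, hence the fallback define:

    #include <stdio.h>
    #include <fcntl.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/btrfs.h>

    #ifndef BTRFS_LABEL_SIZE
    #define BTRFS_LABEL_SIZE 256
    #endif

    int main(int argc, char **argv)
    {
        char label[BTRFS_LABEL_SIZE] = { 0 };
        int fd = open(argc > 1 ? argv[1] : "/", O_RDONLY);

        if (fd < 0 || ioctl(fd, BTRFS_IOC_GET_FSLABEL, label) < 0) {
            perror("BTRFS_IOC_GET_FSLABEL");
            return 1;
        }
        printf("label: %.*s\n", BTRFS_LABEL_SIZE, label);
        close(fd);
        return 0;
    }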