aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-10-30 12:05:48 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-10-30 12:05:48 -0400
commit 925d169f5b86fe57e2f5264ea574cce9a89b719d (patch)
tree 241d3156b427c6398bd3fc5efa9108635d0e189b
parent cdf01dd5443d0befc8c6a32cb2e3d2f568fd2558 (diff)
parent 6418c96107a2b399848bb8cfc6e29f11ca74fb94 (diff)
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (39 commits)
  Btrfs: deal with errors from updating the tree log
  Btrfs: allow subvol deletion by unprivileged user with -o user_subvol_rm_allowed
  Btrfs: make SNAP_DESTROY async
  Btrfs: add SNAP_CREATE_ASYNC ioctl
  Btrfs: add START_SYNC, WAIT_SYNC ioctls
  Btrfs: async transaction commit
  Btrfs: fix deadlock in btrfs_commit_transaction
  Btrfs: fix lockdep warning on clone ioctl
  Btrfs: fix clone ioctl where range is adjacent to extent
  Btrfs: fix delalloc checks in clone ioctl
  Btrfs: drop unused variable in block_alloc_rsv
  Btrfs: cleanup warnings from gcc 4.6 (nonbugs)
  Btrfs: Fix variables set but not read (bugs found by gcc 4.6)
  Btrfs: Use ERR_CAST helpers
  Btrfs: use memdup_user helpers
  Btrfs: fix raid code for removing missing drives
  Btrfs: Switch the extent buffer rbtree into a radix tree
  Btrfs: restructure try_release_extent_buffer()
  Btrfs: use the flusher threads for delalloc throttling
  Btrfs: tune the chunk allocation to 5% of the FS as metadata
  ...

Fix up trivial conflicts in fs/btrfs/super.c and fs/fs-writeback.c, and
remove use of INIT_RCU_HEAD in fs/btrfs/extent_io.c (that init macro was
useless and removed in commit 5e8067adfdba: "rcu head remove init")
-rw-r--r-- fs/btrfs/compression.c 2
-rw-r--r-- fs/btrfs/ctree.c 57
-rw-r--r-- fs/btrfs/ctree.h 100
-rw-r--r-- fs/btrfs/dir-item.c 2
-rw-r--r-- fs/btrfs/disk-io.c 32
-rw-r--r-- fs/btrfs/extent-tree.c 694
-rw-r--r-- fs/btrfs/extent_io.c 168
-rw-r--r-- fs/btrfs/extent_io.h 4
-rw-r--r-- fs/btrfs/extent_map.c 4
-rw-r--r-- fs/btrfs/free-space-cache.c 751
-rw-r--r-- fs/btrfs/free-space-cache.h 18
-rw-r--r-- fs/btrfs/inode.c 202
-rw-r--r-- fs/btrfs/ioctl.c 398
-rw-r--r-- fs/btrfs/ioctl.h 13
-rw-r--r-- fs/btrfs/ordered-data.c 2
-rw-r--r-- fs/btrfs/relocation.c 109
-rw-r--r-- fs/btrfs/root-tree.c 2
-rw-r--r-- fs/btrfs/super.c 42
-rw-r--r-- fs/btrfs/transaction.c 234
-rw-r--r-- fs/btrfs/transaction.h 8
-rw-r--r-- fs/btrfs/tree-defrag.c 2
-rw-r--r-- fs/btrfs/tree-log.c 17
-rw-r--r-- fs/btrfs/volumes.c 7
-rw-r--r-- fs/btrfs/xattr.c 2
-rw-r--r-- fs/btrfs/zlib.c 5
-rw-r--r-- fs/fs-writeback.c 47
-rw-r--r-- include/linux/writeback.h 2
27 files changed, 2405 insertions, 519 deletions
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 396039b3a8a2..7845d1f7d1d9 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -163,7 +163,6 @@ fail:
163 */ 163 */
164static void end_compressed_bio_read(struct bio *bio, int err) 164static void end_compressed_bio_read(struct bio *bio, int err)
165{ 165{
166 struct extent_io_tree *tree;
167 struct compressed_bio *cb = bio->bi_private; 166 struct compressed_bio *cb = bio->bi_private;
168 struct inode *inode; 167 struct inode *inode;
169 struct page *page; 168 struct page *page;
@@ -187,7 +186,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
187 /* ok, we're the last bio for this extent, lets start 186 /* ok, we're the last bio for this extent, lets start
188 * the decompression. 187 * the decompression.
189 */ 188 */
190 tree = &BTRFS_I(inode)->io_tree;
191 ret = btrfs_zlib_decompress_biovec(cb->compressed_pages, 189 ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
192 cb->start, 190 cb->start,
193 cb->orig_bio->bi_io_vec, 191 cb->orig_bio->bi_io_vec,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3df14ce2cc2..9ac171599258 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
200 struct extent_buffer **cow_ret, u64 new_root_objectid) 200 struct extent_buffer **cow_ret, u64 new_root_objectid)
201{ 201{
202 struct extent_buffer *cow; 202 struct extent_buffer *cow;
203 u32 nritems;
204 int ret = 0; 203 int ret = 0;
205 int level; 204 int level;
206 struct btrfs_disk_key disk_key; 205 struct btrfs_disk_key disk_key;
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
210 WARN_ON(root->ref_cows && trans->transid != root->last_trans); 209 WARN_ON(root->ref_cows && trans->transid != root->last_trans);
211 210
212 level = btrfs_header_level(buf); 211 level = btrfs_header_level(buf);
213 nritems = btrfs_header_nritems(buf);
214 if (level == 0) 212 if (level == 0)
215 btrfs_item_key(buf, &disk_key, 0); 213 btrfs_item_key(buf, &disk_key, 0);
216 else 214 else
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1008 int wret; 1006 int wret;
1009 int pslot; 1007 int pslot;
1010 int orig_slot = path->slots[level]; 1008 int orig_slot = path->slots[level];
1011 int err_on_enospc = 0;
1012 u64 orig_ptr; 1009 u64 orig_ptr;
1013 1010
1014 if (level == 0) 1011 if (level == 0)
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1071 BTRFS_NODEPTRS_PER_BLOCK(root) / 4) 1068 BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
1072 return 0; 1069 return 0;
1073 1070
1074 if (btrfs_header_nritems(mid) < 2) 1071 btrfs_header_nritems(mid);
1075 err_on_enospc = 1;
1076 1072
1077 left = read_node_slot(root, parent, pslot - 1); 1073 left = read_node_slot(root, parent, pslot - 1);
1078 if (left) { 1074 if (left) {
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1103 wret = push_node_left(trans, root, left, mid, 1); 1099 wret = push_node_left(trans, root, left, mid, 1);
1104 if (wret < 0) 1100 if (wret < 0)
1105 ret = wret; 1101 ret = wret;
1106 if (btrfs_header_nritems(mid) < 2) 1102 btrfs_header_nritems(mid);
1107 err_on_enospc = 1;
1108 } 1103 }
1109 1104
1110 /* 1105 /*
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
1224 int wret; 1219 int wret;
1225 int pslot; 1220 int pslot;
1226 int orig_slot = path->slots[level]; 1221 int orig_slot = path->slots[level];
1227 u64 orig_ptr;
1228 1222
1229 if (level == 0) 1223 if (level == 0)
1230 return 1; 1224 return 1;
1231 1225
1232 mid = path->nodes[level]; 1226 mid = path->nodes[level];
1233 WARN_ON(btrfs_header_generation(mid) != trans->transid); 1227 WARN_ON(btrfs_header_generation(mid) != trans->transid);
1234 orig_ptr = btrfs_node_blockptr(mid, orig_slot);
1235 1228
1236 if (level < BTRFS_MAX_LEVEL - 1) 1229 if (level < BTRFS_MAX_LEVEL - 1)
1237 parent = path->nodes[level + 1]; 1230 parent = path->nodes[level + 1];
@@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1577 blocksize = btrfs_level_size(root, level - 1); 1570 blocksize = btrfs_level_size(root, level - 1);
1578 1571
1579 tmp = btrfs_find_tree_block(root, blocknr, blocksize); 1572 tmp = btrfs_find_tree_block(root, blocknr, blocksize);
1580 if (tmp && btrfs_buffer_uptodate(tmp, gen)) { 1573 if (tmp) {
1581 /* 1574 if (btrfs_buffer_uptodate(tmp, 0)) {
1582 * we found an up to date block without sleeping, return 1575 if (btrfs_buffer_uptodate(tmp, gen)) {
1583 * right away 1576 /*
1584 */ 1577 * we found an up to date block without
1585 *eb_ret = tmp; 1578 * sleeping, return
1586 return 0; 1579 * right away
1580 */
1581 *eb_ret = tmp;
1582 return 0;
1583 }
1584 /* the pages were up to date, but we failed
1585 * the generation number check. Do a full
1586 * read for the generation number that is correct.
1587 * We must do this without dropping locks so
1588 * we can trust our generation number
1589 */
1590 free_extent_buffer(tmp);
1591 tmp = read_tree_block(root, blocknr, blocksize, gen);
1592 if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
1593 *eb_ret = tmp;
1594 return 0;
1595 }
1596 free_extent_buffer(tmp);
1597 btrfs_release_path(NULL, p);
1598 return -EIO;
1599 }
1587 } 1600 }
1588 1601
1589 /* 1602 /*
@@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
1596 btrfs_unlock_up_safe(p, level + 1); 1609 btrfs_unlock_up_safe(p, level + 1);
1597 btrfs_set_path_blocking(p); 1610 btrfs_set_path_blocking(p);
1598 1611
1599 if (tmp) 1612 free_extent_buffer(tmp);
1600 free_extent_buffer(tmp);
1601 if (p->reada) 1613 if (p->reada)
1602 reada_for_search(root, p, level, slot, key->objectid); 1614 reada_for_search(root, p, level, slot, key->objectid);
1603 1615
@@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2548{ 2560{
2549 struct btrfs_disk_key disk_key; 2561 struct btrfs_disk_key disk_key;
2550 struct extent_buffer *right = path->nodes[0]; 2562 struct extent_buffer *right = path->nodes[0];
2551 int slot;
2552 int i; 2563 int i;
2553 int push_space = 0; 2564 int push_space = 0;
2554 int push_items = 0; 2565 int push_items = 0;
@@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
2560 u32 this_item_size; 2571 u32 this_item_size;
2561 u32 old_left_item_size; 2572 u32 old_left_item_size;
2562 2573
2563 slot = path->slots[1];
2564
2565 if (empty) 2574 if (empty)
2566 nr = min(right_nritems, max_slot); 2575 nr = min(right_nritems, max_slot);
2567 else 2576 else
@@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3330{ 3339{
3331 int ret = 0; 3340 int ret = 0;
3332 int slot; 3341 int slot;
3333 int slot_orig;
3334 struct extent_buffer *leaf; 3342 struct extent_buffer *leaf;
3335 struct btrfs_item *item; 3343 struct btrfs_item *item;
3336 u32 nritems; 3344 u32 nritems;
@@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
3340 unsigned int size_diff; 3348 unsigned int size_diff;
3341 int i; 3349 int i;
3342 3350
3343 slot_orig = path->slots[0];
3344 leaf = path->nodes[0]; 3351 leaf = path->nodes[0];
3345 slot = path->slots[0]; 3352 slot = path->slots[0];
3346 3353
@@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3445{ 3452{
3446 int ret = 0; 3453 int ret = 0;
3447 int slot; 3454 int slot;
3448 int slot_orig;
3449 struct extent_buffer *leaf; 3455 struct extent_buffer *leaf;
3450 struct btrfs_item *item; 3456 struct btrfs_item *item;
3451 u32 nritems; 3457 u32 nritems;
@@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
3454 unsigned int old_size; 3460 unsigned int old_size;
3455 int i; 3461 int i;
3456 3462
3457 slot_orig = path->slots[0];
3458 leaf = path->nodes[0]; 3463 leaf = path->nodes[0];
3459 3464
3460 nritems = btrfs_header_nritems(leaf); 3465 nritems = btrfs_header_nritems(leaf);
@@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3787 struct btrfs_key *cpu_key, u32 *data_size, 3792 struct btrfs_key *cpu_key, u32 *data_size,
3788 int nr) 3793 int nr)
3789{ 3794{
3790 struct extent_buffer *leaf;
3791 int ret = 0; 3795 int ret = 0;
3792 int slot; 3796 int slot;
3793 int i; 3797 int i;
@@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
3804 if (ret < 0) 3808 if (ret < 0)
3805 goto out; 3809 goto out;
3806 3810
3807 leaf = path->nodes[0];
3808 slot = path->slots[0]; 3811 slot = path->slots[0];
3809 BUG_ON(slot < 0); 3812 BUG_ON(slot < 0);
3810 3813
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eaf286abad17..8db9234f6b41 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
99 */ 99 */
100#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL 100#define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
101 101
102/* For storing free space cache */
103#define BTRFS_FREE_SPACE_OBJECTID -11ULL
104
102/* dummy objectid represents multiple objectids */ 105/* dummy objectid represents multiple objectids */
103#define BTRFS_MULTIPLE_OBJECTIDS -255ULL 106#define BTRFS_MULTIPLE_OBJECTIDS -255ULL
104 107
@@ -265,6 +268,22 @@ struct btrfs_chunk {
265 /* additional stripes go here */ 268 /* additional stripes go here */
266} __attribute__ ((__packed__)); 269} __attribute__ ((__packed__));
267 270
271#define BTRFS_FREE_SPACE_EXTENT 1
272#define BTRFS_FREE_SPACE_BITMAP 2
273
274struct btrfs_free_space_entry {
275 __le64 offset;
276 __le64 bytes;
277 u8 type;
278} __attribute__ ((__packed__));
279
280struct btrfs_free_space_header {
281 struct btrfs_disk_key location;
282 __le64 generation;
283 __le64 num_entries;
284 __le64 num_bitmaps;
285} __attribute__ ((__packed__));
286
268static inline unsigned long btrfs_chunk_item_size(int num_stripes) 287static inline unsigned long btrfs_chunk_item_size(int num_stripes)
269{ 288{
270 BUG_ON(num_stripes == 0); 289 BUG_ON(num_stripes == 0);
@@ -365,8 +384,10 @@ struct btrfs_super_block {
365 384
366 char label[BTRFS_LABEL_SIZE]; 385 char label[BTRFS_LABEL_SIZE];
367 386
387 __le64 cache_generation;
388
368 /* future expansion */ 389 /* future expansion */
369 __le64 reserved[32]; 390 __le64 reserved[31];
370 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE]; 391 u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
371} __attribute__ ((__packed__)); 392} __attribute__ ((__packed__));
372 393
@@ -375,13 +396,15 @@ struct btrfs_super_block {
375 * ones specified below then we will fail to mount 396 * ones specified below then we will fail to mount
376 */ 397 */
377#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 398#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
378#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0) 399#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1)
400#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2)
379 401
380#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 402#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
381#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 403#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
382#define BTRFS_FEATURE_INCOMPAT_SUPP \ 404#define BTRFS_FEATURE_INCOMPAT_SUPP \
383 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \ 405 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
384 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL) 406 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL | \
407 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
385 408
386/* 409/*
387 * A leaf is full of items. offset and size tell us where to find 410 * A leaf is full of items. offset and size tell us where to find
@@ -675,7 +698,8 @@ struct btrfs_block_group_item {
675struct btrfs_space_info { 698struct btrfs_space_info {
676 u64 flags; 699 u64 flags;
677 700
678 u64 total_bytes; /* total bytes in the space */ 701 u64 total_bytes; /* total bytes in the space,
702 this doesn't take mirrors into account */
679 u64 bytes_used; /* total bytes used, 703 u64 bytes_used; /* total bytes used,
680 this does't take mirrors into account */ 704 this does't take mirrors into account */
681 u64 bytes_pinned; /* total bytes pinned, will be freed when the 705 u64 bytes_pinned; /* total bytes pinned, will be freed when the
@@ -687,6 +711,8 @@ struct btrfs_space_info {
687 u64 bytes_may_use; /* number of bytes that may be used for 711 u64 bytes_may_use; /* number of bytes that may be used for
688 delalloc/allocations */ 712 delalloc/allocations */
689 u64 disk_used; /* total bytes used on disk */ 713 u64 disk_used; /* total bytes used on disk */
714 u64 disk_total; /* total bytes on disk, takes mirrors into
715 account */
690 716
691 int full; /* indicates that we cannot allocate any more 717 int full; /* indicates that we cannot allocate any more
692 chunks for this space */ 718 chunks for this space */
@@ -750,6 +776,14 @@ enum btrfs_caching_type {
750 BTRFS_CACHE_FINISHED = 2, 776 BTRFS_CACHE_FINISHED = 2,
751}; 777};
752 778
779enum btrfs_disk_cache_state {
780 BTRFS_DC_WRITTEN = 0,
781 BTRFS_DC_ERROR = 1,
782 BTRFS_DC_CLEAR = 2,
783 BTRFS_DC_SETUP = 3,
784 BTRFS_DC_NEED_WRITE = 4,
785};
786
753struct btrfs_caching_control { 787struct btrfs_caching_control {
754 struct list_head list; 788 struct list_head list;
755 struct mutex mutex; 789 struct mutex mutex;
@@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
763 struct btrfs_key key; 797 struct btrfs_key key;
764 struct btrfs_block_group_item item; 798 struct btrfs_block_group_item item;
765 struct btrfs_fs_info *fs_info; 799 struct btrfs_fs_info *fs_info;
800 struct inode *inode;
766 spinlock_t lock; 801 spinlock_t lock;
767 u64 pinned; 802 u64 pinned;
768 u64 reserved; 803 u64 reserved;
@@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
773 int extents_thresh; 808 int extents_thresh;
774 int free_extents; 809 int free_extents;
775 int total_bitmaps; 810 int total_bitmaps;
776 int ro; 811 int ro:1;
777 int dirty; 812 int dirty:1;
813 int iref:1;
814
815 int disk_cache_state;
778 816
779 /* cache tracking stuff */ 817 /* cache tracking stuff */
780 int cached; 818 int cached;
@@ -863,6 +901,7 @@ struct btrfs_fs_info {
863 struct btrfs_transaction *running_transaction; 901 struct btrfs_transaction *running_transaction;
864 wait_queue_head_t transaction_throttle; 902 wait_queue_head_t transaction_throttle;
865 wait_queue_head_t transaction_wait; 903 wait_queue_head_t transaction_wait;
904 wait_queue_head_t transaction_blocked_wait;
866 wait_queue_head_t async_submit_wait; 905 wait_queue_head_t async_submit_wait;
867 906
868 struct btrfs_super_block super_copy; 907 struct btrfs_super_block super_copy;
@@ -949,6 +988,7 @@ struct btrfs_fs_info {
949 struct btrfs_workers endio_meta_workers; 988 struct btrfs_workers endio_meta_workers;
950 struct btrfs_workers endio_meta_write_workers; 989 struct btrfs_workers endio_meta_write_workers;
951 struct btrfs_workers endio_write_workers; 990 struct btrfs_workers endio_write_workers;
991 struct btrfs_workers endio_freespace_worker;
952 struct btrfs_workers submit_workers; 992 struct btrfs_workers submit_workers;
953 /* 993 /*
954 * fixup workers take dirty pages that didn't properly go through 994 * fixup workers take dirty pages that didn't properly go through
@@ -1192,6 +1232,9 @@ struct btrfs_root {
1192#define BTRFS_MOUNT_NOSSD (1 << 9) 1232#define BTRFS_MOUNT_NOSSD (1 << 9)
1193#define BTRFS_MOUNT_DISCARD (1 << 10) 1233#define BTRFS_MOUNT_DISCARD (1 << 10)
1194#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11) 1234#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
1235#define BTRFS_MOUNT_SPACE_CACHE (1 << 12)
1236#define BTRFS_MOUNT_CLEAR_CACHE (1 << 13)
1237#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
1195 1238
1196#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1239#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
1197#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1240#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
1665 write_eb_member(eb, item, struct btrfs_dir_item, location, key); 1708 write_eb_member(eb, item, struct btrfs_dir_item, location, key);
1666} 1709}
1667 1710
1711BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
1712 num_entries, 64);
1713BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
1714 num_bitmaps, 64);
1715BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
1716 generation, 64);
1717
1718static inline void btrfs_free_space_key(struct extent_buffer *eb,
1719 struct btrfs_free_space_header *h,
1720 struct btrfs_disk_key *key)
1721{
1722 read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
1723}
1724
1725static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
1726 struct btrfs_free_space_header *h,
1727 struct btrfs_disk_key *key)
1728{
1729 write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
1730}
1731
1668/* struct btrfs_disk_key */ 1732/* struct btrfs_disk_key */
1669BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, 1733BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
1670 objectid, 64); 1734 objectid, 64);
@@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
1876 incompat_flags, 64); 1940 incompat_flags, 64);
1877BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, 1941BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
1878 csum_type, 16); 1942 csum_type, 16);
1943BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
1944 cache_generation, 64);
1879 1945
1880static inline int btrfs_super_csum_size(struct btrfs_super_block *s) 1946static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
1881{ 1947{
@@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
1988 return file->f_path.dentry; 2054 return file->f_path.dentry;
1989} 2055}
1990 2056
2057static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
2058{
2059 return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
2060 (space_info->flags & BTRFS_BLOCK_GROUP_DATA));
2061}
2062
1991/* extent-tree.c */ 2063/* extent-tree.c */
1992void btrfs_put_block_group(struct btrfs_block_group_cache *cache); 2064void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
1993int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2065int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
2079void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); 2151void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
2080int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 2152int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
2081 struct btrfs_root *root, 2153 struct btrfs_root *root,
2082 int num_items, int *retries); 2154 int num_items);
2083void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 2155void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
2084 struct btrfs_root *root); 2156 struct btrfs_root *root);
2085int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, 2157int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
2100int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 2172int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
2101 struct btrfs_root *root, 2173 struct btrfs_root *root,
2102 struct btrfs_block_rsv *block_rsv, 2174 struct btrfs_block_rsv *block_rsv,
2103 u64 num_bytes, int *retries); 2175 u64 num_bytes);
2104int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 2176int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
2105 struct btrfs_root *root, 2177 struct btrfs_root *root,
2106 struct btrfs_block_rsv *block_rsv, 2178 struct btrfs_block_rsv *block_rsv,
@@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
2115 struct btrfs_block_group_cache *cache); 2187 struct btrfs_block_group_cache *cache);
2116int btrfs_set_block_group_rw(struct btrfs_root *root, 2188int btrfs_set_block_group_rw(struct btrfs_root *root,
2117 struct btrfs_block_group_cache *cache); 2189 struct btrfs_block_group_cache *cache);
2190void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
2118/* ctree.c */ 2191/* ctree.c */
2119int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, 2192int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
2120 int level, int *slot); 2193 int level, int *slot);
@@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2373 u32 min_type); 2446 u32 min_type);
2374 2447
2375int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 2448int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2376int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); 2449int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
2450 int sync);
2377int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, 2451int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2378 struct extent_state **cached_state); 2452 struct extent_state **cached_state);
2379int btrfs_writepages(struct address_space *mapping, 2453int btrfs_writepages(struct address_space *mapping,
@@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
2426int btrfs_prealloc_file_range(struct inode *inode, int mode, 2500int btrfs_prealloc_file_range(struct inode *inode, int mode,
2427 u64 start, u64 num_bytes, u64 min_size, 2501 u64 start, u64 num_bytes, u64 min_size,
2428 loff_t actual_len, u64 *alloc_hint); 2502 loff_t actual_len, u64 *alloc_hint);
2503int btrfs_prealloc_file_range_trans(struct inode *inode,
2504 struct btrfs_trans_handle *trans, int mode,
2505 u64 start, u64 num_bytes, u64 min_size,
2506 loff_t actual_len, u64 *alloc_hint);
2429extern const struct dentry_operations btrfs_dentry_operations; 2507extern const struct dentry_operations btrfs_dentry_operations;
2430 2508
2431/* ioctl.c */ 2509/* ioctl.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index e9103b3baa49..f0cad5ae5be7 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
427 ret = btrfs_truncate_item(trans, root, path, 427 ret = btrfs_truncate_item(trans, root, path,
428 item_len - sub_item_len, 1); 428 item_len - sub_item_len, 1);
429 } 429 }
430 return 0; 430 return ret;
431} 431}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5e789f4a3ed0..fb827d0d7181 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -338,7 +338,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
338 struct extent_io_tree *tree; 338 struct extent_io_tree *tree;
339 u64 start = (u64)page->index << PAGE_CACHE_SHIFT; 339 u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
340 u64 found_start; 340 u64 found_start;
341 int found_level;
342 unsigned long len; 341 unsigned long len;
343 struct extent_buffer *eb; 342 struct extent_buffer *eb;
344 int ret; 343 int ret;
@@ -369,8 +368,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
369 WARN_ON(1); 368 WARN_ON(1);
370 goto err; 369 goto err;
371 } 370 }
372 found_level = btrfs_header_level(eb);
373
374 csum_tree_block(root, eb, 0); 371 csum_tree_block(root, eb, 0);
375err: 372err:
376 free_extent_buffer(eb); 373 free_extent_buffer(eb);
@@ -481,9 +478,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
481 end_io_wq->work.flags = 0; 478 end_io_wq->work.flags = 0;
482 479
483 if (bio->bi_rw & REQ_WRITE) { 480 if (bio->bi_rw & REQ_WRITE) {
484 if (end_io_wq->metadata) 481 if (end_io_wq->metadata == 1)
485 btrfs_queue_worker(&fs_info->endio_meta_write_workers, 482 btrfs_queue_worker(&fs_info->endio_meta_write_workers,
486 &end_io_wq->work); 483 &end_io_wq->work);
484 else if (end_io_wq->metadata == 2)
485 btrfs_queue_worker(&fs_info->endio_freespace_worker,
486 &end_io_wq->work);
487 else 487 else
488 btrfs_queue_worker(&fs_info->endio_write_workers, 488 btrfs_queue_worker(&fs_info->endio_write_workers,
489 &end_io_wq->work); 489 &end_io_wq->work);
@@ -497,6 +497,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
497 } 497 }
498} 498}
499 499
500/*
501 * For the metadata arg you want
502 *
503 * 0 - if data
504 * 1 - if normal metadta
505 * 2 - if writing to the free space cache area
506 */
500int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, 507int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
501 int metadata) 508 int metadata)
502{ 509{
@@ -533,11 +540,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
533 540
534static void run_one_async_start(struct btrfs_work *work) 541static void run_one_async_start(struct btrfs_work *work)
535{ 542{
536 struct btrfs_fs_info *fs_info;
537 struct async_submit_bio *async; 543 struct async_submit_bio *async;
538 544
539 async = container_of(work, struct async_submit_bio, work); 545 async = container_of(work, struct async_submit_bio, work);
540 fs_info = BTRFS_I(async->inode)->root->fs_info;
541 async->submit_bio_start(async->inode, async->rw, async->bio, 546 async->submit_bio_start(async->inode, async->rw, async->bio,
542 async->mirror_num, async->bio_flags, 547 async->mirror_num, async->bio_flags,
543 async->bio_offset); 548 async->bio_offset);
@@ -850,12 +855,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
850 u32 blocksize, u64 parent_transid) 855 u32 blocksize, u64 parent_transid)
851{ 856{
852 struct extent_buffer *buf = NULL; 857 struct extent_buffer *buf = NULL;
853 struct inode *btree_inode = root->fs_info->btree_inode;
854 struct extent_io_tree *io_tree;
855 int ret; 858 int ret;
856 859
857 io_tree = &BTRFS_I(btree_inode)->io_tree;
858
859 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 860 buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
860 if (!buf) 861 if (!buf)
861 return NULL; 862 return NULL;
@@ -1377,7 +1378,6 @@ static int bio_ready_for_csum(struct bio *bio)
1377 u64 start = 0; 1378 u64 start = 0;
1378 struct page *page; 1379 struct page *page;
1379 struct extent_io_tree *io_tree = NULL; 1380 struct extent_io_tree *io_tree = NULL;
1380 struct btrfs_fs_info *info = NULL;
1381 struct bio_vec *bvec; 1381 struct bio_vec *bvec;
1382 int i; 1382 int i;
1383 int ret; 1383 int ret;
@@ -1396,7 +1396,6 @@ static int bio_ready_for_csum(struct bio *bio)
1396 buf_len = page->private >> 2; 1396 buf_len = page->private >> 2;
1397 start = page_offset(page) + bvec->bv_offset; 1397 start = page_offset(page) + bvec->bv_offset;
1398 io_tree = &BTRFS_I(page->mapping->host)->io_tree; 1398 io_tree = &BTRFS_I(page->mapping->host)->io_tree;
1399 info = BTRFS_I(page->mapping->host)->root->fs_info;
1400 } 1399 }
1401 /* are we fully contained in this bio? */ 1400 /* are we fully contained in this bio? */
1402 if (buf_len <= length) 1401 if (buf_len <= length)
@@ -1680,12 +1679,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1680 1679
1681 init_waitqueue_head(&fs_info->transaction_throttle); 1680 init_waitqueue_head(&fs_info->transaction_throttle);
1682 init_waitqueue_head(&fs_info->transaction_wait); 1681 init_waitqueue_head(&fs_info->transaction_wait);
1682 init_waitqueue_head(&fs_info->transaction_blocked_wait);
1683 init_waitqueue_head(&fs_info->async_submit_wait); 1683 init_waitqueue_head(&fs_info->async_submit_wait);
1684 1684
1685 __setup_root(4096, 4096, 4096, 4096, tree_root, 1685 __setup_root(4096, 4096, 4096, 4096, tree_root,
1686 fs_info, BTRFS_ROOT_TREE_OBJECTID); 1686 fs_info, BTRFS_ROOT_TREE_OBJECTID);
1687 1687
1688
1689 bh = btrfs_read_dev_super(fs_devices->latest_bdev); 1688 bh = btrfs_read_dev_super(fs_devices->latest_bdev);
1690 if (!bh) 1689 if (!bh)
1691 goto fail_iput; 1690 goto fail_iput;
@@ -1775,6 +1774,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1775 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", 1774 btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
1776 fs_info->thread_pool_size, 1775 fs_info->thread_pool_size,
1777 &fs_info->generic_worker); 1776 &fs_info->generic_worker);
1777 btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
1778 1, &fs_info->generic_worker);
1778 1779
1779 /* 1780 /*
1780 * endios are largely parallel and should have a very 1781 * endios are largely parallel and should have a very
@@ -1795,6 +1796,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1795 btrfs_start_workers(&fs_info->endio_meta_workers, 1); 1796 btrfs_start_workers(&fs_info->endio_meta_workers, 1);
1796 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); 1797 btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
1797 btrfs_start_workers(&fs_info->endio_write_workers, 1); 1798 btrfs_start_workers(&fs_info->endio_write_workers, 1);
1799 btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
1798 1800
1799 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); 1801 fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
1800 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, 1802 fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1993,6 +1995,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1993 if (!(sb->s_flags & MS_RDONLY)) { 1995 if (!(sb->s_flags & MS_RDONLY)) {
1994 down_read(&fs_info->cleanup_work_sem); 1996 down_read(&fs_info->cleanup_work_sem);
1995 btrfs_orphan_cleanup(fs_info->fs_root); 1997 btrfs_orphan_cleanup(fs_info->fs_root);
1998 btrfs_orphan_cleanup(fs_info->tree_root);
1996 up_read(&fs_info->cleanup_work_sem); 1999 up_read(&fs_info->cleanup_work_sem);
1997 } 2000 }
1998 2001
@@ -2035,6 +2038,7 @@ fail_sb_buffer:
2035 btrfs_stop_workers(&fs_info->endio_meta_workers); 2038 btrfs_stop_workers(&fs_info->endio_meta_workers);
2036 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2039 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2037 btrfs_stop_workers(&fs_info->endio_write_workers); 2040 btrfs_stop_workers(&fs_info->endio_write_workers);
2041 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2038 btrfs_stop_workers(&fs_info->submit_workers); 2042 btrfs_stop_workers(&fs_info->submit_workers);
2039fail_iput: 2043fail_iput:
2040 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2044 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2410,6 +2414,7 @@ int close_ctree(struct btrfs_root *root)
2410 fs_info->closing = 1; 2414 fs_info->closing = 1;
2411 smp_mb(); 2415 smp_mb();
2412 2416
2417 btrfs_put_block_group_cache(fs_info);
2413 if (!(fs_info->sb->s_flags & MS_RDONLY)) { 2418 if (!(fs_info->sb->s_flags & MS_RDONLY)) {
2414 ret = btrfs_commit_super(root); 2419 ret = btrfs_commit_super(root);
2415 if (ret) 2420 if (ret)
@@ -2456,6 +2461,7 @@ int close_ctree(struct btrfs_root *root)
2456 btrfs_stop_workers(&fs_info->endio_meta_workers); 2461 btrfs_stop_workers(&fs_info->endio_meta_workers);
2457 btrfs_stop_workers(&fs_info->endio_meta_write_workers); 2462 btrfs_stop_workers(&fs_info->endio_meta_write_workers);
2458 btrfs_stop_workers(&fs_info->endio_write_workers); 2463 btrfs_stop_workers(&fs_info->endio_write_workers);
2464 btrfs_stop_workers(&fs_info->endio_freespace_worker);
2459 btrfs_stop_workers(&fs_info->submit_workers); 2465 btrfs_stop_workers(&fs_info->submit_workers);
2460 2466
2461 btrfs_close_devices(fs_info->fs_devices); 2467 btrfs_close_devices(fs_info->fs_devices);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b81ecdb101c..0c097f3aec41 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
242 return NULL; 242 return NULL;
243 } 243 }
244 244
245 /* We're loading it the fast way, so we don't have a caching_ctl. */
246 if (!cache->caching_ctl) {
247 spin_unlock(&cache->lock);
248 return NULL;
249 }
250
245 ctl = cache->caching_ctl; 251 ctl = cache->caching_ctl;
246 atomic_inc(&ctl->count); 252 atomic_inc(&ctl->count);
247 spin_unlock(&cache->lock); 253 spin_unlock(&cache->lock);
@@ -421,7 +427,9 @@ err:
421 return 0; 427 return 0;
422} 428}
423 429
424static int cache_block_group(struct btrfs_block_group_cache *cache) 430static int cache_block_group(struct btrfs_block_group_cache *cache,
431 struct btrfs_trans_handle *trans,
432 int load_cache_only)
425{ 433{
426 struct btrfs_fs_info *fs_info = cache->fs_info; 434 struct btrfs_fs_info *fs_info = cache->fs_info;
427 struct btrfs_caching_control *caching_ctl; 435 struct btrfs_caching_control *caching_ctl;
@@ -432,6 +440,36 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
432 if (cache->cached != BTRFS_CACHE_NO) 440 if (cache->cached != BTRFS_CACHE_NO)
433 return 0; 441 return 0;
434 442
443 /*
444 * We can't do the read from on-disk cache during a commit since we need
445 * to have the normal tree locking.
446 */
447 if (!trans->transaction->in_commit) {
448 spin_lock(&cache->lock);
449 if (cache->cached != BTRFS_CACHE_NO) {
450 spin_unlock(&cache->lock);
451 return 0;
452 }
453 cache->cached = BTRFS_CACHE_STARTED;
454 spin_unlock(&cache->lock);
455
456 ret = load_free_space_cache(fs_info, cache);
457
458 spin_lock(&cache->lock);
459 if (ret == 1) {
460 cache->cached = BTRFS_CACHE_FINISHED;
461 cache->last_byte_to_unpin = (u64)-1;
462 } else {
463 cache->cached = BTRFS_CACHE_NO;
464 }
465 spin_unlock(&cache->lock);
466 if (ret == 1)
467 return 0;
468 }
469
470 if (load_cache_only)
471 return 0;
472
435 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); 473 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
436 BUG_ON(!caching_ctl); 474 BUG_ON(!caching_ctl);
437 475
@@ -509,7 +547,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
509 547
510 rcu_read_lock(); 548 rcu_read_lock();
511 list_for_each_entry_rcu(found, head, list) { 549 list_for_each_entry_rcu(found, head, list) {
512 if (found->flags == flags) { 550 if (found->flags & flags) {
513 rcu_read_unlock(); 551 rcu_read_unlock();
514 return found; 552 return found;
515 } 553 }
@@ -542,6 +580,15 @@ static u64 div_factor(u64 num, int factor)
542 return num; 580 return num;
543} 581}
544 582
583static u64 div_factor_fine(u64 num, int factor)
584{
585 if (factor == 100)
586 return num;
587 num *= factor;
588 do_div(num, 100);
589 return num;
590}
591
545u64 btrfs_find_block_group(struct btrfs_root *root, 592u64 btrfs_find_block_group(struct btrfs_root *root,
546 u64 search_start, u64 search_hint, int owner) 593 u64 search_start, u64 search_hint, int owner)
547{ 594{
@@ -2687,6 +2734,109 @@ next_block_group(struct btrfs_root *root,
2687 return cache; 2734 return cache;
2688} 2735}
2689 2736
2737static int cache_save_setup(struct btrfs_block_group_cache *block_group,
2738 struct btrfs_trans_handle *trans,
2739 struct btrfs_path *path)
2740{
2741 struct btrfs_root *root = block_group->fs_info->tree_root;
2742 struct inode *inode = NULL;
2743 u64 alloc_hint = 0;
2744 int num_pages = 0;
2745 int retries = 0;
2746 int ret = 0;
2747
2748 /*
2749 * If this block group is smaller than 100 megs don't bother caching the
2750 * block group.
2751 */
2752 if (block_group->key.offset < (100 * 1024 * 1024)) {
2753 spin_lock(&block_group->lock);
2754 block_group->disk_cache_state = BTRFS_DC_WRITTEN;
2755 spin_unlock(&block_group->lock);
2756 return 0;
2757 }
2758
2759again:
2760 inode = lookup_free_space_inode(root, block_group, path);
2761 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
2762 ret = PTR_ERR(inode);
2763 btrfs_release_path(root, path);
2764 goto out;
2765 }
2766
2767 if (IS_ERR(inode)) {
2768 BUG_ON(retries);
2769 retries++;
2770
2771 if (block_group->ro)
2772 goto out_free;
2773
2774 ret = create_free_space_inode(root, trans, block_group, path);
2775 if (ret)
2776 goto out_free;
2777 goto again;
2778 }
2779
2780 /*
2781 * We want to set the generation to 0, that way if anything goes wrong
2782 * from here on out we know not to trust this cache when we load up next
2783 * time.
2784 */
2785 BTRFS_I(inode)->generation = 0;
2786 ret = btrfs_update_inode(trans, root, inode);
2787 WARN_ON(ret);
2788
2789 if (i_size_read(inode) > 0) {
2790 ret = btrfs_truncate_free_space_cache(root, trans, path,
2791 inode);
2792 if (ret)
2793 goto out_put;
2794 }
2795
2796 spin_lock(&block_group->lock);
2797 if (block_group->cached != BTRFS_CACHE_FINISHED) {
2798 spin_unlock(&block_group->lock);
2799 goto out_put;
2800 }
2801 spin_unlock(&block_group->lock);
2802
2803 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
2804 if (!num_pages)
2805 num_pages = 1;
2806
2807 /*
2808 * Just to make absolutely sure we have enough space, we're going to
2809 * preallocate 16 pages worth of space for each block group. In
2810 * practice we ought to use at most 8, but we need extra space so we can
2811 * add our header and have a terminator between the extents and the
2812 * bitmaps.
2813 */
2814 num_pages *= 16;
2815 num_pages *= PAGE_CACHE_SIZE;
2816
2817 ret = btrfs_check_data_free_space(inode, num_pages);
2818 if (ret)
2819 goto out_put;
2820
2821 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
2822 num_pages, num_pages,
2823 &alloc_hint);
2824 btrfs_free_reserved_data_space(inode, num_pages);
2825out_put:
2826 iput(inode);
2827out_free:
2828 btrfs_release_path(root, path);
2829out:
2830 spin_lock(&block_group->lock);
2831 if (ret)
2832 block_group->disk_cache_state = BTRFS_DC_ERROR;
2833 else
2834 block_group->disk_cache_state = BTRFS_DC_SETUP;
2835 spin_unlock(&block_group->lock);
2836
2837 return ret;
2838}
2839
2690int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2840int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2691 struct btrfs_root *root) 2841 struct btrfs_root *root)
2692{ 2842{
@@ -2699,6 +2849,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2699 if (!path) 2849 if (!path)
2700 return -ENOMEM; 2850 return -ENOMEM;
2701 2851
2852again:
2853 while (1) {
2854 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2855 while (cache) {
2856 if (cache->disk_cache_state == BTRFS_DC_CLEAR)
2857 break;
2858 cache = next_block_group(root, cache);
2859 }
2860 if (!cache) {
2861 if (last == 0)
2862 break;
2863 last = 0;
2864 continue;
2865 }
2866 err = cache_save_setup(cache, trans, path);
2867 last = cache->key.objectid + cache->key.offset;
2868 btrfs_put_block_group(cache);
2869 }
2870
2702 while (1) { 2871 while (1) {
2703 if (last == 0) { 2872 if (last == 0) {
2704 err = btrfs_run_delayed_refs(trans, root, 2873 err = btrfs_run_delayed_refs(trans, root,
@@ -2708,6 +2877,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2708 2877
2709 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2878 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2710 while (cache) { 2879 while (cache) {
2880 if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
2881 btrfs_put_block_group(cache);
2882 goto again;
2883 }
2884
2711 if (cache->dirty) 2885 if (cache->dirty)
2712 break; 2886 break;
2713 cache = next_block_group(root, cache); 2887 cache = next_block_group(root, cache);
@@ -2719,6 +2893,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2719 continue; 2893 continue;
2720 } 2894 }
2721 2895
2896 if (cache->disk_cache_state == BTRFS_DC_SETUP)
2897 cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
2722 cache->dirty = 0; 2898 cache->dirty = 0;
2723 last = cache->key.objectid + cache->key.offset; 2899 last = cache->key.objectid + cache->key.offset;
2724 2900
@@ -2727,6 +2903,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
2727 btrfs_put_block_group(cache); 2903 btrfs_put_block_group(cache);
2728 } 2904 }
2729 2905
2906 while (1) {
2907 /*
2908 * I don't think this is needed since we're just marking our
2909 * preallocated extent as written, but just in case it can't
2910 * hurt.
2911 */
2912 if (last == 0) {
2913 err = btrfs_run_delayed_refs(trans, root,
2914 (unsigned long)-1);
2915 BUG_ON(err);
2916 }
2917
2918 cache = btrfs_lookup_first_block_group(root->fs_info, last);
2919 while (cache) {
2920 /*
2921 * Really this shouldn't happen, but it could if we
2922 * couldn't write the entire preallocated extent and
2923 * splitting the extent resulted in a new block.
2924 */
2925 if (cache->dirty) {
2926 btrfs_put_block_group(cache);
2927 goto again;
2928 }
2929 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2930 break;
2931 cache = next_block_group(root, cache);
2932 }
2933 if (!cache) {
2934 if (last == 0)
2935 break;
2936 last = 0;
2937 continue;
2938 }
2939
2940 btrfs_write_out_cache(root, trans, cache, path);
2941
2942 /*
2943 * If we didn't have an error then the cache state is still
2944 * NEED_WRITE, so we can set it to WRITTEN.
2945 */
2946 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
2947 cache->disk_cache_state = BTRFS_DC_WRITTEN;
2948 last = cache->key.objectid + cache->key.offset;
2949 btrfs_put_block_group(cache);
2950 }
2951
2730 btrfs_free_path(path); 2952 btrfs_free_path(path);
2731 return 0; 2953 return 0;
2732} 2954}
@@ -2762,6 +2984,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2762 if (found) { 2984 if (found) {
2763 spin_lock(&found->lock); 2985 spin_lock(&found->lock);
2764 found->total_bytes += total_bytes; 2986 found->total_bytes += total_bytes;
2987 found->disk_total += total_bytes * factor;
2765 found->bytes_used += bytes_used; 2988 found->bytes_used += bytes_used;
2766 found->disk_used += bytes_used * factor; 2989 found->disk_used += bytes_used * factor;
2767 found->full = 0; 2990 found->full = 0;
@@ -2781,6 +3004,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2781 BTRFS_BLOCK_GROUP_SYSTEM | 3004 BTRFS_BLOCK_GROUP_SYSTEM |
2782 BTRFS_BLOCK_GROUP_METADATA); 3005 BTRFS_BLOCK_GROUP_METADATA);
2783 found->total_bytes = total_bytes; 3006 found->total_bytes = total_bytes;
3007 found->disk_total = total_bytes * factor;
2784 found->bytes_used = bytes_used; 3008 found->bytes_used = bytes_used;
2785 found->disk_used = bytes_used * factor; 3009 found->disk_used = bytes_used * factor;
2786 found->bytes_pinned = 0; 3010 found->bytes_pinned = 0;
@@ -2882,11 +3106,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
2882 struct btrfs_space_info *data_sinfo; 3106 struct btrfs_space_info *data_sinfo;
2883 struct btrfs_root *root = BTRFS_I(inode)->root; 3107 struct btrfs_root *root = BTRFS_I(inode)->root;
2884 u64 used; 3108 u64 used;
2885 int ret = 0, committed = 0; 3109 int ret = 0, committed = 0, alloc_chunk = 1;
2886 3110
2887 /* make sure bytes are sectorsize aligned */ 3111 /* make sure bytes are sectorsize aligned */
2888 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3112 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
2889 3113
3114 if (root == root->fs_info->tree_root) {
3115 alloc_chunk = 0;
3116 committed = 1;
3117 }
3118
2890 data_sinfo = BTRFS_I(inode)->space_info; 3119 data_sinfo = BTRFS_I(inode)->space_info;
2891 if (!data_sinfo) 3120 if (!data_sinfo)
2892 goto alloc; 3121 goto alloc;
@@ -2905,7 +3134,7 @@ again:
2905 * if we don't have enough free bytes in this space then we need 3134 * if we don't have enough free bytes in this space then we need
2906 * to alloc a new chunk. 3135 * to alloc a new chunk.
2907 */ 3136 */
2908 if (!data_sinfo->full) { 3137 if (!data_sinfo->full && alloc_chunk) {
2909 u64 alloc_target; 3138 u64 alloc_target;
2910 3139
2911 data_sinfo->force_alloc = 1; 3140 data_sinfo->force_alloc = 1;
@@ -2997,10 +3226,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
2997 rcu_read_unlock(); 3226 rcu_read_unlock();
2998} 3227}
2999 3228
3000static int should_alloc_chunk(struct btrfs_space_info *sinfo, 3229static int should_alloc_chunk(struct btrfs_root *root,
3001 u64 alloc_bytes) 3230 struct btrfs_space_info *sinfo, u64 alloc_bytes)
3002{ 3231{
3003 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3232 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
3233 u64 thresh;
3004 3234
3005 if (sinfo->bytes_used + sinfo->bytes_reserved + 3235 if (sinfo->bytes_used + sinfo->bytes_reserved +
3006 alloc_bytes + 256 * 1024 * 1024 < num_bytes) 3236 alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3240,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
3010 alloc_bytes < div_factor(num_bytes, 8)) 3240 alloc_bytes < div_factor(num_bytes, 8))
3011 return 0; 3241 return 0;
3012 3242
3243 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
3244 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
3245
3246 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
3247 return 0;
3248
3013 return 1; 3249 return 1;
3014} 3250}
3015 3251
@@ -3041,13 +3277,21 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
3041 goto out; 3277 goto out;
3042 } 3278 }
3043 3279
3044 if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { 3280 if (!force && !should_alloc_chunk(extent_root, space_info,
3281 alloc_bytes)) {
3045 spin_unlock(&space_info->lock); 3282 spin_unlock(&space_info->lock);
3046 goto out; 3283 goto out;
3047 } 3284 }
3048 spin_unlock(&space_info->lock); 3285 spin_unlock(&space_info->lock);
3049 3286
3050 /* 3287 /*
3288 * If we have mixed data/metadata chunks we want to make sure we keep
3289 * allocating mixed chunks instead of individual chunks.
3290 */
3291 if (btrfs_mixed_space_info(space_info))
3292 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
3293
3294 /*
3051 * if we're doing a data chunk, go ahead and make sure that 3295 * if we're doing a data chunk, go ahead and make sure that
3052 * we keep a reasonable number of metadata chunks allocated in the 3296 * we keep a reasonable number of metadata chunks allocated in the
3053 * FS as well. 3297 * FS as well.
@@ -3072,55 +3316,25 @@ out:
3072 return ret; 3316 return ret;
3073} 3317}
3074 3318
3075static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
3076 struct btrfs_root *root,
3077 struct btrfs_space_info *sinfo, u64 num_bytes)
3078{
3079 int ret;
3080 int end_trans = 0;
3081
3082 if (sinfo->full)
3083 return 0;
3084
3085 spin_lock(&sinfo->lock);
3086 ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
3087 spin_unlock(&sinfo->lock);
3088 if (!ret)
3089 return 0;
3090
3091 if (!trans) {
3092 trans = btrfs_join_transaction(root, 1);
3093 BUG_ON(IS_ERR(trans));
3094 end_trans = 1;
3095 }
3096
3097 ret = do_chunk_alloc(trans, root->fs_info->extent_root,
3098 num_bytes + 2 * 1024 * 1024,
3099 get_alloc_profile(root, sinfo->flags), 0);
3100
3101 if (end_trans)
3102 btrfs_end_transaction(trans, root);
3103
3104 return ret == 1 ? 1 : 0;
3105}
3106
3107/* 3319/*
3108 * shrink metadata reservation for delalloc 3320 * shrink metadata reservation for delalloc
3109 */ 3321 */
3110static int shrink_delalloc(struct btrfs_trans_handle *trans, 3322static int shrink_delalloc(struct btrfs_trans_handle *trans,
3111 struct btrfs_root *root, u64 to_reclaim) 3323 struct btrfs_root *root, u64 to_reclaim, int sync)
3112{ 3324{
3113 struct btrfs_block_rsv *block_rsv; 3325 struct btrfs_block_rsv *block_rsv;
3326 struct btrfs_space_info *space_info;
3114 u64 reserved; 3327 u64 reserved;
3115 u64 max_reclaim; 3328 u64 max_reclaim;
3116 u64 reclaimed = 0; 3329 u64 reclaimed = 0;
3117 int pause = 1; 3330 int pause = 1;
3118 int ret; 3331 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
3119 3332
3120 block_rsv = &root->fs_info->delalloc_block_rsv; 3333 block_rsv = &root->fs_info->delalloc_block_rsv;
3121 spin_lock(&block_rsv->lock); 3334 space_info = block_rsv->space_info;
3122 reserved = block_rsv->reserved; 3335
3123 spin_unlock(&block_rsv->lock); 3336 smp_mb();
3337 reserved = space_info->bytes_reserved;
3124 3338
3125 if (reserved == 0) 3339 if (reserved == 0)
3126 return 0; 3340 return 0;
@@ -3128,104 +3342,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
3128 max_reclaim = min(reserved, to_reclaim); 3342 max_reclaim = min(reserved, to_reclaim);
3129 3343
3130 while (1) { 3344 while (1) {
3131 ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); 3345 /* have the flusher threads jump in and do some IO */
3132 if (!ret) { 3346 smp_mb();
3133 __set_current_state(TASK_INTERRUPTIBLE); 3347 nr_pages = min_t(unsigned long, nr_pages,
3134 schedule_timeout(pause); 3348 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
3135 pause <<= 1; 3349 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
3136 if (pause > HZ / 10)
3137 pause = HZ / 10;
3138 } else {
3139 pause = 1;
3140 }
3141 3350
3142 spin_lock(&block_rsv->lock); 3351 spin_lock(&space_info->lock);
3143 if (reserved > block_rsv->reserved) 3352 if (reserved > space_info->bytes_reserved)
3144 reclaimed = reserved - block_rsv->reserved; 3353 reclaimed += reserved - space_info->bytes_reserved;
3145 reserved = block_rsv->reserved; 3354 reserved = space_info->bytes_reserved;
3146 spin_unlock(&block_rsv->lock); 3355 spin_unlock(&space_info->lock);
3147 3356
3148 if (reserved == 0 || reclaimed >= max_reclaim) 3357 if (reserved == 0 || reclaimed >= max_reclaim)
3149 break; 3358 break;
3150 3359
3151 if (trans && trans->transaction->blocked) 3360 if (trans && trans->transaction->blocked)
3152 return -EAGAIN; 3361 return -EAGAIN;
3362
3363 __set_current_state(TASK_INTERRUPTIBLE);
3364 schedule_timeout(pause);
3365 pause <<= 1;
3366 if (pause > HZ / 10)
3367 pause = HZ / 10;
3368
3153 } 3369 }
3154 return reclaimed >= to_reclaim; 3370 return reclaimed >= to_reclaim;
3155} 3371}
3156 3372
3157static int should_retry_reserve(struct btrfs_trans_handle *trans, 3373/*
3158 struct btrfs_root *root, 3374 * Retries tells us how many times we've called reserve_metadata_bytes. The
3159 struct btrfs_block_rsv *block_rsv, 3375 * idea is if this is the first call (retries == 0) then we will add to our
3160 u64 num_bytes, int *retries) 3376 * reserved count if we can't make the allocation in order to hold our place
3377 * while we go and try and free up space. That way for retries > 1 we don't try
3378 * and add space, we just check to see if the amount of unused space is >= the
3379 * total space, meaning that our reservation is valid.
3380 *
3381 * However if we don't intend to retry this reservation, pass -1 as retries so
3382 * that it short circuits this logic.
3383 */
3384static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
3385 struct btrfs_root *root,
3386 struct btrfs_block_rsv *block_rsv,
3387 u64 orig_bytes, int flush)
3161{ 3388{
3162 struct btrfs_space_info *space_info = block_rsv->space_info; 3389 struct btrfs_space_info *space_info = block_rsv->space_info;
3163 int ret; 3390 u64 unused;
3391 u64 num_bytes = orig_bytes;
3392 int retries = 0;
3393 int ret = 0;
3394 bool reserved = false;
3395 bool committed = false;
3164 3396
3165 if ((*retries) > 2) 3397again:
3166 return -ENOSPC; 3398 ret = -ENOSPC;
3399 if (reserved)
3400 num_bytes = 0;
3167 3401
3168 ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); 3402 spin_lock(&space_info->lock);
3169 if (ret) 3403 unused = space_info->bytes_used + space_info->bytes_reserved +
3170 return 1; 3404 space_info->bytes_pinned + space_info->bytes_readonly +
3405 space_info->bytes_may_use;
3171 3406
3172 if (trans && trans->transaction->in_commit) 3407 /*
3173 return -ENOSPC; 3408 * The idea here is that if we've not already over-reserved the block group
3409 * then we can go ahead and save our reservation first and then start
3410 * flushing if we need to. Otherwise if we've already overcommitted
3411 * let's start flushing stuff first and then come back and try to make
3412 * our reservation.
3413 */
3414 if (unused <= space_info->total_bytes) {
3415 unused -= space_info->total_bytes;
3416 if (unused >= num_bytes) {
3417 if (!reserved)
3418 space_info->bytes_reserved += orig_bytes;
3419 ret = 0;
3420 } else {
3421 /*
3422 * Ok set num_bytes to orig_bytes since we aren't
3423 * overcommitted, this way we only try and reclaim what
3424 * we need.
3425 */
3426 num_bytes = orig_bytes;
3427 }
3428 } else {
3429 /*
3430 * Ok we're over committed, set num_bytes to the overcommitted
3431 * amount plus the amount of bytes that we need for this
3432 * reservation.
3433 */
3434 num_bytes = unused - space_info->total_bytes +
3435 (orig_bytes * (retries + 1));
3436 }
3174 3437
3175 ret = shrink_delalloc(trans, root, num_bytes); 3438 /*
3176 if (ret) 3439 * Couldn't make our reservation, save our place so while we're trying
3177 return ret; 3440 * to reclaim space we can actually use it instead of somebody else
3441 * stealing it from us.
3442 */
3443 if (ret && !reserved) {
3444 space_info->bytes_reserved += orig_bytes;
3445 reserved = true;
3446 }
3178 3447
3179 spin_lock(&space_info->lock);
3180 if (space_info->bytes_pinned < num_bytes)
3181 ret = 1;
3182 spin_unlock(&space_info->lock); 3448 spin_unlock(&space_info->lock);
3183 if (ret)
3184 return -ENOSPC;
3185
3186 (*retries)++;
3187 3449
3188 if (trans) 3450 if (!ret)
3189 return -EAGAIN; 3451 return 0;
3190 3452
3191 trans = btrfs_join_transaction(root, 1); 3453 if (!flush)
3192 BUG_ON(IS_ERR(trans)); 3454 goto out;
3193 ret = btrfs_commit_transaction(trans, root);
3194 BUG_ON(ret);
3195 3455
3196 return 1; 3456 /*
3197} 3457 * We do synchronous shrinking since we don't actually unreserve
3458 * metadata until after the IO is completed.
3459 */
3460 ret = shrink_delalloc(trans, root, num_bytes, 1);
3461 if (ret > 0)
3462 return 0;
3463 else if (ret < 0)
3464 goto out;
3198 3465
3199static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, 3466 /*
3200 u64 num_bytes) 3467 * So if we were overcommitted it's possible that somebody else flushed
3201{ 3468 * out enough space and we simply didn't have enough space to reclaim,
3202 struct btrfs_space_info *space_info = block_rsv->space_info; 3469 * so go back around and try again.
3203 u64 unused; 3470 */
3204 int ret = -ENOSPC; 3471 if (retries < 2) {
3472 retries++;
3473 goto again;
3474 }
3205 3475
3206 spin_lock(&space_info->lock); 3476 spin_lock(&space_info->lock);
3207 unused = space_info->bytes_used + space_info->bytes_reserved + 3477 /*
3208 space_info->bytes_pinned + space_info->bytes_readonly; 3478 * Not enough space to be reclaimed, don't bother committing the
3479 * transaction.
3480 */
3481 if (space_info->bytes_pinned < orig_bytes)
3482 ret = -ENOSPC;
3483 spin_unlock(&space_info->lock);
3484 if (ret)
3485 goto out;
3209 3486
3210 if (unused < space_info->total_bytes) 3487 ret = -EAGAIN;
3211 unused = space_info->total_bytes - unused; 3488 if (trans || committed)
3212 else 3489 goto out;
3213 unused = 0;
3214 3490
3215 if (unused >= num_bytes) { 3491 ret = -ENOSPC;
3216 if (block_rsv->priority >= 10) { 3492 trans = btrfs_join_transaction(root, 1);
3217 space_info->bytes_reserved += num_bytes; 3493 if (IS_ERR(trans))
3218 ret = 0; 3494 goto out;
3219 } else { 3495 ret = btrfs_commit_transaction(trans, root);
3220 if ((unused + block_rsv->reserved) * 3496 if (!ret) {
3221 block_rsv->priority >= 3497 trans = NULL;
3222 (num_bytes + block_rsv->reserved) * 10) { 3498 committed = true;
3223 space_info->bytes_reserved += num_bytes; 3499 goto again;
3224 ret = 0; 3500 }
3225 } 3501
3226 } 3502out:
3503 if (reserved) {
3504 spin_lock(&space_info->lock);
3505 space_info->bytes_reserved -= orig_bytes;
3506 spin_unlock(&space_info->lock);
3227 } 3507 }
3228 spin_unlock(&space_info->lock);
3229 3508
3230 return ret; 3509 return ret;
3231} 3510}
@@ -3327,18 +3606,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
3327{ 3606{
3328 struct btrfs_block_rsv *block_rsv; 3607 struct btrfs_block_rsv *block_rsv;
3329 struct btrfs_fs_info *fs_info = root->fs_info; 3608 struct btrfs_fs_info *fs_info = root->fs_info;
3330 u64 alloc_target;
3331 3609
3332 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); 3610 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
3333 if (!block_rsv) 3611 if (!block_rsv)
3334 return NULL; 3612 return NULL;
3335 3613
3336 btrfs_init_block_rsv(block_rsv); 3614 btrfs_init_block_rsv(block_rsv);
3337
3338 alloc_target = btrfs_get_alloc_profile(root, 0);
3339 block_rsv->space_info = __find_space_info(fs_info, 3615 block_rsv->space_info = __find_space_info(fs_info,
3340 BTRFS_BLOCK_GROUP_METADATA); 3616 BTRFS_BLOCK_GROUP_METADATA);
3341
3342 return block_rsv; 3617 return block_rsv;
3343} 3618}
3344 3619
@@ -3369,23 +3644,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
3369int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3644int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
3370 struct btrfs_root *root, 3645 struct btrfs_root *root,
3371 struct btrfs_block_rsv *block_rsv, 3646 struct btrfs_block_rsv *block_rsv,
3372 u64 num_bytes, int *retries) 3647 u64 num_bytes)
3373{ 3648{
3374 int ret; 3649 int ret;
3375 3650
3376 if (num_bytes == 0) 3651 if (num_bytes == 0)
3377 return 0; 3652 return 0;
3378again: 3653
3379 ret = reserve_metadata_bytes(block_rsv, num_bytes); 3654 ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
3380 if (!ret) { 3655 if (!ret) {
3381 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3656 block_rsv_add_bytes(block_rsv, num_bytes, 1);
3382 return 0; 3657 return 0;
3383 } 3658 }
3384 3659
3385 ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
3386 if (ret > 0)
3387 goto again;
3388
3389 return ret; 3660 return ret;
3390} 3661}
3391 3662
@@ -3420,7 +3691,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
3420 return 0; 3691 return 0;
3421 3692
3422 if (block_rsv->refill_used) { 3693 if (block_rsv->refill_used) {
3423 ret = reserve_metadata_bytes(block_rsv, num_bytes); 3694 ret = reserve_metadata_bytes(trans, root, block_rsv,
3695 num_bytes, 0);
3424 if (!ret) { 3696 if (!ret) {
3425 block_rsv_add_bytes(block_rsv, num_bytes, 0); 3697 block_rsv_add_bytes(block_rsv, num_bytes, 0);
3426 return 0; 3698 return 0;
@@ -3499,6 +3771,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
3499 3771
3500 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3772 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
3501 spin_lock(&sinfo->lock); 3773 spin_lock(&sinfo->lock);
3774 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
3775 data_used = 0;
3502 meta_used = sinfo->bytes_used; 3776 meta_used = sinfo->bytes_used;
3503 spin_unlock(&sinfo->lock); 3777 spin_unlock(&sinfo->lock);
3504 3778
@@ -3526,7 +3800,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
3526 block_rsv->size = num_bytes; 3800 block_rsv->size = num_bytes;
3527 3801
3528 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 3802 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
3529 sinfo->bytes_reserved + sinfo->bytes_readonly; 3803 sinfo->bytes_reserved + sinfo->bytes_readonly +
3804 sinfo->bytes_may_use;
3530 3805
3531 if (sinfo->total_bytes > num_bytes) { 3806 if (sinfo->total_bytes > num_bytes) {
3532 num_bytes = sinfo->total_bytes - num_bytes; 3807 num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3872,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
3597 3872
3598int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3873int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3599 struct btrfs_root *root, 3874 struct btrfs_root *root,
3600 int num_items, int *retries) 3875 int num_items)
3601{ 3876{
3602 u64 num_bytes; 3877 u64 num_bytes;
3603 int ret; 3878 int ret;
@@ -3607,7 +3882,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
3607 3882
3608 num_bytes = calc_trans_metadata_size(root, num_items); 3883 num_bytes = calc_trans_metadata_size(root, num_items);
3609 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3884 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
3610 num_bytes, retries); 3885 num_bytes);
3611 if (!ret) { 3886 if (!ret) {
3612 trans->bytes_reserved += num_bytes; 3887 trans->bytes_reserved += num_bytes;
3613 trans->block_rsv = &root->fs_info->trans_block_rsv; 3888 trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3956,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
3681 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3956 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
3682 u64 to_reserve; 3957 u64 to_reserve;
3683 int nr_extents; 3958 int nr_extents;
3684 int retries = 0;
3685 int ret; 3959 int ret;
3686 3960
3687 if (btrfs_transaction_in_commit(root->fs_info)) 3961 if (btrfs_transaction_in_commit(root->fs_info))
3688 schedule_timeout(1); 3962 schedule_timeout(1);
3689 3963
3690 num_bytes = ALIGN(num_bytes, root->sectorsize); 3964 num_bytes = ALIGN(num_bytes, root->sectorsize);
3691again: 3965
3692 spin_lock(&BTRFS_I(inode)->accounting_lock); 3966 spin_lock(&BTRFS_I(inode)->accounting_lock);
3693 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 3967 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
3694 if (nr_extents > BTRFS_I(inode)->reserved_extents) { 3968 if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3972,14 @@ again:
3698 nr_extents = 0; 3972 nr_extents = 0;
3699 to_reserve = 0; 3973 to_reserve = 0;
3700 } 3974 }
3975 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3701 3976
3702 to_reserve += calc_csum_metadata_size(inode, num_bytes); 3977 to_reserve += calc_csum_metadata_size(inode, num_bytes);
3703 ret = reserve_metadata_bytes(block_rsv, to_reserve); 3978 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
3704 if (ret) { 3979 if (ret)
3705 spin_unlock(&BTRFS_I(inode)->accounting_lock);
3706 ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
3707 &retries);
3708 if (ret > 0)
3709 goto again;
3710 return ret; 3980 return ret;
3711 }
3712 3981
3982 spin_lock(&BTRFS_I(inode)->accounting_lock);
3713 BTRFS_I(inode)->reserved_extents += nr_extents; 3983 BTRFS_I(inode)->reserved_extents += nr_extents;
3714 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 3984 atomic_inc(&BTRFS_I(inode)->outstanding_extents);
3715 spin_unlock(&BTRFS_I(inode)->accounting_lock); 3985 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3987,7 @@ again:
3717 block_rsv_add_bytes(block_rsv, to_reserve, 1); 3987 block_rsv_add_bytes(block_rsv, to_reserve, 1);
3718 3988
3719 if (block_rsv->size > 512 * 1024 * 1024) 3989 if (block_rsv->size > 512 * 1024 * 1024)
3720 shrink_delalloc(NULL, root, to_reserve); 3990 shrink_delalloc(NULL, root, to_reserve, 0);
3721 3991
3722 return 0; 3992 return 0;
3723} 3993}
@@ -3776,12 +4046,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3776 struct btrfs_root *root, 4046 struct btrfs_root *root,
3777 u64 bytenr, u64 num_bytes, int alloc) 4047 u64 bytenr, u64 num_bytes, int alloc)
3778{ 4048{
3779 struct btrfs_block_group_cache *cache; 4049 struct btrfs_block_group_cache *cache = NULL;
3780 struct btrfs_fs_info *info = root->fs_info; 4050 struct btrfs_fs_info *info = root->fs_info;
3781 int factor;
3782 u64 total = num_bytes; 4051 u64 total = num_bytes;
3783 u64 old_val; 4052 u64 old_val;
3784 u64 byte_in_group; 4053 u64 byte_in_group;
4054 int factor;
3785 4055
3786 /* block accounting for super block */ 4056 /* block accounting for super block */
3787 spin_lock(&info->delalloc_lock); 4057 spin_lock(&info->delalloc_lock);
@@ -3803,11 +4073,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
3803 factor = 2; 4073 factor = 2;
3804 else 4074 else
3805 factor = 1; 4075 factor = 1;
4076 /*
4077 * If this block group has free space cache written out, we
4078 * need to make sure to load it if we are removing space. This
4079 * is because we need the unpinning stage to actually add the
4080 * space back to the block group, otherwise we will leak space.
4081 */
4082 if (!alloc && cache->cached == BTRFS_CACHE_NO)
4083 cache_block_group(cache, trans, 1);
4084
3806 byte_in_group = bytenr - cache->key.objectid; 4085 byte_in_group = bytenr - cache->key.objectid;
3807 WARN_ON(byte_in_group > cache->key.offset); 4086 WARN_ON(byte_in_group > cache->key.offset);
3808 4087
3809 spin_lock(&cache->space_info->lock); 4088 spin_lock(&cache->space_info->lock);
3810 spin_lock(&cache->lock); 4089 spin_lock(&cache->lock);
4090
4091 if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
4092 cache->disk_cache_state < BTRFS_DC_CLEAR)
4093 cache->disk_cache_state = BTRFS_DC_CLEAR;
4094
3811 cache->dirty = 1; 4095 cache->dirty = 1;
3812 old_val = btrfs_block_group_used(&cache->item); 4096 old_val = btrfs_block_group_used(&cache->item);
3813 num_bytes = min(total, cache->key.offset - byte_in_group); 4097 num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4838,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4554 bool found_uncached_bg = false; 4838 bool found_uncached_bg = false;
4555 bool failed_cluster_refill = false; 4839 bool failed_cluster_refill = false;
4556 bool failed_alloc = false; 4840 bool failed_alloc = false;
4841 bool use_cluster = true;
4557 u64 ideal_cache_percent = 0; 4842 u64 ideal_cache_percent = 0;
4558 u64 ideal_cache_offset = 0; 4843 u64 ideal_cache_offset = 0;
4559 4844
@@ -4568,16 +4853,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4568 return -ENOSPC; 4853 return -ENOSPC;
4569 } 4854 }
4570 4855
4856 /*
4857 * If the space info is for both data and metadata it means we have a
4858 * small filesystem and we can't use the clustering stuff.
4859 */
4860 if (btrfs_mixed_space_info(space_info))
4861 use_cluster = false;
4862
4571 if (orig_root->ref_cows || empty_size) 4863 if (orig_root->ref_cows || empty_size)
4572 allowed_chunk_alloc = 1; 4864 allowed_chunk_alloc = 1;
4573 4865
4574 if (data & BTRFS_BLOCK_GROUP_METADATA) { 4866 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
4575 last_ptr = &root->fs_info->meta_alloc_cluster; 4867 last_ptr = &root->fs_info->meta_alloc_cluster;
4576 if (!btrfs_test_opt(root, SSD)) 4868 if (!btrfs_test_opt(root, SSD))
4577 empty_cluster = 64 * 1024; 4869 empty_cluster = 64 * 1024;
4578 } 4870 }
4579 4871
4580 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 4872 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
4873 btrfs_test_opt(root, SSD)) {
4581 last_ptr = &root->fs_info->data_alloc_cluster; 4874 last_ptr = &root->fs_info->data_alloc_cluster;
4582 } 4875 }
4583 4876
@@ -4641,6 +4934,10 @@ have_block_group:
4641 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4934 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
4642 u64 free_percent; 4935 u64 free_percent;
4643 4936
4937 ret = cache_block_group(block_group, trans, 1);
4938 if (block_group->cached == BTRFS_CACHE_FINISHED)
4939 goto have_block_group;
4940
4644 free_percent = btrfs_block_group_used(&block_group->item); 4941 free_percent = btrfs_block_group_used(&block_group->item);
4645 free_percent *= 100; 4942 free_percent *= 100;
4646 free_percent = div64_u64(free_percent, 4943 free_percent = div64_u64(free_percent,
@@ -4661,7 +4958,7 @@ have_block_group:
4661 if (loop > LOOP_CACHING_NOWAIT || 4958 if (loop > LOOP_CACHING_NOWAIT ||
4662 (loop > LOOP_FIND_IDEAL && 4959 (loop > LOOP_FIND_IDEAL &&
4663 atomic_read(&space_info->caching_threads) < 2)) { 4960 atomic_read(&space_info->caching_threads) < 2)) {
4664 ret = cache_block_group(block_group); 4961 ret = cache_block_group(block_group, trans, 0);
4665 BUG_ON(ret); 4962 BUG_ON(ret);
4666 } 4963 }
4667 found_uncached_bg = true; 4964 found_uncached_bg = true;
@@ -5218,7 +5515,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5218 u64 num_bytes = ins->offset; 5515 u64 num_bytes = ins->offset;
5219 5516
5220 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5517 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
5221 cache_block_group(block_group); 5518 cache_block_group(block_group, trans, 0);
5222 caching_ctl = get_caching_control(block_group); 5519 caching_ctl = get_caching_control(block_group);
5223 5520
5224 if (!caching_ctl) { 5521 if (!caching_ctl) {
@@ -5308,7 +5605,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5308 block_rsv = get_block_rsv(trans, root); 5605 block_rsv = get_block_rsv(trans, root);
5309 5606
5310 if (block_rsv->size == 0) { 5607 if (block_rsv->size == 0) {
5311 ret = reserve_metadata_bytes(block_rsv, blocksize); 5608 ret = reserve_metadata_bytes(trans, root, block_rsv,
5609 blocksize, 0);
5312 if (ret) 5610 if (ret)
5313 return ERR_PTR(ret); 5611 return ERR_PTR(ret);
5314 return block_rsv; 5612 return block_rsv;
@@ -5318,11 +5616,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
5318 if (!ret) 5616 if (!ret)
5319 return block_rsv; 5617 return block_rsv;
5320 5618
5321 WARN_ON(1);
5322 printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
5323 block_rsv->size, block_rsv->reserved,
5324 block_rsv->freed[0], block_rsv->freed[1]);
5325
5326 return ERR_PTR(-ENOSPC); 5619 return ERR_PTR(-ENOSPC);
5327} 5620}
5328 5621
@@ -5421,7 +5714,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
5421 u64 generation; 5714 u64 generation;
5422 u64 refs; 5715 u64 refs;
5423 u64 flags; 5716 u64 flags;
5424 u64 last = 0;
5425 u32 nritems; 5717 u32 nritems;
5426 u32 blocksize; 5718 u32 blocksize;
5427 struct btrfs_key key; 5719 struct btrfs_key key;
@@ -5489,7 +5781,6 @@ reada:
5489 generation); 5781 generation);
5490 if (ret) 5782 if (ret)
5491 break; 5783 break;
5492 last = bytenr + blocksize;
5493 nread++; 5784 nread++;
5494 } 5785 }
5495 wc->reada_slot = slot; 5786 wc->reada_slot = slot;
@@ -7813,6 +8104,40 @@ out:
7813 return ret; 8104 return ret;
7814} 8105}
7815 8106
8107void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
8108{
8109 struct btrfs_block_group_cache *block_group;
8110 u64 last = 0;
8111
8112 while (1) {
8113 struct inode *inode;
8114
8115 block_group = btrfs_lookup_first_block_group(info, last);
8116 while (block_group) {
8117 spin_lock(&block_group->lock);
8118 if (block_group->iref)
8119 break;
8120 spin_unlock(&block_group->lock);
8121 block_group = next_block_group(info->tree_root,
8122 block_group);
8123 }
8124 if (!block_group) {
8125 if (last == 0)
8126 break;
8127 last = 0;
8128 continue;
8129 }
8130
8131 inode = block_group->inode;
8132 block_group->iref = 0;
8133 block_group->inode = NULL;
8134 spin_unlock(&block_group->lock);
8135 iput(inode);
8136 last = block_group->key.objectid + block_group->key.offset;
8137 btrfs_put_block_group(block_group);
8138 }
8139}
8140
7816int btrfs_free_block_groups(struct btrfs_fs_info *info) 8141int btrfs_free_block_groups(struct btrfs_fs_info *info)
7817{ 8142{
7818 struct btrfs_block_group_cache *block_group; 8143 struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8221,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7896 struct btrfs_key key; 8221 struct btrfs_key key;
7897 struct btrfs_key found_key; 8222 struct btrfs_key found_key;
7898 struct extent_buffer *leaf; 8223 struct extent_buffer *leaf;
8224 int need_clear = 0;
8225 u64 cache_gen;
7899 8226
7900 root = info->extent_root; 8227 root = info->extent_root;
7901 key.objectid = 0; 8228 key.objectid = 0;
@@ -7905,6 +8232,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7905 if (!path) 8232 if (!path)
7906 return -ENOMEM; 8233 return -ENOMEM;
7907 8234
8235 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
8236 if (cache_gen != 0 &&
8237 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
8238 need_clear = 1;
8239 if (btrfs_test_opt(root, CLEAR_CACHE))
8240 need_clear = 1;
8241 if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
8242 printk(KERN_INFO "btrfs: disk space caching is enabled\n");
8243
7908 while (1) { 8244 while (1) {
7909 ret = find_first_block_group(root, path, &key); 8245 ret = find_first_block_group(root, path, &key);
7910 if (ret > 0) 8246 if (ret > 0)
@@ -7927,6 +8263,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
7927 INIT_LIST_HEAD(&cache->list); 8263 INIT_LIST_HEAD(&cache->list);
7928 INIT_LIST_HEAD(&cache->cluster_list); 8264 INIT_LIST_HEAD(&cache->cluster_list);
7929 8265
8266 if (need_clear)
8267 cache->disk_cache_state = BTRFS_DC_CLEAR;
8268
7930 /* 8269 /*
7931 * we only want to have 32k of ram per block group for keeping 8270 * we only want to have 32k of ram per block group for keeping
7932 * track of free space, and if we pass 1/2 of that we want to 8271 * track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8370,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
8031 cache->key.offset = size; 8370 cache->key.offset = size;
8032 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 8371 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
8033 cache->sectorsize = root->sectorsize; 8372 cache->sectorsize = root->sectorsize;
8373 cache->fs_info = root->fs_info;
8034 8374
8035 /* 8375 /*
8036 * we only want to have 32k of ram per block group for keeping track 8376 * we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8427,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8087 struct btrfs_path *path; 8427 struct btrfs_path *path;
8088 struct btrfs_block_group_cache *block_group; 8428 struct btrfs_block_group_cache *block_group;
8089 struct btrfs_free_cluster *cluster; 8429 struct btrfs_free_cluster *cluster;
8430 struct btrfs_root *tree_root = root->fs_info->tree_root;
8090 struct btrfs_key key; 8431 struct btrfs_key key;
8432 struct inode *inode;
8091 int ret; 8433 int ret;
8434 int factor;
8092 8435
8093 root = root->fs_info->extent_root; 8436 root = root->fs_info->extent_root;
8094 8437
@@ -8097,6 +8440,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8097 BUG_ON(!block_group->ro); 8440 BUG_ON(!block_group->ro);
8098 8441
8099 memcpy(&key, &block_group->key, sizeof(key)); 8442 memcpy(&key, &block_group->key, sizeof(key));
8443 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
8444 BTRFS_BLOCK_GROUP_RAID1 |
8445 BTRFS_BLOCK_GROUP_RAID10))
8446 factor = 2;
8447 else
8448 factor = 1;
8100 8449
8101 /* make sure this block group isn't part of an allocation cluster */ 8450 /* make sure this block group isn't part of an allocation cluster */
8102 cluster = &root->fs_info->data_alloc_cluster; 8451 cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8465,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8116 path = btrfs_alloc_path(); 8465 path = btrfs_alloc_path();
8117 BUG_ON(!path); 8466 BUG_ON(!path);
8118 8467
8468 inode = lookup_free_space_inode(root, block_group, path);
8469 if (!IS_ERR(inode)) {
8470 btrfs_orphan_add(trans, inode);
8471 clear_nlink(inode);
8472 /* One for the block groups ref */
8473 spin_lock(&block_group->lock);
8474 if (block_group->iref) {
8475 block_group->iref = 0;
8476 block_group->inode = NULL;
8477 spin_unlock(&block_group->lock);
8478 iput(inode);
8479 } else {
8480 spin_unlock(&block_group->lock);
8481 }
8482 /* One for our lookup ref */
8483 iput(inode);
8484 }
8485
8486 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
8487 key.offset = block_group->key.objectid;
8488 key.type = 0;
8489
8490 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
8491 if (ret < 0)
8492 goto out;
8493 if (ret > 0)
8494 btrfs_release_path(tree_root, path);
8495 if (ret == 0) {
8496 ret = btrfs_del_item(trans, tree_root, path);
8497 if (ret)
8498 goto out;
8499 btrfs_release_path(tree_root, path);
8500 }
8501
8119 spin_lock(&root->fs_info->block_group_cache_lock); 8502 spin_lock(&root->fs_info->block_group_cache_lock);
8120 rb_erase(&block_group->cache_node, 8503 rb_erase(&block_group->cache_node,
8121 &root->fs_info->block_group_cache_tree); 8504 &root->fs_info->block_group_cache_tree);
@@ -8137,8 +8520,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
8137 spin_lock(&block_group->space_info->lock); 8520 spin_lock(&block_group->space_info->lock);
8138 block_group->space_info->total_bytes -= block_group->key.offset; 8521 block_group->space_info->total_bytes -= block_group->key.offset;
8139 block_group->space_info->bytes_readonly -= block_group->key.offset; 8522 block_group->space_info->bytes_readonly -= block_group->key.offset;
8523 block_group->space_info->disk_total -= block_group->key.offset * factor;
8140 spin_unlock(&block_group->space_info->lock); 8524 spin_unlock(&block_group->space_info->lock);
8141 8525
8526 memcpy(&key, &block_group->key, sizeof(key));
8527
8142 btrfs_clear_space_info_full(root->fs_info); 8528 btrfs_clear_space_info_full(root->fs_info);
8143 8529
8144 btrfs_put_block_group(block_group); 8530 btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d74e6af9b53a..eac10e3260a9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
104 struct address_space *mapping, gfp_t mask) 104 struct address_space *mapping, gfp_t mask)
105{ 105{
106 tree->state = RB_ROOT; 106 tree->state = RB_ROOT;
107 tree->buffer = RB_ROOT; 107 INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
108 tree->ops = NULL; 108 tree->ops = NULL;
109 tree->dirty_bytes = 0; 109 tree->dirty_bytes = 0;
110 spin_lock_init(&tree->lock); 110 spin_lock_init(&tree->lock);
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
235 return ret; 235 return ret;
236} 236}
237 237
238static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
239 u64 offset, struct rb_node *node)
240{
241 struct rb_root *root = &tree->buffer;
242 struct rb_node **p = &root->rb_node;
243 struct rb_node *parent = NULL;
244 struct extent_buffer *eb;
245
246 while (*p) {
247 parent = *p;
248 eb = rb_entry(parent, struct extent_buffer, rb_node);
249
250 if (offset < eb->start)
251 p = &(*p)->rb_left;
252 else if (offset > eb->start)
253 p = &(*p)->rb_right;
254 else
255 return eb;
256 }
257
258 rb_link_node(node, parent, p);
259 rb_insert_color(node, root);
260 return NULL;
261}
262
263static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
264 u64 offset)
265{
266 struct rb_root *root = &tree->buffer;
267 struct rb_node *n = root->rb_node;
268 struct extent_buffer *eb;
269
270 while (n) {
271 eb = rb_entry(n, struct extent_buffer, rb_node);
272 if (offset < eb->start)
273 n = n->rb_left;
274 else if (offset > eb->start)
275 n = n->rb_right;
276 else
277 return eb;
278 }
279 return NULL;
280}
281
282static void merge_cb(struct extent_io_tree *tree, struct extent_state *new, 238static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
283 struct extent_state *other) 239 struct extent_state *other)
284{ 240{
@@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
1901 struct page *page = bvec->bv_page; 1857 struct page *page = bvec->bv_page;
1902 struct extent_io_tree *tree = bio->bi_private; 1858 struct extent_io_tree *tree = bio->bi_private;
1903 u64 start; 1859 u64 start;
1904 u64 end;
1905 1860
1906 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset; 1861 start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
1907 end = start + bvec->bv_len - 1;
1908 1862
1909 bio->bi_private = NULL; 1863 bio->bi_private = NULL;
1910 1864
@@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2204 u64 last_byte = i_size_read(inode); 2158 u64 last_byte = i_size_read(inode);
2205 u64 block_start; 2159 u64 block_start;
2206 u64 iosize; 2160 u64 iosize;
2207 u64 unlock_start;
2208 sector_t sector; 2161 sector_t sector;
2209 struct extent_state *cached_state = NULL; 2162 struct extent_state *cached_state = NULL;
2210 struct extent_map *em; 2163 struct extent_map *em;
@@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2329 if (tree->ops && tree->ops->writepage_end_io_hook) 2282 if (tree->ops && tree->ops->writepage_end_io_hook)
2330 tree->ops->writepage_end_io_hook(page, start, 2283 tree->ops->writepage_end_io_hook(page, start,
2331 page_end, NULL, 1); 2284 page_end, NULL, 1);
2332 unlock_start = page_end + 1;
2333 goto done; 2285 goto done;
2334 } 2286 }
2335 2287
@@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2340 if (tree->ops && tree->ops->writepage_end_io_hook) 2292 if (tree->ops && tree->ops->writepage_end_io_hook)
2341 tree->ops->writepage_end_io_hook(page, cur, 2293 tree->ops->writepage_end_io_hook(page, cur,
2342 page_end, NULL, 1); 2294 page_end, NULL, 1);
2343 unlock_start = page_end + 1;
2344 break; 2295 break;
2345 } 2296 }
2346 em = epd->get_extent(inode, page, pg_offset, cur, 2297 em = epd->get_extent(inode, page, pg_offset, cur,
@@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
2387 2338
2388 cur += iosize; 2339 cur += iosize;
2389 pg_offset += iosize; 2340 pg_offset += iosize;
2390 unlock_start = cur;
2391 continue; 2341 continue;
2392 } 2342 }
2393 /* leave this out until we have a page_mkwrite call */ 2343 /* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2473 pgoff_t index; 2423 pgoff_t index;
2474 pgoff_t end; /* Inclusive */ 2424 pgoff_t end; /* Inclusive */
2475 int scanned = 0; 2425 int scanned = 0;
2476 int range_whole = 0;
2477 2426
2478 pagevec_init(&pvec, 0); 2427 pagevec_init(&pvec, 0);
2479 if (wbc->range_cyclic) { 2428 if (wbc->range_cyclic) {
@@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
2482 } else { 2431 } else {
2483 index = wbc->range_start >> PAGE_CACHE_SHIFT; 2432 index = wbc->range_start >> PAGE_CACHE_SHIFT;
2484 end = wbc->range_end >> PAGE_CACHE_SHIFT; 2433 end = wbc->range_end >> PAGE_CACHE_SHIFT;
2485 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2486 range_whole = 1;
2487 scanned = 1; 2434 scanned = 1;
2488 } 2435 }
2489retry: 2436retry:
@@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
2823 NULL, 1, 2770 NULL, 1,
2824 end_bio_extent_preparewrite, 0, 2771 end_bio_extent_preparewrite, 0,
2825 0, 0); 2772 0, 0);
2773 if (ret && !err)
2774 err = ret;
2826 iocount++; 2775 iocount++;
2827 block_start = block_start + iosize; 2776 block_start = block_start + iosize;
2828 } else { 2777 } else {
@@ -3104,6 +3053,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
3104 kmem_cache_free(extent_buffer_cache, eb); 3053 kmem_cache_free(extent_buffer_cache, eb);
3105} 3054}
3106 3055
3056/*
3057 * Helper for releasing extent buffer page.
3058 */
3059static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
3060 unsigned long start_idx)
3061{
3062 unsigned long index;
3063 struct page *page;
3064
3065 if (!eb->first_page)
3066 return;
3067
3068 index = num_extent_pages(eb->start, eb->len);
3069 if (start_idx >= index)
3070 return;
3071
3072 do {
3073 index--;
3074 page = extent_buffer_page(eb, index);
3075 if (page)
3076 page_cache_release(page);
3077 } while (index != start_idx);
3078}
3079
/*
 * Tear down @eb completely: release every page of the buffer (from
 * index 0 upwards), then hand the extent_buffer itself to
 * __free_extent_buffer().
 */
static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
{
	/* pages first; the eb is still needed to locate them */
	btrfs_release_extent_buffer_page(eb, 0);

	__free_extent_buffer(eb);
}
3088
3107struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, 3089struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3108 u64 start, unsigned long len, 3090 u64 start, unsigned long len,
3109 struct page *page0, 3091 struct page *page0,
@@ -3117,16 +3099,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3117 struct page *p; 3099 struct page *p;
3118 struct address_space *mapping = tree->mapping; 3100 struct address_space *mapping = tree->mapping;
3119 int uptodate = 1; 3101 int uptodate = 1;
3102 int ret;
3120 3103
3121 spin_lock(&tree->buffer_lock); 3104 rcu_read_lock();
3122 eb = buffer_search(tree, start); 3105 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3123 if (eb) { 3106 if (eb && atomic_inc_not_zero(&eb->refs)) {
3124 atomic_inc(&eb->refs); 3107 rcu_read_unlock();
3125 spin_unlock(&tree->buffer_lock);
3126 mark_page_accessed(eb->first_page); 3108 mark_page_accessed(eb->first_page);
3127 return eb; 3109 return eb;
3128 } 3110 }
3129 spin_unlock(&tree->buffer_lock); 3111 rcu_read_unlock();
3130 3112
3131 eb = __alloc_extent_buffer(tree, start, len, mask); 3113 eb = __alloc_extent_buffer(tree, start, len, mask);
3132 if (!eb) 3114 if (!eb)
@@ -3165,26 +3147,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
3165 if (uptodate) 3147 if (uptodate)
3166 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3148 set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3167 3149
3150 ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
3151 if (ret)
3152 goto free_eb;
3153
3168 spin_lock(&tree->buffer_lock); 3154 spin_lock(&tree->buffer_lock);
3169 exists = buffer_tree_insert(tree, start, &eb->rb_node); 3155 ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
3170 if (exists) { 3156 if (ret == -EEXIST) {
3157 exists = radix_tree_lookup(&tree->buffer,
3158 start >> PAGE_CACHE_SHIFT);
3171 /* add one reference for the caller */ 3159 /* add one reference for the caller */
3172 atomic_inc(&exists->refs); 3160 atomic_inc(&exists->refs);
3173 spin_unlock(&tree->buffer_lock); 3161 spin_unlock(&tree->buffer_lock);
3162 radix_tree_preload_end();
3174 goto free_eb; 3163 goto free_eb;
3175 } 3164 }
3176 /* add one reference for the tree */ 3165 /* add one reference for the tree */
3177 atomic_inc(&eb->refs); 3166 atomic_inc(&eb->refs);
3178 spin_unlock(&tree->buffer_lock); 3167 spin_unlock(&tree->buffer_lock);
3168 radix_tree_preload_end();
3179 return eb; 3169 return eb;
3180 3170
3181free_eb: 3171free_eb:
3182 if (!atomic_dec_and_test(&eb->refs)) 3172 if (!atomic_dec_and_test(&eb->refs))
3183 return exists; 3173 return exists;
3184 for (index = 1; index < i; index++) 3174 btrfs_release_extent_buffer(eb);
3185 page_cache_release(extent_buffer_page(eb, index));
3186 page_cache_release(extent_buffer_page(eb, 0));
3187 __free_extent_buffer(eb);
3188 return exists; 3175 return exists;
3189} 3176}
3190 3177
@@ -3194,16 +3181,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
3194{ 3181{
3195 struct extent_buffer *eb; 3182 struct extent_buffer *eb;
3196 3183
3197 spin_lock(&tree->buffer_lock); 3184 rcu_read_lock();
3198 eb = buffer_search(tree, start); 3185 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3199 if (eb) 3186 if (eb && atomic_inc_not_zero(&eb->refs)) {
3200 atomic_inc(&eb->refs); 3187 rcu_read_unlock();
3201 spin_unlock(&tree->buffer_lock);
3202
3203 if (eb)
3204 mark_page_accessed(eb->first_page); 3188 mark_page_accessed(eb->first_page);
3189 return eb;
3190 }
3191 rcu_read_unlock();
3205 3192
3206 return eb; 3193 return NULL;
3207} 3194}
3208 3195
3209void free_extent_buffer(struct extent_buffer *eb) 3196void free_extent_buffer(struct extent_buffer *eb)
@@ -3833,34 +3820,45 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
3833 } 3820 }
3834} 3821}
3835 3822
3823static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
3824{
3825 struct extent_buffer *eb =
3826 container_of(head, struct extent_buffer, rcu_head);
3827
3828 btrfs_release_extent_buffer(eb);
3829}
3830
3836int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) 3831int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
3837{ 3832{
3838 u64 start = page_offset(page); 3833 u64 start = page_offset(page);
3839 struct extent_buffer *eb; 3834 struct extent_buffer *eb;
3840 int ret = 1; 3835 int ret = 1;
3841 unsigned long i;
3842 unsigned long num_pages;
3843 3836
3844 spin_lock(&tree->buffer_lock); 3837 spin_lock(&tree->buffer_lock);
3845 eb = buffer_search(tree, start); 3838 eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3846 if (!eb) 3839 if (!eb)
3847 goto out; 3840 goto out;
3848 3841
3849 if (atomic_read(&eb->refs) > 1) { 3842 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
3850 ret = 0; 3843 ret = 0;
3851 goto out; 3844 goto out;
3852 } 3845 }
3853 if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { 3846
3847 /*
3848 * set @eb->refs to 0 if it is already 1, and then release the @eb.
3849 * Or go back.
3850 */
3851 if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
3854 ret = 0; 3852 ret = 0;
3855 goto out; 3853 goto out;
3856 } 3854 }
3857 /* at this point we can safely release the extent buffer */ 3855
3858 num_pages = num_extent_pages(eb->start, eb->len); 3856 radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
3859 for (i = 0; i < num_pages; i++)
3860 page_cache_release(extent_buffer_page(eb, i));
3861 rb_erase(&eb->rb_node, &tree->buffer);
3862 __free_extent_buffer(eb);
3863out: 3857out:
3864 spin_unlock(&tree->buffer_lock); 3858 spin_unlock(&tree->buffer_lock);
3859
3860 /* at this point we can safely release the extent buffer */
3861 if (atomic_read(&eb->refs) == 0)
3862 call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
3865 return ret; 3863 return ret;
3866} 3864}
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5691c7b590da..1c6d4f342ef7 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -85,7 +85,7 @@ struct extent_io_ops {
85 85
86struct extent_io_tree { 86struct extent_io_tree {
87 struct rb_root state; 87 struct rb_root state;
88 struct rb_root buffer; 88 struct radix_tree_root buffer;
89 struct address_space *mapping; 89 struct address_space *mapping;
90 u64 dirty_bytes; 90 u64 dirty_bytes;
91 spinlock_t lock; 91 spinlock_t lock;
@@ -123,7 +123,7 @@ struct extent_buffer {
123 unsigned long bflags; 123 unsigned long bflags;
124 atomic_t refs; 124 atomic_t refs;
125 struct list_head leak_list; 125 struct list_head leak_list;
126 struct rb_node rb_node; 126 struct rcu_head rcu_head;
127 127
128 /* the spinlock is used to protect most operations */ 128 /* the spinlock is used to protect most operations */
129 spinlock_t lock; 129 spinlock_t lock;
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 454ca52d6451..23cb8da3ff66 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
335 goto out; 335 goto out;
336 } 336 }
337 if (IS_ERR(rb_node)) { 337 if (IS_ERR(rb_node)) {
338 em = ERR_PTR(PTR_ERR(rb_node)); 338 em = ERR_CAST(rb_node);
339 goto out; 339 goto out;
340 } 340 }
341 em = rb_entry(rb_node, struct extent_map, rb_node); 341 em = rb_entry(rb_node, struct extent_map, rb_node);
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
384 goto out; 384 goto out;
385 } 385 }
386 if (IS_ERR(rb_node)) { 386 if (IS_ERR(rb_node)) {
387 em = ERR_PTR(PTR_ERR(rb_node)); 387 em = ERR_CAST(rb_node);
388 goto out; 388 goto out;
389 } 389 }
390 em = rb_entry(rb_node, struct extent_map, rb_node); 390 em = rb_entry(rb_node, struct extent_map, rb_node);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f488fac04d99..22ee0dc2e6b8 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -23,10 +23,761 @@
23#include "ctree.h" 23#include "ctree.h"
24#include "free-space-cache.h" 24#include "free-space-cache.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h"
26 27
27#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
28#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
29 30
31static void recalculate_thresholds(struct btrfs_block_group_cache
32 *block_group);
33static int link_free_space(struct btrfs_block_group_cache *block_group,
34 struct btrfs_free_space *info);
35
36struct inode *lookup_free_space_inode(struct btrfs_root *root,
37 struct btrfs_block_group_cache
38 *block_group, struct btrfs_path *path)
39{
40 struct btrfs_key key;
41 struct btrfs_key location;
42 struct btrfs_disk_key disk_key;
43 struct btrfs_free_space_header *header;
44 struct extent_buffer *leaf;
45 struct inode *inode = NULL;
46 int ret;
47
48 spin_lock(&block_group->lock);
49 if (block_group->inode)
50 inode = igrab(block_group->inode);
51 spin_unlock(&block_group->lock);
52 if (inode)
53 return inode;
54
55 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
56 key.offset = block_group->key.objectid;
57 key.type = 0;
58
59 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
60 if (ret < 0)
61 return ERR_PTR(ret);
62 if (ret > 0) {
63 btrfs_release_path(root, path);
64 return ERR_PTR(-ENOENT);
65 }
66
67 leaf = path->nodes[0];
68 header = btrfs_item_ptr(leaf, path->slots[0],
69 struct btrfs_free_space_header);
70 btrfs_free_space_key(leaf, header, &disk_key);
71 btrfs_disk_key_to_cpu(&location, &disk_key);
72 btrfs_release_path(root, path);
73
74 inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
75 if (!inode)
76 return ERR_PTR(-ENOENT);
77 if (IS_ERR(inode))
78 return inode;
79 if (is_bad_inode(inode)) {
80 iput(inode);
81 return ERR_PTR(-ENOENT);
82 }
83
84 spin_lock(&block_group->lock);
85 if (!root->fs_info->closing) {
86 block_group->inode = igrab(inode);
87 block_group->iref = 1;
88 }
89 spin_unlock(&block_group->lock);
90
91 return inode;
92}
93
94int create_free_space_inode(struct btrfs_root *root,
95 struct btrfs_trans_handle *trans,
96 struct btrfs_block_group_cache *block_group,
97 struct btrfs_path *path)
98{
99 struct btrfs_key key;
100 struct btrfs_disk_key disk_key;
101 struct btrfs_free_space_header *header;
102 struct btrfs_inode_item *inode_item;
103 struct extent_buffer *leaf;
104 u64 objectid;
105 int ret;
106
107 ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
108 if (ret < 0)
109 return ret;
110
111 ret = btrfs_insert_empty_inode(trans, root, path, objectid);
112 if (ret)
113 return ret;
114
115 leaf = path->nodes[0];
116 inode_item = btrfs_item_ptr(leaf, path->slots[0],
117 struct btrfs_inode_item);
118 btrfs_item_key(leaf, &disk_key, path->slots[0]);
119 memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
120 sizeof(*inode_item));
121 btrfs_set_inode_generation(leaf, inode_item, trans->transid);
122 btrfs_set_inode_size(leaf, inode_item, 0);
123 btrfs_set_inode_nbytes(leaf, inode_item, 0);
124 btrfs_set_inode_uid(leaf, inode_item, 0);
125 btrfs_set_inode_gid(leaf, inode_item, 0);
126 btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
127 btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
128 BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
129 btrfs_set_inode_nlink(leaf, inode_item, 1);
130 btrfs_set_inode_transid(leaf, inode_item, trans->transid);
131 btrfs_set_inode_block_group(leaf, inode_item,
132 block_group->key.objectid);
133 btrfs_mark_buffer_dirty(leaf);
134 btrfs_release_path(root, path);
135
136 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
137 key.offset = block_group->key.objectid;
138 key.type = 0;
139
140 ret = btrfs_insert_empty_item(trans, root, path, &key,
141 sizeof(struct btrfs_free_space_header));
142 if (ret < 0) {
143 btrfs_release_path(root, path);
144 return ret;
145 }
146 leaf = path->nodes[0];
147 header = btrfs_item_ptr(leaf, path->slots[0],
148 struct btrfs_free_space_header);
149 memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
150 btrfs_set_free_space_key(leaf, header, &disk_key);
151 btrfs_mark_buffer_dirty(leaf);
152 btrfs_release_path(root, path);
153
154 return 0;
155}
156
157int btrfs_truncate_free_space_cache(struct btrfs_root *root,
158 struct btrfs_trans_handle *trans,
159 struct btrfs_path *path,
160 struct inode *inode)
161{
162 loff_t oldsize;
163 int ret = 0;
164
165 trans->block_rsv = root->orphan_block_rsv;
166 ret = btrfs_block_rsv_check(trans, root,
167 root->orphan_block_rsv,
168 0, 5);
169 if (ret)
170 return ret;
171
172 oldsize = i_size_read(inode);
173 btrfs_i_size_write(inode, 0);
174 truncate_pagecache(inode, oldsize, 0);
175
176 /*
177 * We don't need an orphan item because truncating the free space cache
178 * will never be split across transactions.
179 */
180 ret = btrfs_truncate_inode_items(trans, root, inode,
181 0, BTRFS_EXTENT_DATA_KEY);
182 if (ret) {
183 WARN_ON(1);
184 return ret;
185 }
186
187 return btrfs_update_inode(trans, root, inode);
188}
189
190static int readahead_cache(struct inode *inode)
191{
192 struct file_ra_state *ra;
193 unsigned long last_index;
194
195 ra = kzalloc(sizeof(*ra), GFP_NOFS);
196 if (!ra)
197 return -ENOMEM;
198
199 file_ra_state_init(ra, inode->i_mapping);
200 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
201
202 page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
203
204 kfree(ra);
205
206 return 0;
207}
208
209int load_free_space_cache(struct btrfs_fs_info *fs_info,
210 struct btrfs_block_group_cache *block_group)
211{
212 struct btrfs_root *root = fs_info->tree_root;
213 struct inode *inode;
214 struct btrfs_free_space_header *header;
215 struct extent_buffer *leaf;
216 struct page *page;
217 struct btrfs_path *path;
218 u32 *checksums = NULL, *crc;
219 char *disk_crcs = NULL;
220 struct btrfs_key key;
221 struct list_head bitmaps;
222 u64 num_entries;
223 u64 num_bitmaps;
224 u64 generation;
225 u32 cur_crc = ~(u32)0;
226 pgoff_t index = 0;
227 unsigned long first_page_offset;
228 int num_checksums;
229 int ret = 0;
230
231 /*
232 * If we're unmounting then just return, since this does a search on the
233 * normal root and not the commit root and we could deadlock.
234 */
235 smp_mb();
236 if (fs_info->closing)
237 return 0;
238
239 /*
240 * If this block group has been marked to be cleared for one reason or
241 * another then we can't trust the on disk cache, so just return.
242 */
243 spin_lock(&block_group->lock);
244 if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
245 spin_unlock(&block_group->lock);
246 return 0;
247 }
248 spin_unlock(&block_group->lock);
249
250 INIT_LIST_HEAD(&bitmaps);
251
252 path = btrfs_alloc_path();
253 if (!path)
254 return 0;
255
256 inode = lookup_free_space_inode(root, block_group, path);
257 if (IS_ERR(inode)) {
258 btrfs_free_path(path);
259 return 0;
260 }
261
262 /* Nothing in the space cache, goodbye */
263 if (!i_size_read(inode)) {
264 btrfs_free_path(path);
265 goto out;
266 }
267
268 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
269 key.offset = block_group->key.objectid;
270 key.type = 0;
271
272 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
273 if (ret) {
274 btrfs_free_path(path);
275 goto out;
276 }
277
278 leaf = path->nodes[0];
279 header = btrfs_item_ptr(leaf, path->slots[0],
280 struct btrfs_free_space_header);
281 num_entries = btrfs_free_space_entries(leaf, header);
282 num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
283 generation = btrfs_free_space_generation(leaf, header);
284 btrfs_free_path(path);
285
286 if (BTRFS_I(inode)->generation != generation) {
287 printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
288 " not match free space cache generation (%llu) for "
289 "block group %llu\n",
290 (unsigned long long)BTRFS_I(inode)->generation,
291 (unsigned long long)generation,
292 (unsigned long long)block_group->key.objectid);
293 goto out;
294 }
295
296 if (!num_entries)
297 goto out;
298
299 /* Setup everything for doing checksumming */
300 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
301 checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
302 if (!checksums)
303 goto out;
304 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
305 disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
306 if (!disk_crcs)
307 goto out;
308
309 ret = readahead_cache(inode);
310 if (ret) {
311 ret = 0;
312 goto out;
313 }
314
315 while (1) {
316 struct btrfs_free_space_entry *entry;
317 struct btrfs_free_space *e;
318 void *addr;
319 unsigned long offset = 0;
320 unsigned long start_offset = 0;
321 int need_loop = 0;
322
323 if (!num_entries && !num_bitmaps)
324 break;
325
326 if (index == 0) {
327 start_offset = first_page_offset;
328 offset = start_offset;
329 }
330
331 page = grab_cache_page(inode->i_mapping, index);
332 if (!page) {
333 ret = 0;
334 goto free_cache;
335 }
336
337 if (!PageUptodate(page)) {
338 btrfs_readpage(NULL, page);
339 lock_page(page);
340 if (!PageUptodate(page)) {
341 unlock_page(page);
342 page_cache_release(page);
343 printk(KERN_ERR "btrfs: error reading free "
344 "space cache: %llu\n",
345 (unsigned long long)
346 block_group->key.objectid);
347 goto free_cache;
348 }
349 }
350 addr = kmap(page);
351
352 if (index == 0) {
353 u64 *gen;
354
355 memcpy(disk_crcs, addr, first_page_offset);
356 gen = addr + (sizeof(u32) * num_checksums);
357 if (*gen != BTRFS_I(inode)->generation) {
358 printk(KERN_ERR "btrfs: space cache generation"
359 " (%llu) does not match inode (%llu) "
360 "for block group %llu\n",
361 (unsigned long long)*gen,
362 (unsigned long long)
363 BTRFS_I(inode)->generation,
364 (unsigned long long)
365 block_group->key.objectid);
366 kunmap(page);
367 unlock_page(page);
368 page_cache_release(page);
369 goto free_cache;
370 }
371 crc = (u32 *)disk_crcs;
372 }
373 entry = addr + start_offset;
374
375 /* First lets check our crc before we do anything fun */
376 cur_crc = ~(u32)0;
377 cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
378 PAGE_CACHE_SIZE - start_offset);
379 btrfs_csum_final(cur_crc, (char *)&cur_crc);
380 if (cur_crc != *crc) {
381 printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
382 "block group %llu\n", index,
383 (unsigned long long)block_group->key.objectid);
384 kunmap(page);
385 unlock_page(page);
386 page_cache_release(page);
387 goto free_cache;
388 }
389 crc++;
390
391 while (1) {
392 if (!num_entries)
393 break;
394
395 need_loop = 1;
396 e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
397 if (!e) {
398 kunmap(page);
399 unlock_page(page);
400 page_cache_release(page);
401 goto free_cache;
402 }
403
404 e->offset = le64_to_cpu(entry->offset);
405 e->bytes = le64_to_cpu(entry->bytes);
406 if (!e->bytes) {
407 kunmap(page);
408 kfree(e);
409 unlock_page(page);
410 page_cache_release(page);
411 goto free_cache;
412 }
413
414 if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
415 spin_lock(&block_group->tree_lock);
416 ret = link_free_space(block_group, e);
417 spin_unlock(&block_group->tree_lock);
418 BUG_ON(ret);
419 } else {
420 e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
421 if (!e->bitmap) {
422 kunmap(page);
423 kfree(e);
424 unlock_page(page);
425 page_cache_release(page);
426 goto free_cache;
427 }
428 spin_lock(&block_group->tree_lock);
429 ret = link_free_space(block_group, e);
430 block_group->total_bitmaps++;
431 recalculate_thresholds(block_group);
432 spin_unlock(&block_group->tree_lock);
433 list_add_tail(&e->list, &bitmaps);
434 }
435
436 num_entries--;
437 offset += sizeof(struct btrfs_free_space_entry);
438 if (offset + sizeof(struct btrfs_free_space_entry) >=
439 PAGE_CACHE_SIZE)
440 break;
441 entry++;
442 }
443
444 /*
445 * We read an entry out of this page, we need to move on to the
446 * next page.
447 */
448 if (need_loop) {
449 kunmap(page);
450 goto next;
451 }
452
453 /*
454 * We add the bitmaps at the end of the entries in order that
455 * the bitmap entries are added to the cache.
456 */
457 e = list_entry(bitmaps.next, struct btrfs_free_space, list);
458 list_del_init(&e->list);
459 memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
460 kunmap(page);
461 num_bitmaps--;
462next:
463 unlock_page(page);
464 page_cache_release(page);
465 index++;
466 }
467
468 ret = 1;
469out:
470 kfree(checksums);
471 kfree(disk_crcs);
472 iput(inode);
473 return ret;
474
475free_cache:
476 /* This cache is bogus, make sure it gets cleared */
477 spin_lock(&block_group->lock);
478 block_group->disk_cache_state = BTRFS_DC_CLEAR;
479 spin_unlock(&block_group->lock);
480 btrfs_remove_free_space_cache(block_group);
481 goto out;
482}
483
484int btrfs_write_out_cache(struct btrfs_root *root,
485 struct btrfs_trans_handle *trans,
486 struct btrfs_block_group_cache *block_group,
487 struct btrfs_path *path)
488{
489 struct btrfs_free_space_header *header;
490 struct extent_buffer *leaf;
491 struct inode *inode;
492 struct rb_node *node;
493 struct list_head *pos, *n;
494 struct page *page;
495 struct extent_state *cached_state = NULL;
496 struct list_head bitmap_list;
497 struct btrfs_key key;
498 u64 bytes = 0;
499 u32 *crc, *checksums;
500 pgoff_t index = 0, last_index = 0;
501 unsigned long first_page_offset;
502 int num_checksums;
503 int entries = 0;
504 int bitmaps = 0;
505 int ret = 0;
506
507 root = root->fs_info->tree_root;
508
509 INIT_LIST_HEAD(&bitmap_list);
510
511 spin_lock(&block_group->lock);
512 if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
513 spin_unlock(&block_group->lock);
514 return 0;
515 }
516 spin_unlock(&block_group->lock);
517
518 inode = lookup_free_space_inode(root, block_group, path);
519 if (IS_ERR(inode))
520 return 0;
521
522 if (!i_size_read(inode)) {
523 iput(inode);
524 return 0;
525 }
526
527 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
528 filemap_write_and_wait(inode->i_mapping);
529 btrfs_wait_ordered_range(inode, inode->i_size &
530 ~(root->sectorsize - 1), (u64)-1);
531
532 /* We need a checksum per page. */
533 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
534 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
535 if (!crc) {
536 iput(inode);
537 return 0;
538 }
539
540 /* Since the first page has all of our checksums and our generation we
541 * need to calculate the offset into the page that we can start writing
542 * our entries.
543 */
544 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
545
546 node = rb_first(&block_group->free_space_offset);
547 if (!node)
548 goto out_free;
549
550 /*
551 * Lock all pages first so we can lock the extent safely.
552 *
553 * NOTE: Because we hold the ref the entire time we're going to write to
554 * the page find_get_page should never fail, so we don't do a check
555 * after find_get_page at this point. Just putting this here so people
556 * know and don't freak out.
557 */
558 while (index <= last_index) {
559 page = grab_cache_page(inode->i_mapping, index);
560 if (!page) {
561 pgoff_t i = 0;
562
563 while (i < index) {
564 page = find_get_page(inode->i_mapping, i);
565 unlock_page(page);
566 page_cache_release(page);
567 page_cache_release(page);
568 i++;
569 }
570 goto out_free;
571 }
572 index++;
573 }
574
575 index = 0;
576 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
577 0, &cached_state, GFP_NOFS);
578
579 /* Write out the extent entries */
580 do {
581 struct btrfs_free_space_entry *entry;
582 void *addr;
583 unsigned long offset = 0;
584 unsigned long start_offset = 0;
585
586 if (index == 0) {
587 start_offset = first_page_offset;
588 offset = start_offset;
589 }
590
591 page = find_get_page(inode->i_mapping, index);
592
593 addr = kmap(page);
594 entry = addr + start_offset;
595
596 memset(addr, 0, PAGE_CACHE_SIZE);
597 while (1) {
598 struct btrfs_free_space *e;
599
600 e = rb_entry(node, struct btrfs_free_space, offset_index);
601 entries++;
602
603 entry->offset = cpu_to_le64(e->offset);
604 entry->bytes = cpu_to_le64(e->bytes);
605 if (e->bitmap) {
606 entry->type = BTRFS_FREE_SPACE_BITMAP;
607 list_add_tail(&e->list, &bitmap_list);
608 bitmaps++;
609 } else {
610 entry->type = BTRFS_FREE_SPACE_EXTENT;
611 }
612 node = rb_next(node);
613 if (!node)
614 break;
615 offset += sizeof(struct btrfs_free_space_entry);
616 if (offset + sizeof(struct btrfs_free_space_entry) >=
617 PAGE_CACHE_SIZE)
618 break;
619 entry++;
620 }
621 *crc = ~(u32)0;
622 *crc = btrfs_csum_data(root, addr + start_offset, *crc,
623 PAGE_CACHE_SIZE - start_offset);
624 kunmap(page);
625
626 btrfs_csum_final(*crc, (char *)crc);
627 crc++;
628
629 bytes += PAGE_CACHE_SIZE;
630
631 ClearPageChecked(page);
632 set_page_extent_mapped(page);
633 SetPageUptodate(page);
634 set_page_dirty(page);
635
636 /*
637 * We need to release our reference we got for grab_cache_page,
638 * except for the first page which will hold our checksums, we
639 * do that below.
640 */
641 if (index != 0) {
642 unlock_page(page);
643 page_cache_release(page);
644 }
645
646 page_cache_release(page);
647
648 index++;
649 } while (node);
650
651 /* Write out the bitmaps */
652 list_for_each_safe(pos, n, &bitmap_list) {
653 void *addr;
654 struct btrfs_free_space *entry =
655 list_entry(pos, struct btrfs_free_space, list);
656
657 page = find_get_page(inode->i_mapping, index);
658
659 addr = kmap(page);
660 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
661 *crc = ~(u32)0;
662 *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
663 kunmap(page);
664 btrfs_csum_final(*crc, (char *)crc);
665 crc++;
666 bytes += PAGE_CACHE_SIZE;
667
668 ClearPageChecked(page);
669 set_page_extent_mapped(page);
670 SetPageUptodate(page);
671 set_page_dirty(page);
672 unlock_page(page);
673 page_cache_release(page);
674 page_cache_release(page);
675 list_del_init(&entry->list);
676 index++;
677 }
678
679 /* Zero out the rest of the pages just to make sure */
680 while (index <= last_index) {
681 void *addr;
682
683 page = find_get_page(inode->i_mapping, index);
684
685 addr = kmap(page);
686 memset(addr, 0, PAGE_CACHE_SIZE);
687 kunmap(page);
688 ClearPageChecked(page);
689 set_page_extent_mapped(page);
690 SetPageUptodate(page);
691 set_page_dirty(page);
692 unlock_page(page);
693 page_cache_release(page);
694 page_cache_release(page);
695 bytes += PAGE_CACHE_SIZE;
696 index++;
697 }
698
699 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
700
701 /* Write the checksums and trans id to the first page */
702 {
703 void *addr;
704 u64 *gen;
705
706 page = find_get_page(inode->i_mapping, 0);
707
708 addr = kmap(page);
709 memcpy(addr, checksums, sizeof(u32) * num_checksums);
710 gen = addr + (sizeof(u32) * num_checksums);
711 *gen = trans->transid;
712 kunmap(page);
713 ClearPageChecked(page);
714 set_page_extent_mapped(page);
715 SetPageUptodate(page);
716 set_page_dirty(page);
717 unlock_page(page);
718 page_cache_release(page);
719 page_cache_release(page);
720 }
721 BTRFS_I(inode)->generation = trans->transid;
722
723 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
724 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
725
726 filemap_write_and_wait(inode->i_mapping);
727
728 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
729 key.offset = block_group->key.objectid;
730 key.type = 0;
731
732 ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
733 if (ret < 0) {
734 ret = 0;
735 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
736 EXTENT_DIRTY | EXTENT_DELALLOC |
737 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
738 goto out_free;
739 }
740 leaf = path->nodes[0];
741 if (ret > 0) {
742 struct btrfs_key found_key;
743 BUG_ON(!path->slots[0]);
744 path->slots[0]--;
745 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
746 if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
747 found_key.offset != block_group->key.objectid) {
748 ret = 0;
749 clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
750 EXTENT_DIRTY | EXTENT_DELALLOC |
751 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
752 GFP_NOFS);
753 btrfs_release_path(root, path);
754 goto out_free;
755 }
756 }
757 header = btrfs_item_ptr(leaf, path->slots[0],
758 struct btrfs_free_space_header);
759 btrfs_set_free_space_entries(leaf, header, entries);
760 btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
761 btrfs_set_free_space_generation(leaf, header, trans->transid);
762 btrfs_mark_buffer_dirty(leaf);
763 btrfs_release_path(root, path);
764
765 ret = 1;
766
767out_free:
768 if (ret == 0) {
769 invalidate_inode_pages2_range(inode->i_mapping, 0, index);
770 spin_lock(&block_group->lock);
771 block_group->disk_cache_state = BTRFS_DC_ERROR;
772 spin_unlock(&block_group->lock);
773 BTRFS_I(inode)->generation = 0;
774 }
775 kfree(checksums);
776 btrfs_update_inode(trans, root, inode);
777 iput(inode);
778 return ret;
779}
780
30static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 781static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
31 u64 offset) 782 u64 offset)
32{ 783{
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 890a8e79011b..e49ca5c321b5 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,24 @@ struct btrfs_free_space {
27 struct list_head list; 27 struct list_head list;
28}; 28};
29 29
30struct inode *lookup_free_space_inode(struct btrfs_root *root,
31 struct btrfs_block_group_cache
32 *block_group, struct btrfs_path *path);
33int create_free_space_inode(struct btrfs_root *root,
34 struct btrfs_trans_handle *trans,
35 struct btrfs_block_group_cache *block_group,
36 struct btrfs_path *path);
37
38int btrfs_truncate_free_space_cache(struct btrfs_root *root,
39 struct btrfs_trans_handle *trans,
40 struct btrfs_path *path,
41 struct inode *inode);
42int load_free_space_cache(struct btrfs_fs_info *fs_info,
43 struct btrfs_block_group_cache *block_group);
44int btrfs_write_out_cache(struct btrfs_root *root,
45 struct btrfs_trans_handle *trans,
46 struct btrfs_block_group_cache *block_group,
47 struct btrfs_path *path);
30int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 48int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
31 u64 bytenr, u64 size); 49 u64 bytenr, u64 size);
32int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 50int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 64f99cf69ce0..558cac2dfa54 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
319 struct btrfs_root *root = BTRFS_I(inode)->root; 319 struct btrfs_root *root = BTRFS_I(inode)->root;
320 struct btrfs_trans_handle *trans; 320 struct btrfs_trans_handle *trans;
321 u64 num_bytes; 321 u64 num_bytes;
322 u64 orig_start;
323 u64 disk_num_bytes;
324 u64 blocksize = root->sectorsize; 322 u64 blocksize = root->sectorsize;
325 u64 actual_end; 323 u64 actual_end;
326 u64 isize = i_size_read(inode); 324 u64 isize = i_size_read(inode);
@@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
335 int i; 333 int i;
336 int will_compress; 334 int will_compress;
337 335
338 orig_start = start;
339
340 actual_end = min_t(u64, isize, end + 1); 336 actual_end = min_t(u64, isize, end + 1);
341again: 337again:
342 will_compress = 0; 338 will_compress = 0;
@@ -371,7 +367,6 @@ again:
371 total_compressed = min(total_compressed, max_uncompressed); 367 total_compressed = min(total_compressed, max_uncompressed);
372 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 368 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
373 num_bytes = max(blocksize, num_bytes); 369 num_bytes = max(blocksize, num_bytes);
374 disk_num_bytes = num_bytes;
375 total_in = 0; 370 total_in = 0;
376 ret = 0; 371 ret = 0;
377 372
@@ -467,7 +462,6 @@ again:
467 if (total_compressed >= total_in) { 462 if (total_compressed >= total_in) {
468 will_compress = 0; 463 will_compress = 0;
469 } else { 464 } else {
470 disk_num_bytes = total_compressed;
471 num_bytes = total_in; 465 num_bytes = total_in;
472 } 466 }
473 } 467 }
@@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
757 u64 disk_num_bytes; 751 u64 disk_num_bytes;
758 u64 cur_alloc_size; 752 u64 cur_alloc_size;
759 u64 blocksize = root->sectorsize; 753 u64 blocksize = root->sectorsize;
760 u64 actual_end;
761 u64 isize = i_size_read(inode);
762 struct btrfs_key ins; 754 struct btrfs_key ins;
763 struct extent_map *em; 755 struct extent_map *em;
764 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 756 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
765 int ret = 0; 757 int ret = 0;
766 758
759 BUG_ON(root == root->fs_info->tree_root);
767 trans = btrfs_join_transaction(root, 1); 760 trans = btrfs_join_transaction(root, 1);
768 BUG_ON(!trans); 761 BUG_ON(!trans);
769 btrfs_set_trans_block_group(trans, inode); 762 btrfs_set_trans_block_group(trans, inode);
770 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 763 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
771 764
772 actual_end = min_t(u64, isize, end + 1);
773
774 num_bytes = (end - start + blocksize) & ~(blocksize - 1); 765 num_bytes = (end - start + blocksize) & ~(blocksize - 1);
775 num_bytes = max(blocksize, num_bytes); 766 num_bytes = max(blocksize, num_bytes);
776 disk_num_bytes = num_bytes; 767 disk_num_bytes = num_bytes;
@@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
1035 int type; 1026 int type;
1036 int nocow; 1027 int nocow;
1037 int check_prev = 1; 1028 int check_prev = 1;
1029 bool nolock = false;
1038 1030
1039 path = btrfs_alloc_path(); 1031 path = btrfs_alloc_path();
1040 BUG_ON(!path); 1032 BUG_ON(!path);
1041 trans = btrfs_join_transaction(root, 1); 1033 if (root == root->fs_info->tree_root) {
1034 nolock = true;
1035 trans = btrfs_join_transaction_nolock(root, 1);
1036 } else {
1037 trans = btrfs_join_transaction(root, 1);
1038 }
1042 BUG_ON(!trans); 1039 BUG_ON(!trans);
1043 1040
1044 cow_start = (u64)-1; 1041 cow_start = (u64)-1;
@@ -1211,8 +1208,13 @@ out_check:
1211 BUG_ON(ret); 1208 BUG_ON(ret);
1212 } 1209 }
1213 1210
1214 ret = btrfs_end_transaction(trans, root); 1211 if (nolock) {
1215 BUG_ON(ret); 1212 ret = btrfs_end_transaction_nolock(trans, root);
1213 BUG_ON(ret);
1214 } else {
1215 ret = btrfs_end_transaction(trans, root);
1216 BUG_ON(ret);
1217 }
1216 btrfs_free_path(path); 1218 btrfs_free_path(path);
1217 return 0; 1219 return 0;
1218} 1220}
@@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
1289 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1291 if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1290 struct btrfs_root *root = BTRFS_I(inode)->root; 1292 struct btrfs_root *root = BTRFS_I(inode)->root;
1291 u64 len = state->end + 1 - state->start; 1293 u64 len = state->end + 1 - state->start;
1294 int do_list = (root->root_key.objectid !=
1295 BTRFS_ROOT_TREE_OBJECTID);
1292 1296
1293 if (*bits & EXTENT_FIRST_DELALLOC) 1297 if (*bits & EXTENT_FIRST_DELALLOC)
1294 *bits &= ~EXTENT_FIRST_DELALLOC; 1298 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
1298 spin_lock(&root->fs_info->delalloc_lock); 1302 spin_lock(&root->fs_info->delalloc_lock);
1299 BTRFS_I(inode)->delalloc_bytes += len; 1303 BTRFS_I(inode)->delalloc_bytes += len;
1300 root->fs_info->delalloc_bytes += len; 1304 root->fs_info->delalloc_bytes += len;
1301 if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1305 if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1302 list_add_tail(&BTRFS_I(inode)->delalloc_inodes, 1306 list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
1303 &root->fs_info->delalloc_inodes); 1307 &root->fs_info->delalloc_inodes);
1304 } 1308 }
@@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1321 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { 1325 if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
1322 struct btrfs_root *root = BTRFS_I(inode)->root; 1326 struct btrfs_root *root = BTRFS_I(inode)->root;
1323 u64 len = state->end + 1 - state->start; 1327 u64 len = state->end + 1 - state->start;
1328 int do_list = (root->root_key.objectid !=
1329 BTRFS_ROOT_TREE_OBJECTID);
1324 1330
1325 if (*bits & EXTENT_FIRST_DELALLOC) 1331 if (*bits & EXTENT_FIRST_DELALLOC)
1326 *bits &= ~EXTENT_FIRST_DELALLOC; 1332 *bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1330 if (*bits & EXTENT_DO_ACCOUNTING) 1336 if (*bits & EXTENT_DO_ACCOUNTING)
1331 btrfs_delalloc_release_metadata(inode, len); 1337 btrfs_delalloc_release_metadata(inode, len);
1332 1338
1333 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) 1339 if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
1340 && do_list)
1334 btrfs_free_reserved_data_space(inode, len); 1341 btrfs_free_reserved_data_space(inode, len);
1335 1342
1336 spin_lock(&root->fs_info->delalloc_lock); 1343 spin_lock(&root->fs_info->delalloc_lock);
1337 root->fs_info->delalloc_bytes -= len; 1344 root->fs_info->delalloc_bytes -= len;
1338 BTRFS_I(inode)->delalloc_bytes -= len; 1345 BTRFS_I(inode)->delalloc_bytes -= len;
1339 1346
1340 if (BTRFS_I(inode)->delalloc_bytes == 0 && 1347 if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
1341 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { 1348 !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
1342 list_del_init(&BTRFS_I(inode)->delalloc_inodes); 1349 list_del_init(&BTRFS_I(inode)->delalloc_inodes);
1343 } 1350 }
@@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
1372 1379
1373 if (map_length < length + size) 1380 if (map_length < length + size)
1374 return 1; 1381 return 1;
1375 return 0; 1382 return ret;
1376} 1383}
1377 1384
1378/* 1385/*
@@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
1426 1433
1427 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 1434 skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
1428 1435
1429 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); 1436 if (root == root->fs_info->tree_root)
1437 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
1438 else
1439 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
1430 BUG_ON(ret); 1440 BUG_ON(ret);
1431 1441
1432 if (!(rw & REQ_WRITE)) { 1442 if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1662 struct extent_state *cached_state = NULL; 1672 struct extent_state *cached_state = NULL;
1663 int compressed = 0; 1673 int compressed = 0;
1664 int ret; 1674 int ret;
1675 bool nolock = false;
1665 1676
1666 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start, 1677 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1667 end - start + 1); 1678 end - start + 1);
@@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1669 return 0; 1680 return 0;
1670 BUG_ON(!ordered_extent); 1681 BUG_ON(!ordered_extent);
1671 1682
1683 nolock = (root == root->fs_info->tree_root);
1684
1672 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1685 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
1673 BUG_ON(!list_empty(&ordered_extent->list)); 1686 BUG_ON(!list_empty(&ordered_extent->list));
1674 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); 1687 ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
1675 if (!ret) { 1688 if (!ret) {
1676 trans = btrfs_join_transaction(root, 1); 1689 if (nolock)
1690 trans = btrfs_join_transaction_nolock(root, 1);
1691 else
1692 trans = btrfs_join_transaction(root, 1);
1693 BUG_ON(!trans);
1677 btrfs_set_trans_block_group(trans, inode); 1694 btrfs_set_trans_block_group(trans, inode);
1678 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1695 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1679 ret = btrfs_update_inode(trans, root, inode); 1696 ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1686 ordered_extent->file_offset + ordered_extent->len - 1, 1703 ordered_extent->file_offset + ordered_extent->len - 1,
1687 0, &cached_state, GFP_NOFS); 1704 0, &cached_state, GFP_NOFS);
1688 1705
1689 trans = btrfs_join_transaction(root, 1); 1706 if (nolock)
1707 trans = btrfs_join_transaction_nolock(root, 1);
1708 else
1709 trans = btrfs_join_transaction(root, 1);
1690 btrfs_set_trans_block_group(trans, inode); 1710 btrfs_set_trans_block_group(trans, inode);
1691 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1711 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1692 1712
@@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1700 ordered_extent->len); 1720 ordered_extent->len);
1701 BUG_ON(ret); 1721 BUG_ON(ret);
1702 } else { 1722 } else {
1723 BUG_ON(root == root->fs_info->tree_root);
1703 ret = insert_reserved_file_extent(trans, inode, 1724 ret = insert_reserved_file_extent(trans, inode,
1704 ordered_extent->file_offset, 1725 ordered_extent->file_offset,
1705 ordered_extent->start, 1726 ordered_extent->start,
@@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1724 ret = btrfs_update_inode(trans, root, inode); 1745 ret = btrfs_update_inode(trans, root, inode);
1725 BUG_ON(ret); 1746 BUG_ON(ret);
1726out: 1747out:
1727 btrfs_delalloc_release_metadata(inode, ordered_extent->len); 1748 if (nolock) {
1728 if (trans) 1749 if (trans)
1729 btrfs_end_transaction(trans, root); 1750 btrfs_end_transaction_nolock(trans, root);
1751 } else {
1752 btrfs_delalloc_release_metadata(inode, ordered_extent->len);
1753 if (trans)
1754 btrfs_end_transaction(trans, root);
1755 }
1756
1730 /* once for us */ 1757 /* once for us */
1731 btrfs_put_ordered_extent(ordered_extent); 1758 btrfs_put_ordered_extent(ordered_extent);
1732 /* once for the tree */ 1759 /* once for the tree */
@@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2237{ 2264{
2238 struct btrfs_path *path; 2265 struct btrfs_path *path;
2239 struct extent_buffer *leaf; 2266 struct extent_buffer *leaf;
2240 struct btrfs_item *item;
2241 struct btrfs_key key, found_key; 2267 struct btrfs_key key, found_key;
2242 struct btrfs_trans_handle *trans; 2268 struct btrfs_trans_handle *trans;
2243 struct inode *inode; 2269 struct inode *inode;
@@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2275 2301
2276 /* pull out the item */ 2302 /* pull out the item */
2277 leaf = path->nodes[0]; 2303 leaf = path->nodes[0];
2278 item = btrfs_item_nr(leaf, path->slots[0]);
2279 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 2304 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2280 2305
2281 /* make sure the item matches what we want */ 2306 /* make sure the item matches what we want */
@@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
2651 2676
2652 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, 2677 ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
2653 dir, index); 2678 dir, index);
2654 BUG_ON(ret); 2679 if (ret == -ENOENT)
2680 ret = 0;
2655err: 2681err:
2656 btrfs_free_path(path); 2682 btrfs_free_path(path);
2657 if (ret) 2683 if (ret)
@@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
2672{ 2698{
2673 struct extent_buffer *eb; 2699 struct extent_buffer *eb;
2674 int level; 2700 int level;
2675 int ret;
2676 u64 refs = 1; 2701 u64 refs = 1;
2702 int uninitialized_var(ret);
2677 2703
2678 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2704 for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
2679 if (!path->nodes[level]) 2705 if (!path->nodes[level])
@@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
2686 if (refs > 1) 2712 if (refs > 1)
2687 return 1; 2713 return 1;
2688 } 2714 }
2689 return 0; 2715 return ret; /* XXX callers? */
2690} 2716}
2691 2717
2692/* 2718/*
@@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
3196 3222
3197 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY); 3223 BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
3198 3224
3199 if (root->ref_cows) 3225 if (root->ref_cows || root == root->fs_info->tree_root)
3200 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0); 3226 btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
3201 3227
3202 path = btrfs_alloc_path(); 3228 path = btrfs_alloc_path();
@@ -3344,7 +3370,8 @@ delete:
3344 } else { 3370 } else {
3345 break; 3371 break;
3346 } 3372 }
3347 if (found_extent && root->ref_cows) { 3373 if (found_extent && (root->ref_cows ||
3374 root == root->fs_info->tree_root)) {
3348 btrfs_set_path_blocking(path); 3375 btrfs_set_path_blocking(path);
3349 ret = btrfs_free_extent(trans, root, extent_start, 3376 ret = btrfs_free_extent(trans, root, extent_start,
3350 extent_num_bytes, 0, 3377 extent_num_bytes, 0,
@@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
3675 int ret; 3702 int ret;
3676 3703
3677 truncate_inode_pages(&inode->i_data, 0); 3704 truncate_inode_pages(&inode->i_data, 0);
3678 if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0) 3705 if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
3706 root == root->fs_info->tree_root))
3679 goto no_delete; 3707 goto no_delete;
3680 3708
3681 if (is_bad_inode(inode)) { 3709 if (is_bad_inode(inode)) {
@@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
3888 } 3916 }
3889 spin_unlock(&root->inode_lock); 3917 spin_unlock(&root->inode_lock);
3890 3918
3891 if (empty && btrfs_root_refs(&root->root_item) == 0) { 3919 /*
3920 * Free space cache has inodes in the tree root, but the tree root has a
3921 * root_refs of 0, so this could end up dropping the tree root as a
3922 * snapshot, so we need the extra root != root->fs_info->tree_root check to
3923 * make sure we don't drop it.
3924 */
3925 if (empty && btrfs_root_refs(&root->root_item) == 0 &&
3926 root != root->fs_info->tree_root) {
3892 synchronize_srcu(&root->fs_info->subvol_srcu); 3927 synchronize_srcu(&root->fs_info->subvol_srcu);
3893 spin_lock(&root->inode_lock); 3928 spin_lock(&root->inode_lock);
3894 empty = RB_EMPTY_ROOT(&root->inode_tree); 3929 empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
4282 struct btrfs_root *root = BTRFS_I(inode)->root; 4317 struct btrfs_root *root = BTRFS_I(inode)->root;
4283 struct btrfs_trans_handle *trans; 4318 struct btrfs_trans_handle *trans;
4284 int ret = 0; 4319 int ret = 0;
4320 bool nolock = false;
4285 4321
4286 if (BTRFS_I(inode)->dummy_inode) 4322 if (BTRFS_I(inode)->dummy_inode)
4287 return 0; 4323 return 0;
4288 4324
4325 smp_mb();
4326 nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
4327
4289 if (wbc->sync_mode == WB_SYNC_ALL) { 4328 if (wbc->sync_mode == WB_SYNC_ALL) {
4290 trans = btrfs_join_transaction(root, 1); 4329 if (nolock)
4330 trans = btrfs_join_transaction_nolock(root, 1);
4331 else
4332 trans = btrfs_join_transaction(root, 1);
4291 btrfs_set_trans_block_group(trans, inode); 4333 btrfs_set_trans_block_group(trans, inode);
4292 ret = btrfs_commit_transaction(trans, root); 4334 if (nolock)
4335 ret = btrfs_end_transaction_nolock(trans, root);
4336 else
4337 ret = btrfs_commit_transaction(trans, root);
4293 } 4338 }
4294 return ret; 4339 return ret;
4295} 4340}
@@ -5645,7 +5690,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5645 struct btrfs_root *root = BTRFS_I(inode)->root; 5690 struct btrfs_root *root = BTRFS_I(inode)->root;
5646 struct btrfs_dio_private *dip; 5691 struct btrfs_dio_private *dip;
5647 struct bio_vec *bvec = bio->bi_io_vec; 5692 struct bio_vec *bvec = bio->bi_io_vec;
5648 u64 start;
5649 int skip_sum; 5693 int skip_sum;
5650 int write = rw & REQ_WRITE; 5694 int write = rw & REQ_WRITE;
5651 int ret = 0; 5695 int ret = 0;
@@ -5671,7 +5715,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
5671 dip->inode = inode; 5715 dip->inode = inode;
5672 dip->logical_offset = file_offset; 5716 dip->logical_offset = file_offset;
5673 5717
5674 start = dip->logical_offset;
5675 dip->bytes = 0; 5718 dip->bytes = 0;
5676 do { 5719 do {
5677 dip->bytes += bvec->bv_len; 5720 dip->bytes += bvec->bv_len;
@@ -6308,6 +6351,21 @@ void btrfs_destroy_inode(struct inode *inode)
6308 spin_unlock(&root->fs_info->ordered_extent_lock); 6351 spin_unlock(&root->fs_info->ordered_extent_lock);
6309 } 6352 }
6310 6353
6354 if (root == root->fs_info->tree_root) {
6355 struct btrfs_block_group_cache *block_group;
6356
6357 block_group = btrfs_lookup_block_group(root->fs_info,
6358 BTRFS_I(inode)->block_group);
6359 if (block_group && block_group->inode == inode) {
6360 spin_lock(&block_group->lock);
6361 block_group->inode = NULL;
6362 spin_unlock(&block_group->lock);
6363 btrfs_put_block_group(block_group);
6364 } else if (block_group) {
6365 btrfs_put_block_group(block_group);
6366 }
6367 }
6368
6311 spin_lock(&root->orphan_lock); 6369 spin_lock(&root->orphan_lock);
6312 if (!list_empty(&BTRFS_I(inode)->i_orphan)) { 6370 if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
6313 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", 6371 printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@ -6340,7 +6398,8 @@ int btrfs_drop_inode(struct inode *inode)
6340{ 6398{
6341 struct btrfs_root *root = BTRFS_I(inode)->root; 6399 struct btrfs_root *root = BTRFS_I(inode)->root;
6342 6400
6343 if (btrfs_root_refs(&root->root_item) == 0) 6401 if (btrfs_root_refs(&root->root_item) == 0 &&
6402 root != root->fs_info->tree_root)
6344 return 1; 6403 return 1;
6345 else 6404 else
6346 return generic_drop_inode(inode); 6405 return generic_drop_inode(inode);
@@ -6609,7 +6668,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
6609 return 0; 6668 return 0;
6610} 6669}
6611 6670
6612int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) 6671int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
6672 int sync)
6613{ 6673{
6614 struct btrfs_inode *binode; 6674 struct btrfs_inode *binode;
6615 struct inode *inode = NULL; 6675 struct inode *inode = NULL;
@@ -6631,7 +6691,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
6631 spin_unlock(&root->fs_info->delalloc_lock); 6691 spin_unlock(&root->fs_info->delalloc_lock);
6632 6692
6633 if (inode) { 6693 if (inode) {
6634 write_inode_now(inode, 0); 6694 if (sync) {
6695 filemap_write_and_wait(inode->i_mapping);
6696 /*
6697 * We have to do this because compression doesn't
6698 * actually set PG_writeback until it submits the pages
6699 * for IO, which happens in an async thread, so we could
6700 * race and not actually wait for any writeback pages
6701 * because they've not been submitted yet. Technically
6702 * this could still be the case for the ordered stuff
6703 * since the async thread may not have started to do its
6704 * work yet. If this becomes the case then we need to
6705 * figure out a way to make sure that in writepage we
6706 * wait for any async pages to be submitted before
6707 * returning so that fdatawait does what it's supposed to
6708 * do.
6709 */
6710 btrfs_wait_ordered_range(inode, 0, (u64)-1);
6711 } else {
6712 filemap_flush(inode->i_mapping);
6713 }
6635 if (delay_iput) 6714 if (delay_iput)
6636 btrfs_add_delayed_iput(inode); 6715 btrfs_add_delayed_iput(inode);
6637 else 6716 else
@@ -6757,27 +6836,33 @@ out_unlock:
6757 return err; 6836 return err;
6758} 6837}
6759 6838
6760int btrfs_prealloc_file_range(struct inode *inode, int mode, 6839static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
6761 u64 start, u64 num_bytes, u64 min_size, 6840 u64 start, u64 num_bytes, u64 min_size,
6762 loff_t actual_len, u64 *alloc_hint) 6841 loff_t actual_len, u64 *alloc_hint,
6842 struct btrfs_trans_handle *trans)
6763{ 6843{
6764 struct btrfs_trans_handle *trans;
6765 struct btrfs_root *root = BTRFS_I(inode)->root; 6844 struct btrfs_root *root = BTRFS_I(inode)->root;
6766 struct btrfs_key ins; 6845 struct btrfs_key ins;
6767 u64 cur_offset = start; 6846 u64 cur_offset = start;
6768 int ret = 0; 6847 int ret = 0;
6848 bool own_trans = true;
6769 6849
6850 if (trans)
6851 own_trans = false;
6770 while (num_bytes > 0) { 6852 while (num_bytes > 0) {
6771 trans = btrfs_start_transaction(root, 3); 6853 if (own_trans) {
6772 if (IS_ERR(trans)) { 6854 trans = btrfs_start_transaction(root, 3);
6773 ret = PTR_ERR(trans); 6855 if (IS_ERR(trans)) {
6774 break; 6856 ret = PTR_ERR(trans);
6857 break;
6858 }
6775 } 6859 }
6776 6860
6777 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 6861 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
6778 0, *alloc_hint, (u64)-1, &ins, 1); 6862 0, *alloc_hint, (u64)-1, &ins, 1);
6779 if (ret) { 6863 if (ret) {
6780 btrfs_end_transaction(trans, root); 6864 if (own_trans)
6865 btrfs_end_transaction(trans, root);
6781 break; 6866 break;
6782 } 6867 }
6783 6868
@@ -6810,11 +6895,30 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
6810 ret = btrfs_update_inode(trans, root, inode); 6895 ret = btrfs_update_inode(trans, root, inode);
6811 BUG_ON(ret); 6896 BUG_ON(ret);
6812 6897
6813 btrfs_end_transaction(trans, root); 6898 if (own_trans)
6899 btrfs_end_transaction(trans, root);
6814 } 6900 }
6815 return ret; 6901 return ret;
6816} 6902}
6817 6903
6904int btrfs_prealloc_file_range(struct inode *inode, int mode,
6905 u64 start, u64 num_bytes, u64 min_size,
6906 loff_t actual_len, u64 *alloc_hint)
6907{
6908 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
6909 min_size, actual_len, alloc_hint,
6910 NULL);
6911}
6912
6913int btrfs_prealloc_file_range_trans(struct inode *inode,
6914 struct btrfs_trans_handle *trans, int mode,
6915 u64 start, u64 num_bytes, u64 min_size,
6916 loff_t actual_len, u64 *alloc_hint)
6917{
6918 return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
6919 min_size, actual_len, alloc_hint, trans);
6920}
6921
6818static long btrfs_fallocate(struct inode *inode, int mode, 6922static long btrfs_fallocate(struct inode *inode, int mode,
6819 loff_t offset, loff_t len) 6923 loff_t offset, loff_t len)
6820{ 6924{
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9254b3d58dbe..463d91b4dd3a 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
224 224
225static noinline int create_subvol(struct btrfs_root *root, 225static noinline int create_subvol(struct btrfs_root *root,
226 struct dentry *dentry, 226 struct dentry *dentry,
227 char *name, int namelen) 227 char *name, int namelen,
228 u64 *async_transid)
228{ 229{
229 struct btrfs_trans_handle *trans; 230 struct btrfs_trans_handle *trans;
230 struct btrfs_key key; 231 struct btrfs_key key;
@@ -338,13 +339,19 @@ static noinline int create_subvol(struct btrfs_root *root,
338 339
339 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 340 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
340fail: 341fail:
341 err = btrfs_commit_transaction(trans, root); 342 if (async_transid) {
343 *async_transid = trans->transid;
344 err = btrfs_commit_transaction_async(trans, root, 1);
345 } else {
346 err = btrfs_commit_transaction(trans, root);
347 }
342 if (err && !ret) 348 if (err && !ret)
343 ret = err; 349 ret = err;
344 return ret; 350 return ret;
345} 351}
346 352
347static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) 353static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
354 char *name, int namelen, u64 *async_transid)
348{ 355{
349 struct inode *inode; 356 struct inode *inode;
350 struct btrfs_pending_snapshot *pending_snapshot; 357 struct btrfs_pending_snapshot *pending_snapshot;
@@ -373,7 +380,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
373 380
374 list_add(&pending_snapshot->list, 381 list_add(&pending_snapshot->list,
375 &trans->transaction->pending_snapshots); 382 &trans->transaction->pending_snapshots);
376 ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); 383 if (async_transid) {
384 *async_transid = trans->transid;
385 ret = btrfs_commit_transaction_async(trans,
386 root->fs_info->extent_root, 1);
387 } else {
388 ret = btrfs_commit_transaction(trans,
389 root->fs_info->extent_root);
390 }
377 BUG_ON(ret); 391 BUG_ON(ret);
378 392
379 ret = pending_snapshot->error; 393 ret = pending_snapshot->error;
@@ -395,6 +409,76 @@ fail:
395 return ret; 409 return ret;
396} 410}
397 411
412/* copy of check_sticky in fs/namei.c()
413* It's inline, so penalty for filesystems that don't use sticky bit is
414* minimal.
415*/
416static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
417{
418 uid_t fsuid = current_fsuid();
419
420 if (!(dir->i_mode & S_ISVTX))
421 return 0;
422 if (inode->i_uid == fsuid)
423 return 0;
424 if (dir->i_uid == fsuid)
425 return 0;
426 return !capable(CAP_FOWNER);
427}
428
429/* copy of may_delete in fs/namei.c()
430 * Check whether we can remove a link victim from directory dir, check
431 * whether the type of victim is right.
432 * 1. We can't do it if dir is read-only (done in permission())
433 * 2. We should have write and exec permissions on dir
434 * 3. We can't remove anything from append-only dir
435 * 4. We can't do anything with immutable dir (done in permission())
436 * 5. If the sticky bit on dir is set we should either
437 * a. be owner of dir, or
438 * b. be owner of victim, or
439 * c. have CAP_FOWNER capability
440 * 6. If the victim is append-only or immutable we can't do antyhing with
441 * links pointing to it.
442 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
443 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
444 * 9. We can't remove a root or mountpoint.
445 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
446 * nfs_async_unlink().
447 */
448
449static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
450{
451 int error;
452
453 if (!victim->d_inode)
454 return -ENOENT;
455
456 BUG_ON(victim->d_parent->d_inode != dir);
457 audit_inode_child(victim, dir);
458
459 error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
460 if (error)
461 return error;
462 if (IS_APPEND(dir))
463 return -EPERM;
464 if (btrfs_check_sticky(dir, victim->d_inode)||
465 IS_APPEND(victim->d_inode)||
466 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
467 return -EPERM;
468 if (isdir) {
469 if (!S_ISDIR(victim->d_inode->i_mode))
470 return -ENOTDIR;
471 if (IS_ROOT(victim))
472 return -EBUSY;
473 } else if (S_ISDIR(victim->d_inode->i_mode))
474 return -EISDIR;
475 if (IS_DEADDIR(dir))
476 return -ENOENT;
477 if (victim->d_flags & DCACHE_NFSFS_RENAMED)
478 return -EBUSY;
479 return 0;
480}
481
398/* copy of may_create in fs/namei.c() */ 482/* copy of may_create in fs/namei.c() */
399static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 483static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
400{ 484{
@@ -412,7 +496,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
412 */ 496 */
413static noinline int btrfs_mksubvol(struct path *parent, 497static noinline int btrfs_mksubvol(struct path *parent,
414 char *name, int namelen, 498 char *name, int namelen,
415 struct btrfs_root *snap_src) 499 struct btrfs_root *snap_src,
500 u64 *async_transid)
416{ 501{
417 struct inode *dir = parent->dentry->d_inode; 502 struct inode *dir = parent->dentry->d_inode;
418 struct dentry *dentry; 503 struct dentry *dentry;
@@ -443,10 +528,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
443 goto out_up_read; 528 goto out_up_read;
444 529
445 if (snap_src) { 530 if (snap_src) {
446 error = create_snapshot(snap_src, dentry); 531 error = create_snapshot(snap_src, dentry,
532 name, namelen, async_transid);
447 } else { 533 } else {
448 error = create_subvol(BTRFS_I(dir)->root, dentry, 534 error = create_subvol(BTRFS_I(dir)->root, dentry,
449 name, namelen); 535 name, namelen, async_transid);
450 } 536 }
451 if (!error) 537 if (!error)
452 fsnotify_mkdir(dir, dentry); 538 fsnotify_mkdir(dir, dentry);
@@ -708,7 +794,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
708 char *sizestr; 794 char *sizestr;
709 char *devstr = NULL; 795 char *devstr = NULL;
710 int ret = 0; 796 int ret = 0;
711 int namelen;
712 int mod = 0; 797 int mod = 0;
713 798
714 if (root->fs_info->sb->s_flags & MS_RDONLY) 799 if (root->fs_info->sb->s_flags & MS_RDONLY)
@@ -722,7 +807,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
722 return PTR_ERR(vol_args); 807 return PTR_ERR(vol_args);
723 808
724 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 809 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
725 namelen = strlen(vol_args->name);
726 810
727 mutex_lock(&root->fs_info->volume_mutex); 811 mutex_lock(&root->fs_info->volume_mutex);
728 sizestr = vol_args->name; 812 sizestr = vol_args->name;
@@ -801,11 +885,13 @@ out_unlock:
801 return ret; 885 return ret;
802} 886}
803 887
804static noinline int btrfs_ioctl_snap_create(struct file *file, 888static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
805 void __user *arg, int subvol) 889 char *name,
890 unsigned long fd,
891 int subvol,
892 u64 *transid)
806{ 893{
807 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 894 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
808 struct btrfs_ioctl_vol_args *vol_args;
809 struct file *src_file; 895 struct file *src_file;
810 int namelen; 896 int namelen;
811 int ret = 0; 897 int ret = 0;
@@ -813,23 +899,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
813 if (root->fs_info->sb->s_flags & MS_RDONLY) 899 if (root->fs_info->sb->s_flags & MS_RDONLY)
814 return -EROFS; 900 return -EROFS;
815 901
816 vol_args = memdup_user(arg, sizeof(*vol_args)); 902 namelen = strlen(name);
817 if (IS_ERR(vol_args)) 903 if (strchr(name, '/')) {
818 return PTR_ERR(vol_args);
819
820 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
821 namelen = strlen(vol_args->name);
822 if (strchr(vol_args->name, '/')) {
823 ret = -EINVAL; 904 ret = -EINVAL;
824 goto out; 905 goto out;
825 } 906 }
826 907
827 if (subvol) { 908 if (subvol) {
828 ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, 909 ret = btrfs_mksubvol(&file->f_path, name, namelen,
829 NULL); 910 NULL, transid);
830 } else { 911 } else {
831 struct inode *src_inode; 912 struct inode *src_inode;
832 src_file = fget(vol_args->fd); 913 src_file = fget(fd);
833 if (!src_file) { 914 if (!src_file) {
834 ret = -EINVAL; 915 ret = -EINVAL;
835 goto out; 916 goto out;
@@ -843,12 +924,56 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
843 fput(src_file); 924 fput(src_file);
844 goto out; 925 goto out;
845 } 926 }
846 ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen, 927 ret = btrfs_mksubvol(&file->f_path, name, namelen,
847 BTRFS_I(src_inode)->root); 928 BTRFS_I(src_inode)->root,
929 transid);
848 fput(src_file); 930 fput(src_file);
849 } 931 }
850out: 932out:
933 return ret;
934}
935
936static noinline int btrfs_ioctl_snap_create(struct file *file,
937 void __user *arg, int subvol,
938 int async)
939{
940 struct btrfs_ioctl_vol_args *vol_args = NULL;
941 struct btrfs_ioctl_async_vol_args *async_vol_args = NULL;
942 char *name;
943 u64 fd;
944 u64 transid = 0;
945 int ret;
946
947 if (async) {
948 async_vol_args = memdup_user(arg, sizeof(*async_vol_args));
949 if (IS_ERR(async_vol_args))
950 return PTR_ERR(async_vol_args);
951
952 name = async_vol_args->name;
953 fd = async_vol_args->fd;
954 async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0';
955 } else {
956 vol_args = memdup_user(arg, sizeof(*vol_args));
957 if (IS_ERR(vol_args))
958 return PTR_ERR(vol_args);
959 name = vol_args->name;
960 fd = vol_args->fd;
961 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
962 }
963
964 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
965 subvol, &transid);
966
967 if (!ret && async) {
968 if (copy_to_user(arg +
969 offsetof(struct btrfs_ioctl_async_vol_args,
970 transid), &transid, sizeof(transid)))
971 return -EFAULT;
972 }
973
851 kfree(vol_args); 974 kfree(vol_args);
975 kfree(async_vol_args);
976
852 return ret; 977 return ret;
853} 978}
854 979
@@ -1073,14 +1198,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
1073 if (!capable(CAP_SYS_ADMIN)) 1198 if (!capable(CAP_SYS_ADMIN))
1074 return -EPERM; 1199 return -EPERM;
1075 1200
1076 args = kmalloc(sizeof(*args), GFP_KERNEL); 1201 args = memdup_user(argp, sizeof(*args));
1077 if (!args) 1202 if (IS_ERR(args))
1078 return -ENOMEM; 1203 return PTR_ERR(args);
1079 1204
1080 if (copy_from_user(args, argp, sizeof(*args))) {
1081 kfree(args);
1082 return -EFAULT;
1083 }
1084 inode = fdentry(file)->d_inode; 1205 inode = fdentry(file)->d_inode;
1085 ret = search_ioctl(inode, args); 1206 ret = search_ioctl(inode, args);
1086 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 1207 if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
@@ -1188,14 +1309,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
1188 if (!capable(CAP_SYS_ADMIN)) 1309 if (!capable(CAP_SYS_ADMIN))
1189 return -EPERM; 1310 return -EPERM;
1190 1311
1191 args = kmalloc(sizeof(*args), GFP_KERNEL); 1312 args = memdup_user(argp, sizeof(*args));
1192 if (!args) 1313 if (IS_ERR(args))
1193 return -ENOMEM; 1314 return PTR_ERR(args);
1194 1315
1195 if (copy_from_user(args, argp, sizeof(*args))) {
1196 kfree(args);
1197 return -EFAULT;
1198 }
1199 inode = fdentry(file)->d_inode; 1316 inode = fdentry(file)->d_inode;
1200 1317
1201 if (args->treeid == 0) 1318 if (args->treeid == 0)
@@ -1227,9 +1344,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1227 int ret; 1344 int ret;
1228 int err = 0; 1345 int err = 0;
1229 1346
1230 if (!capable(CAP_SYS_ADMIN))
1231 return -EPERM;
1232
1233 vol_args = memdup_user(arg, sizeof(*vol_args)); 1347 vol_args = memdup_user(arg, sizeof(*vol_args));
1234 if (IS_ERR(vol_args)) 1348 if (IS_ERR(vol_args))
1235 return PTR_ERR(vol_args); 1349 return PTR_ERR(vol_args);
@@ -1259,13 +1373,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1259 } 1373 }
1260 1374
1261 inode = dentry->d_inode; 1375 inode = dentry->d_inode;
1376 dest = BTRFS_I(inode)->root;
1377 if (!capable(CAP_SYS_ADMIN)){
1378 /*
1379 * Regular user. Only allow this with a special mount
1380 * option, when the user has write+exec access to the
1381 * subvol root, and when rmdir(2) would have been
1382 * allowed.
1383 *
1384 * Note that this is _not_ a check that the subvol is
1385 * empty or doesn't contain data that we wouldn't
1386 * otherwise be able to delete.
1387 *
1388 * Users who want to delete empty subvols should try
1389 * rmdir(2).
1390 */
1391 err = -EPERM;
1392 if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
1393 goto out_dput;
1394
1395 /*
1396 * Do not allow deletion if the parent dir is the same
1397 * as the dir to be deleted. That means the ioctl
1398 * must be called on the dentry referencing the root
1399 * of the subvol, not a random directory contained
1400 * within it.
1401 */
1402 err = -EINVAL;
1403 if (root == dest)
1404 goto out_dput;
1405
1406 err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
1407 if (err)
1408 goto out_dput;
1409
1410 /* check if subvolume may be deleted by a non-root user */
1411 err = btrfs_may_delete(dir, dentry, 1);
1412 if (err)
1413 goto out_dput;
1414 }
1415
1262 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { 1416 if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
1263 err = -EINVAL; 1417 err = -EINVAL;
1264 goto out_dput; 1418 goto out_dput;
1265 } 1419 }
1266 1420
1267 dest = BTRFS_I(inode)->root;
1268
1269 mutex_lock(&inode->i_mutex); 1421 mutex_lock(&inode->i_mutex);
1270 err = d_invalidate(dentry); 1422 err = d_invalidate(dentry);
1271 if (err) 1423 if (err)
@@ -1304,7 +1456,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
1304 BUG_ON(ret); 1456 BUG_ON(ret);
1305 } 1457 }
1306 1458
1307 ret = btrfs_commit_transaction(trans, root); 1459 ret = btrfs_end_transaction(trans, root);
1308 BUG_ON(ret); 1460 BUG_ON(ret);
1309 inode->i_flags |= S_DEAD; 1461 inode->i_flags |= S_DEAD;
1310out_up_write: 1462out_up_write:
@@ -1502,11 +1654,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1502 path->reada = 2; 1654 path->reada = 2;
1503 1655
1504 if (inode < src) { 1656 if (inode < src) {
1505 mutex_lock(&inode->i_mutex); 1657 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
1506 mutex_lock(&src->i_mutex); 1658 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
1507 } else { 1659 } else {
1508 mutex_lock(&src->i_mutex); 1660 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
1509 mutex_lock(&inode->i_mutex); 1661 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
1510 } 1662 }
1511 1663
1512 /* determine range to clone */ 1664 /* determine range to clone */
@@ -1530,13 +1682,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1530 while (1) { 1682 while (1) {
1531 struct btrfs_ordered_extent *ordered; 1683 struct btrfs_ordered_extent *ordered;
1532 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1684 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
1533 ordered = btrfs_lookup_first_ordered_extent(inode, off+len); 1685 ordered = btrfs_lookup_first_ordered_extent(src, off+len);
1534 if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered) 1686 if (!ordered &&
1687 !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
1688 EXTENT_DELALLOC, 0, NULL))
1535 break; 1689 break;
1536 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); 1690 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
1537 if (ordered) 1691 if (ordered)
1538 btrfs_put_ordered_extent(ordered); 1692 btrfs_put_ordered_extent(ordered);
1539 btrfs_wait_ordered_range(src, off, off+len); 1693 btrfs_wait_ordered_range(src, off, len);
1540 } 1694 }
1541 1695
1542 /* clone data */ 1696 /* clone data */
@@ -1605,7 +1759,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
1605 } 1759 }
1606 btrfs_release_path(root, path); 1760 btrfs_release_path(root, path);
1607 1761
1608 if (key.offset + datal < off || 1762 if (key.offset + datal <= off ||
1609 key.offset >= off+len) 1763 key.offset >= off+len)
1610 goto next; 1764 goto next;
1611 1765
@@ -1879,6 +2033,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
1879 return 0; 2033 return 0;
1880} 2034}
1881 2035
2036static void get_block_group_info(struct list_head *groups_list,
2037 struct btrfs_ioctl_space_info *space)
2038{
2039 struct btrfs_block_group_cache *block_group;
2040
2041 space->total_bytes = 0;
2042 space->used_bytes = 0;
2043 space->flags = 0;
2044 list_for_each_entry(block_group, groups_list, list) {
2045 space->flags = block_group->flags;
2046 space->total_bytes += block_group->key.offset;
2047 space->used_bytes +=
2048 btrfs_block_group_used(&block_group->item);
2049 }
2050}
2051
1882long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 2052long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1883{ 2053{
1884 struct btrfs_ioctl_space_args space_args; 2054 struct btrfs_ioctl_space_args space_args;
@@ -1887,27 +2057,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1887 struct btrfs_ioctl_space_info *dest_orig; 2057 struct btrfs_ioctl_space_info *dest_orig;
1888 struct btrfs_ioctl_space_info *user_dest; 2058 struct btrfs_ioctl_space_info *user_dest;
1889 struct btrfs_space_info *info; 2059 struct btrfs_space_info *info;
2060 u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
2061 BTRFS_BLOCK_GROUP_SYSTEM,
2062 BTRFS_BLOCK_GROUP_METADATA,
2063 BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
2064 int num_types = 4;
1890 int alloc_size; 2065 int alloc_size;
1891 int ret = 0; 2066 int ret = 0;
1892 int slot_count = 0; 2067 int slot_count = 0;
2068 int i, c;
1893 2069
1894 if (copy_from_user(&space_args, 2070 if (copy_from_user(&space_args,
1895 (struct btrfs_ioctl_space_args __user *)arg, 2071 (struct btrfs_ioctl_space_args __user *)arg,
1896 sizeof(space_args))) 2072 sizeof(space_args)))
1897 return -EFAULT; 2073 return -EFAULT;
1898 2074
1899 /* first we count slots */ 2075 for (i = 0; i < num_types; i++) {
1900 rcu_read_lock(); 2076 struct btrfs_space_info *tmp;
1901 list_for_each_entry_rcu(info, &root->fs_info->space_info, list) 2077
1902 slot_count++; 2078 info = NULL;
1903 rcu_read_unlock(); 2079 rcu_read_lock();
2080 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
2081 list) {
2082 if (tmp->flags == types[i]) {
2083 info = tmp;
2084 break;
2085 }
2086 }
2087 rcu_read_unlock();
2088
2089 if (!info)
2090 continue;
2091
2092 down_read(&info->groups_sem);
2093 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
2094 if (!list_empty(&info->block_groups[c]))
2095 slot_count++;
2096 }
2097 up_read(&info->groups_sem);
2098 }
1904 2099
1905 /* space_slots == 0 means they are asking for a count */ 2100 /* space_slots == 0 means they are asking for a count */
1906 if (space_args.space_slots == 0) { 2101 if (space_args.space_slots == 0) {
1907 space_args.total_spaces = slot_count; 2102 space_args.total_spaces = slot_count;
1908 goto out; 2103 goto out;
1909 } 2104 }
2105
2106 slot_count = min_t(int, space_args.space_slots, slot_count);
2107
1910 alloc_size = sizeof(*dest) * slot_count; 2108 alloc_size = sizeof(*dest) * slot_count;
2109
1911 /* we generally have at most 6 or so space infos, one for each raid 2110 /* we generally have at most 6 or so space infos, one for each raid
1912 * level. So, a whole page should be more than enough for everyone 2111 * level. So, a whole page should be more than enough for everyone
1913 */ 2112 */
@@ -1921,27 +2120,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1921 dest_orig = dest; 2120 dest_orig = dest;
1922 2121
1923 /* now we have a buffer to copy into */ 2122 /* now we have a buffer to copy into */
1924 rcu_read_lock(); 2123 for (i = 0; i < num_types; i++) {
1925 list_for_each_entry_rcu(info, &root->fs_info->space_info, list) { 2124 struct btrfs_space_info *tmp;
1926 /* make sure we don't copy more than we allocated 2125
1927 * in our buffer 2126 info = NULL;
1928 */ 2127 rcu_read_lock();
1929 if (slot_count == 0) 2128 list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
1930 break; 2129 list) {
1931 slot_count--; 2130 if (tmp->flags == types[i]) {
1932 2131 info = tmp;
1933 /* make sure userland has enough room in their buffer */ 2132 break;
1934 if (space_args.total_spaces >= space_args.space_slots) 2133 }
1935 break; 2134 }
2135 rcu_read_unlock();
1936 2136
1937 space.flags = info->flags; 2137 if (!info)
1938 space.total_bytes = info->total_bytes; 2138 continue;
1939 space.used_bytes = info->bytes_used; 2139 down_read(&info->groups_sem);
1940 memcpy(dest, &space, sizeof(space)); 2140 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
1941 dest++; 2141 if (!list_empty(&info->block_groups[c])) {
1942 space_args.total_spaces++; 2142 get_block_group_info(&info->block_groups[c],
2143 &space);
2144 memcpy(dest, &space, sizeof(space));
2145 dest++;
2146 space_args.total_spaces++;
2147 }
2148 }
2149 up_read(&info->groups_sem);
1943 } 2150 }
1944 rcu_read_unlock();
1945 2151
1946 user_dest = (struct btrfs_ioctl_space_info *) 2152 user_dest = (struct btrfs_ioctl_space_info *)
1947 (arg + sizeof(struct btrfs_ioctl_space_args)); 2153 (arg + sizeof(struct btrfs_ioctl_space_args));
@@ -1984,6 +2190,36 @@ long btrfs_ioctl_trans_end(struct file *file)
1984 return 0; 2190 return 0;
1985} 2191}
1986 2192
2193static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
2194{
2195 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
2196 struct btrfs_trans_handle *trans;
2197 u64 transid;
2198
2199 trans = btrfs_start_transaction(root, 0);
2200 transid = trans->transid;
2201 btrfs_commit_transaction_async(trans, root, 0);
2202
2203 if (argp)
2204 if (copy_to_user(argp, &transid, sizeof(transid)))
2205 return -EFAULT;
2206 return 0;
2207}
2208
2209static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
2210{
2211 struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
2212 u64 transid;
2213
2214 if (argp) {
2215 if (copy_from_user(&transid, argp, sizeof(transid)))
2216 return -EFAULT;
2217 } else {
2218 transid = 0; /* current trans */
2219 }
2220 return btrfs_wait_for_commit(root, transid);
2221}
2222
1987long btrfs_ioctl(struct file *file, unsigned int 2223long btrfs_ioctl(struct file *file, unsigned int
1988 cmd, unsigned long arg) 2224 cmd, unsigned long arg)
1989{ 2225{
@@ -1998,9 +2234,11 @@ long btrfs_ioctl(struct file *file, unsigned int
1998 case FS_IOC_GETVERSION: 2234 case FS_IOC_GETVERSION:
1999 return btrfs_ioctl_getversion(file, argp); 2235 return btrfs_ioctl_getversion(file, argp);
2000 case BTRFS_IOC_SNAP_CREATE: 2236 case BTRFS_IOC_SNAP_CREATE:
2001 return btrfs_ioctl_snap_create(file, argp, 0); 2237 return btrfs_ioctl_snap_create(file, argp, 0, 0);
2238 case BTRFS_IOC_SNAP_CREATE_ASYNC:
2239 return btrfs_ioctl_snap_create(file, argp, 0, 1);
2002 case BTRFS_IOC_SUBVOL_CREATE: 2240 case BTRFS_IOC_SUBVOL_CREATE:
2003 return btrfs_ioctl_snap_create(file, argp, 1); 2241 return btrfs_ioctl_snap_create(file, argp, 1, 0);
2004 case BTRFS_IOC_SNAP_DESTROY: 2242 case BTRFS_IOC_SNAP_DESTROY:
2005 return btrfs_ioctl_snap_destroy(file, argp); 2243 return btrfs_ioctl_snap_destroy(file, argp);
2006 case BTRFS_IOC_DEFAULT_SUBVOL: 2244 case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -2034,6 +2272,10 @@ long btrfs_ioctl(struct file *file, unsigned int
2034 case BTRFS_IOC_SYNC: 2272 case BTRFS_IOC_SYNC:
2035 btrfs_sync_fs(file->f_dentry->d_sb, 1); 2273 btrfs_sync_fs(file->f_dentry->d_sb, 1);
2036 return 0; 2274 return 0;
2275 case BTRFS_IOC_START_SYNC:
2276 return btrfs_ioctl_start_sync(file, argp);
2277 case BTRFS_IOC_WAIT_SYNC:
2278 return btrfs_ioctl_wait_sync(file, argp);
2037 } 2279 }
2038 2280
2039 return -ENOTTY; 2281 return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 424694aa517f..17c99ebdf960 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -22,14 +22,21 @@
22 22
23#define BTRFS_IOCTL_MAGIC 0x94 23#define BTRFS_IOCTL_MAGIC 0x94
24#define BTRFS_VOL_NAME_MAX 255 24#define BTRFS_VOL_NAME_MAX 255
25#define BTRFS_PATH_NAME_MAX 4087
26 25
27/* this should be 4k */ 26/* this should be 4k */
27#define BTRFS_PATH_NAME_MAX 4087
28struct btrfs_ioctl_vol_args { 28struct btrfs_ioctl_vol_args {
29 __s64 fd; 29 __s64 fd;
30 char name[BTRFS_PATH_NAME_MAX + 1]; 30 char name[BTRFS_PATH_NAME_MAX + 1];
31}; 31};
32 32
33#define BTRFS_SNAPSHOT_NAME_MAX 4079
34struct btrfs_ioctl_async_vol_args {
35 __s64 fd;
36 __u64 transid;
37 char name[BTRFS_SNAPSHOT_NAME_MAX + 1];
38};
39
33#define BTRFS_INO_LOOKUP_PATH_MAX 4080 40#define BTRFS_INO_LOOKUP_PATH_MAX 4080
34struct btrfs_ioctl_ino_lookup_args { 41struct btrfs_ioctl_ino_lookup_args {
35 __u64 treeid; 42 __u64 treeid;
@@ -178,4 +185,8 @@ struct btrfs_ioctl_space_args {
178#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64) 185#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
179#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \ 186#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
180 struct btrfs_ioctl_space_args) 187 struct btrfs_ioctl_space_args)
188#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
189#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
190#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \
191 struct btrfs_ioctl_async_vol_args)
181#endif 192#endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e56c72bc5add..f4621f6deca1 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -526,7 +526,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
526{ 526{
527 u64 end; 527 u64 end;
528 u64 orig_end; 528 u64 orig_end;
529 u64 wait_end;
530 struct btrfs_ordered_extent *ordered; 529 struct btrfs_ordered_extent *ordered;
531 int found; 530 int found;
532 531
@@ -537,7 +536,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
537 if (orig_end > INT_LIMIT(loff_t)) 536 if (orig_end > INT_LIMIT(loff_t))
538 orig_end = INT_LIMIT(loff_t); 537 orig_end = INT_LIMIT(loff_t);
539 } 538 }
540 wait_end = orig_end;
541again: 539again:
542 /* start IO across the range first to instantiate any delalloc 540 /* start IO across the range first to instantiate any delalloc
543 * extents 541 * extents
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b37d723b9d4a..045c9c2b2d7e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -29,6 +29,7 @@
29#include "locking.h" 29#include "locking.h"
30#include "btrfs_inode.h" 30#include "btrfs_inode.h"
31#include "async-thread.h" 31#include "async-thread.h"
32#include "free-space-cache.h"
32 33
33/* 34/*
34 * backref_node, mapping_node and tree_block start with this 35 * backref_node, mapping_node and tree_block start with this
@@ -178,8 +179,6 @@ struct reloc_control {
178 u64 search_start; 179 u64 search_start;
179 u64 extents_found; 180 u64 extents_found;
180 181
181 int block_rsv_retries;
182
183 unsigned int stage:8; 182 unsigned int stage:8;
184 unsigned int create_reloc_tree:1; 183 unsigned int create_reloc_tree:1;
185 unsigned int merge_reloc_tree:1; 184 unsigned int merge_reloc_tree:1;
@@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
2133 LIST_HEAD(reloc_roots); 2132 LIST_HEAD(reloc_roots);
2134 u64 num_bytes = 0; 2133 u64 num_bytes = 0;
2135 int ret; 2134 int ret;
2136 int retries = 0;
2137 2135
2138 mutex_lock(&root->fs_info->trans_mutex); 2136 mutex_lock(&root->fs_info->trans_mutex);
2139 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2137 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@ -2143,7 +2141,7 @@ again:
2143 if (!err) { 2141 if (!err) {
2144 num_bytes = rc->merging_rsv_size; 2142 num_bytes = rc->merging_rsv_size;
2145 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, 2143 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
2146 num_bytes, &retries); 2144 num_bytes);
2147 if (ret) 2145 if (ret)
2148 err = ret; 2146 err = ret;
2149 } 2147 }
@@ -2155,7 +2153,6 @@ again:
2155 btrfs_end_transaction(trans, rc->extent_root); 2153 btrfs_end_transaction(trans, rc->extent_root);
2156 btrfs_block_rsv_release(rc->extent_root, 2154 btrfs_block_rsv_release(rc->extent_root,
2157 rc->block_rsv, num_bytes); 2155 rc->block_rsv, num_bytes);
2158 retries = 0;
2159 goto again; 2156 goto again;
2160 } 2157 }
2161 } 2158 }
@@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
2405 num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2402 num_bytes = calcu_metadata_size(rc, node, 1) * 2;
2406 2403
2407 trans->block_rsv = rc->block_rsv; 2404 trans->block_rsv = rc->block_rsv;
2408 ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, 2405 ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
2409 &rc->block_rsv_retries);
2410 if (ret) { 2406 if (ret) {
2411 if (ret == -EAGAIN) 2407 if (ret == -EAGAIN)
2412 rc->commit_transaction = 1; 2408 rc->commit_transaction = 1;
2413 return ret; 2409 return ret;
2414 } 2410 }
2415 2411
2416 rc->block_rsv_retries = 0;
2417 return 0; 2412 return 0;
2418} 2413}
2419 2414
@@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
3099 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 3094 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
3100 ret = get_ref_objectid_v0(rc, path, extent_key, 3095 ret = get_ref_objectid_v0(rc, path, extent_key,
3101 &ref_owner, NULL); 3096 &ref_owner, NULL);
3097 if (ret < 0)
3098 return ret;
3102 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); 3099 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
3103 level = (int)ref_owner; 3100 level = (int)ref_owner;
3104 /* FIXME: get real generation */ 3101 /* FIXME: get real generation */
@@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
3191 return ret; 3188 return ret;
3192} 3189}
3193 3190
3191static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
3192 struct inode *inode, u64 ino)
3193{
3194 struct btrfs_key key;
3195 struct btrfs_path *path;
3196 struct btrfs_root *root = fs_info->tree_root;
3197 struct btrfs_trans_handle *trans;
3198 unsigned long nr;
3199 int ret = 0;
3200
3201 if (inode)
3202 goto truncate;
3203
3204 key.objectid = ino;
3205 key.type = BTRFS_INODE_ITEM_KEY;
3206 key.offset = 0;
3207
3208 inode = btrfs_iget(fs_info->sb, &key, root, NULL);
3209 if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
3210 if (inode && !IS_ERR(inode))
3211 iput(inode);
3212 return -ENOENT;
3213 }
3214
3215truncate:
3216 path = btrfs_alloc_path();
3217 if (!path) {
3218 ret = -ENOMEM;
3219 goto out;
3220 }
3221
3222 trans = btrfs_join_transaction(root, 0);
3223 if (IS_ERR(trans)) {
3224 btrfs_free_path(path);
3225 goto out;
3226 }
3227
3228 ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
3229
3230 btrfs_free_path(path);
3231 nr = trans->blocks_used;
3232 btrfs_end_transaction(trans, root);
3233 btrfs_btree_balance_dirty(root, nr);
3234out:
3235 iput(inode);
3236 return ret;
3237}
3238
3194/* 3239/*
3195 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 3240 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
3196 * this function scans fs tree to find blocks reference the data extent 3241 * this function scans fs tree to find blocks reference the data extent
@@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
3217 int counted; 3262 int counted;
3218 int ret; 3263 int ret;
3219 3264
3220 path = btrfs_alloc_path();
3221 if (!path)
3222 return -ENOMEM;
3223
3224 ref_root = btrfs_extent_data_ref_root(leaf, ref); 3265 ref_root = btrfs_extent_data_ref_root(leaf, ref);
3225 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 3266 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
3226 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 3267 ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
3227 ref_count = btrfs_extent_data_ref_count(leaf, ref); 3268 ref_count = btrfs_extent_data_ref_count(leaf, ref);
3228 3269
3270 /*
3271 * This is an extent belonging to the free space cache, lets just delete
3272 * it and redo the search.
3273 */
3274 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
3275 ret = delete_block_group_cache(rc->extent_root->fs_info,
3276 NULL, ref_objectid);
3277 if (ret != -ENOENT)
3278 return ret;
3279 ret = 0;
3280 }
3281
3282 path = btrfs_alloc_path();
3283 if (!path)
3284 return -ENOMEM;
3285
3229 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3286 root = read_fs_root(rc->extent_root->fs_info, ref_root);
3230 if (IS_ERR(root)) { 3287 if (IS_ERR(root)) {
3231 err = PTR_ERR(root); 3288 err = PTR_ERR(root);
@@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
3554 * is no reservation in transaction handle. 3611 * is no reservation in transaction handle.
3555 */ 3612 */
3556 ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, 3613 ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
3557 rc->extent_root->nodesize * 256, 3614 rc->extent_root->nodesize * 256);
3558 &rc->block_rsv_retries);
3559 if (ret) 3615 if (ret)
3560 return ret; 3616 return ret;
3561 3617
@@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
3567 rc->extents_found = 0; 3623 rc->extents_found = 0;
3568 rc->nodes_relocated = 0; 3624 rc->nodes_relocated = 0;
3569 rc->merging_rsv_size = 0; 3625 rc->merging_rsv_size = 0;
3570 rc->block_rsv_retries = 0;
3571 3626
3572 rc->create_reloc_tree = 1; 3627 rc->create_reloc_tree = 1;
3573 set_reloc_control(rc); 3628 set_reloc_control(rc);
@@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3860{ 3915{
3861 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3916 struct btrfs_fs_info *fs_info = extent_root->fs_info;
3862 struct reloc_control *rc; 3917 struct reloc_control *rc;
3918 struct inode *inode;
3919 struct btrfs_path *path;
3863 int ret; 3920 int ret;
3864 int rw = 0; 3921 int rw = 0;
3865 int err = 0; 3922 int err = 0;
@@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
3882 rw = 1; 3939 rw = 1;
3883 } 3940 }
3884 3941
3942 path = btrfs_alloc_path();
3943 if (!path) {
3944 err = -ENOMEM;
3945 goto out;
3946 }
3947
3948 inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
3949 path);
3950 btrfs_free_path(path);
3951
3952 if (!IS_ERR(inode))
3953 ret = delete_block_group_cache(fs_info, inode, 0);
3954 else
3955 ret = PTR_ERR(inode);
3956
3957 if (ret && ret != -ENOENT) {
3958 err = ret;
3959 goto out;
3960 }
3961
3885 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 3962 rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
3886 if (IS_ERR(rc->data_inode)) { 3963 if (IS_ERR(rc->data_inode)) {
3887 err = PTR_ERR(rc->data_inode); 3964 err = PTR_ERR(rc->data_inode);
@@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
4143 btrfs_add_ordered_sum(inode, ordered, sums); 4220 btrfs_add_ordered_sum(inode, ordered, sums);
4144 } 4221 }
4145 btrfs_put_ordered_extent(ordered); 4222 btrfs_put_ordered_extent(ordered);
4146 return 0; 4223 return ret;
4147} 4224}
4148 4225
4149void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 4226void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2d958be761c8..6a1086e83ffc 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
181int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid) 181int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
182{ 182{
183 struct btrfs_root *dead_root; 183 struct btrfs_root *dead_root;
184 struct btrfs_item *item;
185 struct btrfs_root_item *ri; 184 struct btrfs_root_item *ri;
186 struct btrfs_key key; 185 struct btrfs_key key;
187 struct btrfs_key found_key; 186 struct btrfs_key found_key;
@@ -214,7 +213,6 @@ again:
214 nritems = btrfs_header_nritems(leaf); 213 nritems = btrfs_header_nritems(leaf);
215 slot = path->slots[0]; 214 slot = path->slots[0];
216 } 215 }
217 item = btrfs_item_nr(leaf, slot);
218 btrfs_item_key_to_cpu(leaf, &key, slot); 216 btrfs_item_key_to_cpu(leaf, &key, slot);
219 if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY) 217 if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
220 goto next; 218 goto next;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index ebe46c628748..8299a25ffc8f 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
61 61
62 ret = close_ctree(root); 62 ret = close_ctree(root);
63 sb->s_fs_info = NULL; 63 sb->s_fs_info = NULL;
64
65 (void)ret; /* FIXME: need to fix VFS to return error? */
64} 66}
65 67
66enum { 68enum {
@@ -68,7 +70,8 @@ enum {
68 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd, 70 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
69 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress, 71 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
70 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit, 72 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
71 Opt_discard, Opt_err, 73 Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
74 Opt_user_subvol_rm_allowed,
72}; 75};
73 76
74static match_table_t tokens = { 77static match_table_t tokens = {
@@ -92,6 +95,9 @@ static match_table_t tokens = {
92 {Opt_flushoncommit, "flushoncommit"}, 95 {Opt_flushoncommit, "flushoncommit"},
93 {Opt_ratio, "metadata_ratio=%d"}, 96 {Opt_ratio, "metadata_ratio=%d"},
94 {Opt_discard, "discard"}, 97 {Opt_discard, "discard"},
98 {Opt_space_cache, "space_cache"},
99 {Opt_clear_cache, "clear_cache"},
100 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
95 {Opt_err, NULL}, 101 {Opt_err, NULL},
96}; 102};
97 103
@@ -235,6 +241,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
235 case Opt_discard: 241 case Opt_discard:
236 btrfs_set_opt(info->mount_opt, DISCARD); 242 btrfs_set_opt(info->mount_opt, DISCARD);
237 break; 243 break;
244 case Opt_space_cache:
245 printk(KERN_INFO "btrfs: enabling disk space caching\n");
246 btrfs_set_opt(info->mount_opt, SPACE_CACHE);
247 case Opt_clear_cache:
248 printk(KERN_INFO "btrfs: force clearing of disk cache\n");
249 btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
250 break;
251 case Opt_user_subvol_rm_allowed:
252 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
253 break;
238 case Opt_err: 254 case Opt_err:
239 printk(KERN_INFO "btrfs: unrecognized mount option " 255 printk(KERN_INFO "btrfs: unrecognized mount option "
240 "'%s'\n", p); 256 "'%s'\n", p);
@@ -380,7 +396,7 @@ static struct dentry *get_default_root(struct super_block *sb,
380find_root: 396find_root:
381 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 397 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
382 if (IS_ERR(new_root)) 398 if (IS_ERR(new_root))
383 return ERR_PTR(PTR_ERR(new_root)); 399 return ERR_CAST(new_root);
384 400
385 if (btrfs_root_refs(&new_root->root_item) == 0) 401 if (btrfs_root_refs(&new_root->root_item) == 0)
386 return ERR_PTR(-ENOENT); 402 return ERR_PTR(-ENOENT);
@@ -436,7 +452,6 @@ static int btrfs_fill_super(struct super_block *sb,
436{ 452{
437 struct inode *inode; 453 struct inode *inode;
438 struct dentry *root_dentry; 454 struct dentry *root_dentry;
439 struct btrfs_super_block *disk_super;
440 struct btrfs_root *tree_root; 455 struct btrfs_root *tree_root;
441 struct btrfs_key key; 456 struct btrfs_key key;
442 int err; 457 int err;
@@ -458,7 +473,6 @@ static int btrfs_fill_super(struct super_block *sb,
458 return PTR_ERR(tree_root); 473 return PTR_ERR(tree_root);
459 } 474 }
460 sb->s_fs_info = tree_root; 475 sb->s_fs_info = tree_root;
461 disk_super = &tree_root->fs_info->super_copy;
462 476
463 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 477 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
464 key.type = BTRFS_INODE_ITEM_KEY; 478 key.type = BTRFS_INODE_ITEM_KEY;
@@ -571,7 +585,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
571 char *subvol_name = NULL; 585 char *subvol_name = NULL;
572 u64 subvol_objectid = 0; 586 u64 subvol_objectid = 0;
573 int error = 0; 587 int error = 0;
574 int found = 0;
575 588
576 if (!(flags & MS_RDONLY)) 589 if (!(flags & MS_RDONLY))
577 mode |= FMODE_WRITE; 590 mode |= FMODE_WRITE;
@@ -607,7 +620,6 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
607 goto error_close_devices; 620 goto error_close_devices;
608 } 621 }
609 622
610 found = 1;
611 btrfs_close_devices(fs_devices); 623 btrfs_close_devices(fs_devices);
612 } else { 624 } else {
613 char b[BDEVNAME_SIZE]; 625 char b[BDEVNAME_SIZE];
@@ -629,7 +641,7 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
629 if (IS_ERR(root)) { 641 if (IS_ERR(root)) {
630 error = PTR_ERR(root); 642 error = PTR_ERR(root);
631 deactivate_locked_super(s); 643 deactivate_locked_super(s);
632 goto error; 644 goto error_free_subvol_name;
633 } 645 }
634 /* if they gave us a subvolume name bind mount into that */ 646 /* if they gave us a subvolume name bind mount into that */
635 if (strcmp(subvol_name, ".")) { 647 if (strcmp(subvol_name, ".")) {
@@ -643,14 +655,14 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
643 deactivate_locked_super(s); 655 deactivate_locked_super(s);
644 error = PTR_ERR(new_root); 656 error = PTR_ERR(new_root);
645 dput(root); 657 dput(root);
646 goto error_close_devices; 658 goto error_free_subvol_name;
647 } 659 }
648 if (!new_root->d_inode) { 660 if (!new_root->d_inode) {
649 dput(root); 661 dput(root);
650 dput(new_root); 662 dput(new_root);
651 deactivate_locked_super(s); 663 deactivate_locked_super(s);
652 error = -ENXIO; 664 error = -ENXIO;
653 goto error_close_devices; 665 goto error_free_subvol_name;
654 } 666 }
655 dput(root); 667 dput(root);
656 root = new_root; 668 root = new_root;
@@ -665,7 +677,6 @@ error_close_devices:
665 btrfs_close_devices(fs_devices); 677 btrfs_close_devices(fs_devices);
666error_free_subvol_name: 678error_free_subvol_name:
667 kfree(subvol_name); 679 kfree(subvol_name);
668error:
669 return ERR_PTR(error); 680 return ERR_PTR(error);
670} 681}
671 682
@@ -713,18 +724,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
713 struct list_head *head = &root->fs_info->space_info; 724 struct list_head *head = &root->fs_info->space_info;
714 struct btrfs_space_info *found; 725 struct btrfs_space_info *found;
715 u64 total_used = 0; 726 u64 total_used = 0;
727 u64 total_used_data = 0;
716 int bits = dentry->d_sb->s_blocksize_bits; 728 int bits = dentry->d_sb->s_blocksize_bits;
717 __be32 *fsid = (__be32 *)root->fs_info->fsid; 729 __be32 *fsid = (__be32 *)root->fs_info->fsid;
718 730
719 rcu_read_lock(); 731 rcu_read_lock();
720 list_for_each_entry_rcu(found, head, list) 732 list_for_each_entry_rcu(found, head, list) {
733 if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
734 BTRFS_BLOCK_GROUP_SYSTEM))
735 total_used_data += found->disk_total;
736 else
737 total_used_data += found->disk_used;
721 total_used += found->disk_used; 738 total_used += found->disk_used;
739 }
722 rcu_read_unlock(); 740 rcu_read_unlock();
723 741
724 buf->f_namelen = BTRFS_NAME_LEN; 742 buf->f_namelen = BTRFS_NAME_LEN;
725 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 743 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
726 buf->f_bfree = buf->f_blocks - (total_used >> bits); 744 buf->f_bfree = buf->f_blocks - (total_used >> bits);
727 buf->f_bavail = buf->f_bfree; 745 buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
728 buf->f_bsize = dentry->d_sb->s_blocksize; 746 buf->f_bsize = dentry->d_sb->s_blocksize;
729 buf->f_type = BTRFS_SUPER_MAGIC; 747 buf->f_type = BTRFS_SUPER_MAGIC;
730 748
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 66e4c66cc63b..1fffbc017bdf 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
163 TRANS_START, 163 TRANS_START,
164 TRANS_JOIN, 164 TRANS_JOIN,
165 TRANS_USERSPACE, 165 TRANS_USERSPACE,
166 TRANS_JOIN_NOLOCK,
166}; 167};
167 168
168static int may_wait_transaction(struct btrfs_root *root, int type) 169static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
179{ 180{
180 struct btrfs_trans_handle *h; 181 struct btrfs_trans_handle *h;
181 struct btrfs_transaction *cur_trans; 182 struct btrfs_transaction *cur_trans;
182 int retries = 0;
183 int ret; 183 int ret;
184again: 184again:
185 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 185 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
186 if (!h) 186 if (!h)
187 return ERR_PTR(-ENOMEM); 187 return ERR_PTR(-ENOMEM);
188 188
189 mutex_lock(&root->fs_info->trans_mutex); 189 if (type != TRANS_JOIN_NOLOCK)
190 mutex_lock(&root->fs_info->trans_mutex);
190 if (may_wait_transaction(root, type)) 191 if (may_wait_transaction(root, type))
191 wait_current_trans(root); 192 wait_current_trans(root);
192 193
@@ -195,7 +196,8 @@ again:
195 196
196 cur_trans = root->fs_info->running_transaction; 197 cur_trans = root->fs_info->running_transaction;
197 cur_trans->use_count++; 198 cur_trans->use_count++;
198 mutex_unlock(&root->fs_info->trans_mutex); 199 if (type != TRANS_JOIN_NOLOCK)
200 mutex_unlock(&root->fs_info->trans_mutex);
199 201
200 h->transid = cur_trans->transid; 202 h->transid = cur_trans->transid;
201 h->transaction = cur_trans; 203 h->transaction = cur_trans;
@@ -212,8 +214,7 @@ again:
212 } 214 }
213 215
214 if (num_items > 0) { 216 if (num_items > 0) {
215 ret = btrfs_trans_reserve_metadata(h, root, num_items, 217 ret = btrfs_trans_reserve_metadata(h, root, num_items);
216 &retries);
217 if (ret == -EAGAIN) { 218 if (ret == -EAGAIN) {
218 btrfs_commit_transaction(h, root); 219 btrfs_commit_transaction(h, root);
219 goto again; 220 goto again;
@@ -224,9 +225,11 @@ again:
224 } 225 }
225 } 226 }
226 227
227 mutex_lock(&root->fs_info->trans_mutex); 228 if (type != TRANS_JOIN_NOLOCK)
229 mutex_lock(&root->fs_info->trans_mutex);
228 record_root_in_trans(h, root); 230 record_root_in_trans(h, root);
229 mutex_unlock(&root->fs_info->trans_mutex); 231 if (type != TRANS_JOIN_NOLOCK)
232 mutex_unlock(&root->fs_info->trans_mutex);
230 233
231 if (!current->journal_info && type != TRANS_USERSPACE) 234 if (!current->journal_info && type != TRANS_USERSPACE)
232 current->journal_info = h; 235 current->journal_info = h;
@@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
244 return start_transaction(root, 0, TRANS_JOIN); 247 return start_transaction(root, 0, TRANS_JOIN);
245} 248}
246 249
250struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
251 int num_blocks)
252{
253 return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
254}
255
247struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 256struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
248 int num_blocks) 257 int num_blocks)
249{ 258{
@@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
270 return 0; 279 return 0;
271} 280}
272 281
282int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
283{
284 struct btrfs_transaction *cur_trans = NULL, *t;
285 int ret;
286
287 mutex_lock(&root->fs_info->trans_mutex);
288
289 ret = 0;
290 if (transid) {
291 if (transid <= root->fs_info->last_trans_committed)
292 goto out_unlock;
293
294 /* find specified transaction */
295 list_for_each_entry(t, &root->fs_info->trans_list, list) {
296 if (t->transid == transid) {
297 cur_trans = t;
298 break;
299 }
300 if (t->transid > transid)
301 break;
302 }
303 ret = -EINVAL;
304 if (!cur_trans)
305 goto out_unlock; /* bad transid */
306 } else {
307 /* find newest transaction that is committing | committed */
308 list_for_each_entry_reverse(t, &root->fs_info->trans_list,
309 list) {
310 if (t->in_commit) {
311 if (t->commit_done)
312 goto out_unlock;
313 cur_trans = t;
314 break;
315 }
316 }
317 if (!cur_trans)
318 goto out_unlock; /* nothing committing|committed */
319 }
320
321 cur_trans->use_count++;
322 mutex_unlock(&root->fs_info->trans_mutex);
323
324 wait_for_commit(root, cur_trans);
325
326 mutex_lock(&root->fs_info->trans_mutex);
327 put_transaction(cur_trans);
328 ret = 0;
329out_unlock:
330 mutex_unlock(&root->fs_info->trans_mutex);
331 return ret;
332}
333
273#if 0 334#if 0
274/* 335/*
275 * rate limit against the drop_snapshot code. This helps to slow down new 336 * rate limit against the drop_snapshot code. This helps to slow down new
@@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
348} 409}
349 410
350static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, 411static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
351 struct btrfs_root *root, int throttle) 412 struct btrfs_root *root, int throttle, int lock)
352{ 413{
353 struct btrfs_transaction *cur_trans = trans->transaction; 414 struct btrfs_transaction *cur_trans = trans->transaction;
354 struct btrfs_fs_info *info = root->fs_info; 415 struct btrfs_fs_info *info = root->fs_info;
@@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
376 437
377 btrfs_trans_release_metadata(trans, root); 438 btrfs_trans_release_metadata(trans, root);
378 439
379 if (!root->fs_info->open_ioctl_trans && 440 if (lock && !root->fs_info->open_ioctl_trans &&
380 should_end_transaction(trans, root)) 441 should_end_transaction(trans, root))
381 trans->transaction->blocked = 1; 442 trans->transaction->blocked = 1;
382 443
383 if (cur_trans->blocked && !cur_trans->in_commit) { 444 if (lock && cur_trans->blocked && !cur_trans->in_commit) {
384 if (throttle) 445 if (throttle)
385 return btrfs_commit_transaction(trans, root); 446 return btrfs_commit_transaction(trans, root);
386 else 447 else
387 wake_up_process(info->transaction_kthread); 448 wake_up_process(info->transaction_kthread);
388 } 449 }
389 450
390 mutex_lock(&info->trans_mutex); 451 if (lock)
452 mutex_lock(&info->trans_mutex);
391 WARN_ON(cur_trans != info->running_transaction); 453 WARN_ON(cur_trans != info->running_transaction);
392 WARN_ON(cur_trans->num_writers < 1); 454 WARN_ON(cur_trans->num_writers < 1);
393 cur_trans->num_writers--; 455 cur_trans->num_writers--;
394 456
457 smp_mb();
395 if (waitqueue_active(&cur_trans->writer_wait)) 458 if (waitqueue_active(&cur_trans->writer_wait))
396 wake_up(&cur_trans->writer_wait); 459 wake_up(&cur_trans->writer_wait);
397 put_transaction(cur_trans); 460 put_transaction(cur_trans);
398 mutex_unlock(&info->trans_mutex); 461 if (lock)
462 mutex_unlock(&info->trans_mutex);
399 463
400 if (current->journal_info == trans) 464 if (current->journal_info == trans)
401 current->journal_info = NULL; 465 current->journal_info = NULL;
@@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
411int btrfs_end_transaction(struct btrfs_trans_handle *trans, 475int btrfs_end_transaction(struct btrfs_trans_handle *trans,
412 struct btrfs_root *root) 476 struct btrfs_root *root)
413{ 477{
414 return __btrfs_end_transaction(trans, root, 0); 478 return __btrfs_end_transaction(trans, root, 0, 1);
415} 479}
416 480
417int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 481int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
418 struct btrfs_root *root) 482 struct btrfs_root *root)
419{ 483{
420 return __btrfs_end_transaction(trans, root, 1); 484 return __btrfs_end_transaction(trans, root, 1, 1);
485}
486
487int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
488 struct btrfs_root *root)
489{
490 return __btrfs_end_transaction(trans, root, 0, 0);
421} 491}
422 492
423/* 493/*
@@ -836,7 +906,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
836 struct extent_buffer *tmp; 906 struct extent_buffer *tmp;
837 struct extent_buffer *old; 907 struct extent_buffer *old;
838 int ret; 908 int ret;
839 int retries = 0;
840 u64 to_reserve = 0; 909 u64 to_reserve = 0;
841 u64 index = 0; 910 u64 index = 0;
842 u64 objectid; 911 u64 objectid;
@@ -858,7 +927,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
858 927
859 if (to_reserve > 0) { 928 if (to_reserve > 0) {
860 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, 929 ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
861 to_reserve, &retries); 930 to_reserve);
862 if (ret) { 931 if (ret) {
863 pending->error = ret; 932 pending->error = ret;
864 goto fail; 933 goto fail;
@@ -966,6 +1035,8 @@ static void update_super_roots(struct btrfs_root *root)
966 super->root = root_item->bytenr; 1035 super->root = root_item->bytenr;
967 super->generation = root_item->generation; 1036 super->generation = root_item->generation;
968 super->root_level = root_item->level; 1037 super->root_level = root_item->level;
1038 if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
1039 super->cache_generation = root_item->generation;
969} 1040}
970 1041
971int btrfs_transaction_in_commit(struct btrfs_fs_info *info) 1042int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -988,11 +1059,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
988 return ret; 1059 return ret;
989} 1060}
990 1061
1062/*
1063 * wait for the current transaction commit to start and block subsequent
1064 * transaction joins
1065 */
1066static void wait_current_trans_commit_start(struct btrfs_root *root,
1067 struct btrfs_transaction *trans)
1068{
1069 DEFINE_WAIT(wait);
1070
1071 if (trans->in_commit)
1072 return;
1073
1074 while (1) {
1075 prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
1076 TASK_UNINTERRUPTIBLE);
1077 if (trans->in_commit) {
1078 finish_wait(&root->fs_info->transaction_blocked_wait,
1079 &wait);
1080 break;
1081 }
1082 mutex_unlock(&root->fs_info->trans_mutex);
1083 schedule();
1084 mutex_lock(&root->fs_info->trans_mutex);
1085 finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
1086 }
1087}
1088
1089/*
1090 * wait for the current transaction commit to start and then become unblocked.
1091 * caller holds ref.
1092 */
1093static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
1094 struct btrfs_transaction *trans)
1095{
1096 DEFINE_WAIT(wait);
1097
1098 if (trans->commit_done || (trans->in_commit && !trans->blocked))
1099 return;
1100
1101 while (1) {
1102 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
1103 TASK_UNINTERRUPTIBLE);
1104 if (trans->commit_done ||
1105 (trans->in_commit && !trans->blocked)) {
1106 finish_wait(&root->fs_info->transaction_wait,
1107 &wait);
1108 break;
1109 }
1110 mutex_unlock(&root->fs_info->trans_mutex);
1111 schedule();
1112 mutex_lock(&root->fs_info->trans_mutex);
1113 finish_wait(&root->fs_info->transaction_wait,
1114 &wait);
1115 }
1116}
1117
1118/*
1119 * commit transactions asynchronously. once btrfs_commit_transaction_async
1120 * returns, any subsequent transaction will not be allowed to join.
1121 */
1122struct btrfs_async_commit {
1123 struct btrfs_trans_handle *newtrans;
1124 struct btrfs_root *root;
1125 struct delayed_work work;
1126};
1127
1128static void do_async_commit(struct work_struct *work)
1129{
1130 struct btrfs_async_commit *ac =
1131 container_of(work, struct btrfs_async_commit, work.work);
1132
1133 btrfs_commit_transaction(ac->newtrans, ac->root);
1134 kfree(ac);
1135}
1136
1137int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
1138 struct btrfs_root *root,
1139 int wait_for_unblock)
1140{
1141 struct btrfs_async_commit *ac;
1142 struct btrfs_transaction *cur_trans;
1143
1144 ac = kmalloc(sizeof(*ac), GFP_NOFS);
1145 BUG_ON(!ac);
1146
1147 INIT_DELAYED_WORK(&ac->work, do_async_commit);
1148 ac->root = root;
1149 ac->newtrans = btrfs_join_transaction(root, 0);
1150
1151 /* take transaction reference */
1152 mutex_lock(&root->fs_info->trans_mutex);
1153 cur_trans = trans->transaction;
1154 cur_trans->use_count++;
1155 mutex_unlock(&root->fs_info->trans_mutex);
1156
1157 btrfs_end_transaction(trans, root);
1158 schedule_delayed_work(&ac->work, 0);
1159
1160 /* wait for transaction to start and unblock */
1161 mutex_lock(&root->fs_info->trans_mutex);
1162 if (wait_for_unblock)
1163 wait_current_trans_commit_start_and_unblock(root, cur_trans);
1164 else
1165 wait_current_trans_commit_start(root, cur_trans);
1166 put_transaction(cur_trans);
1167 mutex_unlock(&root->fs_info->trans_mutex);
1168
1169 return 0;
1170}
1171
1172/*
1173 * btrfs_transaction state sequence:
1174 * in_commit = 0, blocked = 0 (initial)
1175 * in_commit = 1, blocked = 1
1176 * blocked = 0
1177 * commit_done = 1
1178 */
991int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 1179int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
992 struct btrfs_root *root) 1180 struct btrfs_root *root)
993{ 1181{
994 unsigned long joined = 0; 1182 unsigned long joined = 0;
995 unsigned long timeout = 1;
996 struct btrfs_transaction *cur_trans; 1183 struct btrfs_transaction *cur_trans;
997 struct btrfs_transaction *prev_trans = NULL; 1184 struct btrfs_transaction *prev_trans = NULL;
998 DEFINE_WAIT(wait); 1185 DEFINE_WAIT(wait);
@@ -1039,6 +1226,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1039 1226
1040 trans->transaction->in_commit = 1; 1227 trans->transaction->in_commit = 1;
1041 trans->transaction->blocked = 1; 1228 trans->transaction->blocked = 1;
1229 wake_up(&root->fs_info->transaction_blocked_wait);
1230
1042 if (cur_trans->list.prev != &root->fs_info->trans_list) { 1231 if (cur_trans->list.prev != &root->fs_info->trans_list) {
1043 prev_trans = list_entry(cur_trans->list.prev, 1232 prev_trans = list_entry(cur_trans->list.prev,
1044 struct btrfs_transaction, list); 1233 struct btrfs_transaction, list);
@@ -1063,11 +1252,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1063 snap_pending = 1; 1252 snap_pending = 1;
1064 1253
1065 WARN_ON(cur_trans != trans->transaction); 1254 WARN_ON(cur_trans != trans->transaction);
1066 if (cur_trans->num_writers > 1)
1067 timeout = MAX_SCHEDULE_TIMEOUT;
1068 else if (should_grow)
1069 timeout = 1;
1070
1071 mutex_unlock(&root->fs_info->trans_mutex); 1255 mutex_unlock(&root->fs_info->trans_mutex);
1072 1256
1073 if (flush_on_commit || snap_pending) { 1257 if (flush_on_commit || snap_pending) {
@@ -1089,8 +1273,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1089 TASK_UNINTERRUPTIBLE); 1273 TASK_UNINTERRUPTIBLE);
1090 1274
1091 smp_mb(); 1275 smp_mb();
1092 if (cur_trans->num_writers > 1 || should_grow) 1276 if (cur_trans->num_writers > 1)
1093 schedule_timeout(timeout); 1277 schedule_timeout(MAX_SCHEDULE_TIMEOUT);
1278 else if (should_grow)
1279 schedule_timeout(1);
1094 1280
1095 mutex_lock(&root->fs_info->trans_mutex); 1281 mutex_lock(&root->fs_info->trans_mutex);
1096 finish_wait(&cur_trans->writer_wait, &wait); 1282 finish_wait(&cur_trans->writer_wait, &wait);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e104986d0bfd..f104b57ad4ef 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
87 87
88int btrfs_end_transaction(struct btrfs_trans_handle *trans, 88int btrfs_end_transaction(struct btrfs_trans_handle *trans,
89 struct btrfs_root *root); 89 struct btrfs_root *root);
90int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
91 struct btrfs_root *root);
90struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 92struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
91 int num_items); 93 int num_items);
92struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, 94struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
93 int num_blocks); 95 int num_blocks);
96struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
97 int num_blocks);
94struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, 98struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
95 int num_blocks); 99 int num_blocks);
100int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
96int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, 101int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
97 struct btrfs_root *root); 102 struct btrfs_root *root);
98int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, 103int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
104int btrfs_clean_old_snapshots(struct btrfs_root *root); 109int btrfs_clean_old_snapshots(struct btrfs_root *root);
105int btrfs_commit_transaction(struct btrfs_trans_handle *trans, 110int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
106 struct btrfs_root *root); 111 struct btrfs_root *root);
112int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
113 struct btrfs_root *root,
114 int wait_for_unblock);
107int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, 115int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
108 struct btrfs_root *root); 116 struct btrfs_root *root);
109int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, 117int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f7ac8e013ed7..992ab425599d 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
36 int ret = 0; 36 int ret = 0;
37 int wret; 37 int wret;
38 int level; 38 int level;
39 int orig_level;
40 int is_extent = 0; 39 int is_extent = 0;
41 int next_key_ret = 0; 40 int next_key_ret = 0;
42 u64 last_ret = 0; 41 u64 last_ret = 0;
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
64 return -ENOMEM; 63 return -ENOMEM;
65 64
66 level = btrfs_header_level(root->node); 65 level = btrfs_header_level(root->node);
67 orig_level = level;
68 66
69 if (level == 0) 67 if (level == 0)
70 goto out; 68 goto out;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fb102a9aee9c..a29f19384a27 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
786{ 786{
787 struct inode *dir; 787 struct inode *dir;
788 int ret; 788 int ret;
789 struct btrfs_key location;
790 struct btrfs_inode_ref *ref; 789 struct btrfs_inode_ref *ref;
791 struct btrfs_dir_item *di; 790 struct btrfs_dir_item *di;
792 struct inode *inode; 791 struct inode *inode;
@@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
795 unsigned long ref_ptr; 794 unsigned long ref_ptr;
796 unsigned long ref_end; 795 unsigned long ref_end;
797 796
798 location.objectid = key->objectid;
799 location.type = BTRFS_INODE_ITEM_KEY;
800 location.offset = 0;
801
802 /* 797 /*
803 * it is possible that we didn't log all the parent directories 798 * it is possible that we didn't log all the parent directories
804 * for a given inode. If we don't find the dir, just don't 799 * for a given inode. If we don't find the dir, just don't
@@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1583 struct btrfs_path *path; 1578 struct btrfs_path *path;
1584 struct btrfs_root *root = wc->replay_dest; 1579 struct btrfs_root *root = wc->replay_dest;
1585 struct btrfs_key key; 1580 struct btrfs_key key;
1586 u32 item_size;
1587 int level; 1581 int level;
1588 int i; 1582 int i;
1589 int ret; 1583 int ret;
@@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
1601 nritems = btrfs_header_nritems(eb); 1595 nritems = btrfs_header_nritems(eb);
1602 for (i = 0; i < nritems; i++) { 1596 for (i = 0; i < nritems; i++) {
1603 btrfs_item_key_to_cpu(eb, &key, i); 1597 btrfs_item_key_to_cpu(eb, &key, i);
1604 item_size = btrfs_item_size_nr(eb, i);
1605 1598
1606 /* inode keys are done during the first stage */ 1599 /* inode keys are done during the first stage */
1607 if (key.type == BTRFS_INODE_ITEM_KEY && 1600 if (key.type == BTRFS_INODE_ITEM_KEY &&
@@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1668 struct walk_control *wc) 1661 struct walk_control *wc)
1669{ 1662{
1670 u64 root_owner; 1663 u64 root_owner;
1671 u64 root_gen;
1672 u64 bytenr; 1664 u64 bytenr;
1673 u64 ptr_gen; 1665 u64 ptr_gen;
1674 struct extent_buffer *next; 1666 struct extent_buffer *next;
@@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1698 1690
1699 parent = path->nodes[*level]; 1691 parent = path->nodes[*level];
1700 root_owner = btrfs_header_owner(parent); 1692 root_owner = btrfs_header_owner(parent);
1701 root_gen = btrfs_header_generation(parent);
1702 1693
1703 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 1694 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
1704 1695
@@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1749 struct walk_control *wc) 1740 struct walk_control *wc)
1750{ 1741{
1751 u64 root_owner; 1742 u64 root_owner;
1752 u64 root_gen;
1753 int i; 1743 int i;
1754 int slot; 1744 int slot;
1755 int ret; 1745 int ret;
@@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1757 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { 1747 for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1758 slot = path->slots[i]; 1748 slot = path->slots[i];
1759 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 1749 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
1760 struct extent_buffer *node;
1761 node = path->nodes[i];
1762 path->slots[i]++; 1750 path->slots[i]++;
1763 *level = i; 1751 *level = i;
1764 WARN_ON(*level == 0); 1752 WARN_ON(*level == 0);
@@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1771 parent = path->nodes[*level + 1]; 1759 parent = path->nodes[*level + 1];
1772 1760
1773 root_owner = btrfs_header_owner(parent); 1761 root_owner = btrfs_header_owner(parent);
1774 root_gen = btrfs_header_generation(parent);
1775 wc->process_func(root, path->nodes[*level], wc, 1762 wc->process_func(root, path->nodes[*level], wc,
1776 btrfs_header_generation(path->nodes[*level])); 1763 btrfs_header_generation(path->nodes[*level]));
1777 if (wc->free) { 1764 if (wc->free) {
@@ -2273,7 +2260,7 @@ fail:
2273 } 2260 }
2274 btrfs_end_log_trans(root); 2261 btrfs_end_log_trans(root);
2275 2262
2276 return 0; 2263 return err;
2277} 2264}
2278 2265
2279/* see comments for btrfs_del_dir_entries_in_log */ 2266/* see comments for btrfs_del_dir_entries_in_log */
@@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
2729 struct btrfs_key max_key; 2716 struct btrfs_key max_key;
2730 struct btrfs_root *log = root->log_root; 2717 struct btrfs_root *log = root->log_root;
2731 struct extent_buffer *src = NULL; 2718 struct extent_buffer *src = NULL;
2732 u32 size;
2733 int err = 0; 2719 int err = 0;
2734 int ret; 2720 int ret;
2735 int nritems; 2721 int nritems;
@@ -2793,7 +2779,6 @@ again:
2793 break; 2779 break;
2794 2780
2795 src = path->nodes[0]; 2781 src = path->nodes[0];
2796 size = btrfs_item_size_nr(src, path->slots[0]);
2797 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { 2782 if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
2798 ins_nr++; 2783 ins_nr++;
2799 goto next_slot; 2784 goto next_slot;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e25e46a8b4e2..cc04dc1445d6 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1898,7 +1898,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
1898 u64 size_to_free; 1898 u64 size_to_free;
1899 struct btrfs_path *path; 1899 struct btrfs_path *path;
1900 struct btrfs_key key; 1900 struct btrfs_key key;
1901 struct btrfs_chunk *chunk;
1902 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root; 1901 struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
1903 struct btrfs_trans_handle *trans; 1902 struct btrfs_trans_handle *trans;
1904 struct btrfs_key found_key; 1903 struct btrfs_key found_key;
@@ -1962,9 +1961,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
1962 if (found_key.objectid != key.objectid) 1961 if (found_key.objectid != key.objectid)
1963 break; 1962 break;
1964 1963
1965 chunk = btrfs_item_ptr(path->nodes[0],
1966 path->slots[0],
1967 struct btrfs_chunk);
1968 /* chunk zero is special */ 1964 /* chunk zero is special */
1969 if (found_key.offset == 0) 1965 if (found_key.offset == 0)
1970 break; 1966 break;
@@ -3031,8 +3027,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
3031 } 3027 }
3032 bio->bi_sector = multi->stripes[dev_nr].physical >> 9; 3028 bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
3033 dev = multi->stripes[dev_nr].dev; 3029 dev = multi->stripes[dev_nr].dev;
3034 BUG_ON(rw == WRITE && !dev->writeable); 3030 if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
3035 if (dev && dev->bdev) {
3036 bio->bi_bdev = dev->bdev; 3031 bio->bi_bdev = dev->bdev;
3037 if (async_submit) 3032 if (async_submit)
3038 schedule_bio(root, dev, rw, bio); 3033 schedule_bio(root, dev, rw, bio);
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 88ecbb215878..698fdd2c739c 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
178 struct inode *inode = dentry->d_inode; 178 struct inode *inode = dentry->d_inode;
179 struct btrfs_root *root = BTRFS_I(inode)->root; 179 struct btrfs_root *root = BTRFS_I(inode)->root;
180 struct btrfs_path *path; 180 struct btrfs_path *path;
181 struct btrfs_item *item;
182 struct extent_buffer *leaf; 181 struct extent_buffer *leaf;
183 struct btrfs_dir_item *di; 182 struct btrfs_dir_item *di;
184 int ret = 0, slot, advance; 183 int ret = 0, slot, advance;
@@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
234 } 233 }
235 advance = 1; 234 advance = 1;
236 235
237 item = btrfs_item_nr(leaf, slot);
238 btrfs_item_key_to_cpu(leaf, &found_key, slot); 236 btrfs_item_key_to_cpu(leaf, &found_key, slot);
239 237
240 /* check to make sure this item is what we want */ 238 /* check to make sure this item is what we want */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 3e2b90eaa239..b9cd5445f71c 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
199 int nr_pages = 0; 199 int nr_pages = 0;
200 struct page *in_page = NULL; 200 struct page *in_page = NULL;
201 struct page *out_page = NULL; 201 struct page *out_page = NULL;
202 int out_written = 0;
203 int in_read = 0;
204 unsigned long bytes_left; 202 unsigned long bytes_left;
205 203
206 *out_pages = 0; 204 *out_pages = 0;
@@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
233 workspace->def_strm.avail_out = PAGE_CACHE_SIZE; 231 workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
234 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); 232 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
235 233
236 out_written = 0;
237 in_read = 0;
238
239 while (workspace->def_strm.total_in < len) { 234 while (workspace->def_strm.total_in < len) {
240 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); 235 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
241 if (ret != Z_OK) { 236 if (ret != Z_OK) {
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index f027382b54be..3d06ccc953aa 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1081,30 +1081,42 @@ static void wait_sb_inodes(struct super_block *sb)
1081} 1081}
1082 1082
1083/** 1083/**
1084 * writeback_inodes_sb - writeback dirty inodes from given super_block 1084 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
1085 * @sb: the superblock 1085 * @sb: the superblock
1086 * @nr: the number of pages to write
1086 * 1087 *
1087 * Start writeback on some inodes on this super_block. No guarantees are made 1088 * Start writeback on some inodes on this super_block. No guarantees are made
1088 * on how many (if any) will be written, and this function does not wait 1089 * on how many (if any) will be written, and this function does not wait
1089 * for IO completion of submitted IO. The number of pages submitted is 1090 * for IO completion of submitted IO.
1090 * returned.
1091 */ 1091 */
1092void writeback_inodes_sb(struct super_block *sb) 1092void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
1093{ 1093{
1094 DECLARE_COMPLETION_ONSTACK(done); 1094 DECLARE_COMPLETION_ONSTACK(done);
1095 struct wb_writeback_work work = { 1095 struct wb_writeback_work work = {
1096 .sb = sb, 1096 .sb = sb,
1097 .sync_mode = WB_SYNC_NONE, 1097 .sync_mode = WB_SYNC_NONE,
1098 .done = &done, 1098 .done = &done,
1099 .nr_pages = nr,
1099 }; 1100 };
1100 1101
1101 WARN_ON(!rwsem_is_locked(&sb->s_umount)); 1102 WARN_ON(!rwsem_is_locked(&sb->s_umount));
1102
1103 work.nr_pages = get_nr_dirty_pages();
1104
1105 bdi_queue_work(sb->s_bdi, &work); 1103 bdi_queue_work(sb->s_bdi, &work);
1106 wait_for_completion(&done); 1104 wait_for_completion(&done);
1107} 1105}
1106EXPORT_SYMBOL(writeback_inodes_sb_nr);
1107
1108/**
1109 * writeback_inodes_sb - writeback dirty inodes from given super_block
1110 * @sb: the superblock
1111 *
1112 * Start writeback on some inodes on this super_block. No guarantees are made
1113 * on how many (if any) will be written, and this function does not wait
1114 * for IO completion of submitted IO.
1115 */
1116void writeback_inodes_sb(struct super_block *sb)
1117{
1118 return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
1119}
1108EXPORT_SYMBOL(writeback_inodes_sb); 1120EXPORT_SYMBOL(writeback_inodes_sb);
1109 1121
1110/** 1122/**
@@ -1127,6 +1139,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb)
1127EXPORT_SYMBOL(writeback_inodes_sb_if_idle); 1139EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
1128 1140
1129/** 1141/**
1142 * writeback_inodes_sb_nr_if_idle - start writeback if none underway
1143 * @sb: the superblock
1144 * @nr: the number of pages to write
1145 *
1146 * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
1147 * Returns 1 if writeback was started, 0 if not.
1148 */
1149int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
1150 unsigned long nr)
1151{
1152 if (!writeback_in_progress(sb->s_bdi)) {
1153 down_read(&sb->s_umount);
1154 writeback_inodes_sb_nr(sb, nr);
1155 up_read(&sb->s_umount);
1156 return 1;
1157 } else
1158 return 0;
1159}
1160EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
1161
1162/**
1130 * sync_inodes_sb - sync sb inode pages 1163 * sync_inodes_sb - sync sb inode pages
1131 * @sb: the superblock 1164 * @sb: the superblock
1132 * 1165 *
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 09eec350054d..0ead399e08b5 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -58,7 +58,9 @@ struct writeback_control {
58struct bdi_writeback; 58struct bdi_writeback;
59int inode_wait(void *); 59int inode_wait(void *);
60void writeback_inodes_sb(struct super_block *); 60void writeback_inodes_sb(struct super_block *);
61void writeback_inodes_sb_nr(struct super_block *, unsigned long nr);
61int writeback_inodes_sb_if_idle(struct super_block *); 62int writeback_inodes_sb_if_idle(struct super_block *);
63int writeback_inodes_sb_nr_if_idle(struct super_block *, unsigned long nr);
62void sync_inodes_sb(struct super_block *); 64void sync_inodes_sb(struct super_block *);
63void writeback_inodes_wb(struct bdi_writeback *wb, 65void writeback_inodes_wb(struct bdi_writeback *wb,
64 struct writeback_control *wbc); 66 struct writeback_control *wbc);