aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/acl.c1
-rw-r--r--fs/btrfs/async-thread.c1
-rw-r--r--fs/btrfs/btrfs_inode.h5
-rw-r--r--fs/btrfs/compression.c23
-rw-r--r--fs/btrfs/ctree.c5
-rw-r--r--fs/btrfs/ctree.h15
-rw-r--r--fs/btrfs/delayed-ref.c1
-rw-r--r--fs/btrfs/disk-io.c28
-rw-r--r--fs/btrfs/export.c4
-rw-r--r--fs/btrfs/extent-tree.c55
-rw-r--r--fs/btrfs/extent_io.c95
-rw-r--r--fs/btrfs/extent_io.h10
-rw-r--r--fs/btrfs/extent_map.c1
-rw-r--r--fs/btrfs/file-item.c1
-rw-r--r--fs/btrfs/file.c24
-rw-r--r--fs/btrfs/free-space-cache.c1
-rw-r--r--fs/btrfs/inode.c199
-rw-r--r--fs/btrfs/ioctl.c710
-rw-r--r--fs/btrfs/ioctl.h111
-rw-r--r--fs/btrfs/locking.c1
-rw-r--r--fs/btrfs/ordered-data.c48
-rw-r--r--fs/btrfs/ordered-data.h7
-rw-r--r--fs/btrfs/ref-cache.c1
-rw-r--r--fs/btrfs/relocation.c5
-rw-r--r--fs/btrfs/super.c254
-rw-r--r--fs/btrfs/transaction.c118
-rw-r--r--fs/btrfs/tree-log.c3
-rw-r--r--fs/btrfs/volumes.c56
28 files changed, 1348 insertions, 435 deletions
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 6df6d6ed74fd..6ef7b26724ec 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -22,6 +22,7 @@
22#include <linux/posix_acl_xattr.h> 22#include <linux/posix_acl_xattr.h>
23#include <linux/posix_acl.h> 23#include <linux/posix_acl.h>
24#include <linux/sched.h> 24#include <linux/sched.h>
25#include <linux/slab.h>
25 26
26#include "ctree.h" 27#include "ctree.h"
27#include "btrfs_inode.h" 28#include "btrfs_inode.h"
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index c0861e781cdb..462859a30141 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/kthread.h> 19#include <linux/kthread.h>
20#include <linux/slab.h>
20#include <linux/list.h> 21#include <linux/list.h>
21#include <linux/spinlock.h> 22#include <linux/spinlock.h>
22#include <linux/freezer.h> 23#include <linux/freezer.h>
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 3f1f50d9d916..7a4dee199832 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -153,6 +153,11 @@ struct btrfs_inode {
153 unsigned ordered_data_close:1; 153 unsigned ordered_data_close:1;
154 unsigned dummy_inode:1; 154 unsigned dummy_inode:1;
155 155
156 /*
157 * always compress this one file
158 */
159 unsigned force_compress:1;
160
156 struct inode vfs_inode; 161 struct inode vfs_inode;
157}; 162};
158 163
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index a11a32058b50..396039b3a8a2 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -31,7 +31,7 @@
31#include <linux/swap.h> 31#include <linux/swap.h>
32#include <linux/writeback.h> 32#include <linux/writeback.h>
33#include <linux/bit_spinlock.h> 33#include <linux/bit_spinlock.h>
34#include <linux/pagevec.h> 34#include <linux/slab.h>
35#include "compat.h" 35#include "compat.h"
36#include "ctree.h" 36#include "ctree.h"
37#include "disk-io.h" 37#include "disk-io.h"
@@ -445,7 +445,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
445 unsigned long nr_pages = 0; 445 unsigned long nr_pages = 0;
446 struct extent_map *em; 446 struct extent_map *em;
447 struct address_space *mapping = inode->i_mapping; 447 struct address_space *mapping = inode->i_mapping;
448 struct pagevec pvec;
449 struct extent_map_tree *em_tree; 448 struct extent_map_tree *em_tree;
450 struct extent_io_tree *tree; 449 struct extent_io_tree *tree;
451 u64 end; 450 u64 end;
@@ -461,7 +460,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
461 460
462 end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 461 end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
463 462
464 pagevec_init(&pvec, 0);
465 while (last_offset < compressed_end) { 463 while (last_offset < compressed_end) {
466 page_index = last_offset >> PAGE_CACHE_SHIFT; 464 page_index = last_offset >> PAGE_CACHE_SHIFT;
467 465
@@ -478,26 +476,17 @@ static noinline int add_ra_bio_pages(struct inode *inode,
478 goto next; 476 goto next;
479 } 477 }
480 478
481 page = alloc_page(mapping_gfp_mask(mapping) | GFP_NOFS); 479 page = __page_cache_alloc(mapping_gfp_mask(mapping) &
480 ~__GFP_FS);
482 if (!page) 481 if (!page)
483 break; 482 break;
484 483
485 page->index = page_index; 484 if (add_to_page_cache_lru(page, mapping, page_index,
486 /* 485 GFP_NOFS)) {
487 * what we want to do here is call add_to_page_cache_lru,
488 * but that isn't exported, so we reproduce it here
489 */
490 if (add_to_page_cache(page, mapping,
491 page->index, GFP_NOFS)) {
492 page_cache_release(page); 486 page_cache_release(page);
493 goto next; 487 goto next;
494 } 488 }
495 489
496 /* open coding of lru_cache_add, also not exported */
497 page_cache_get(page);
498 if (!pagevec_add(&pvec, page))
499 __pagevec_lru_add_file(&pvec);
500
501 end = last_offset + PAGE_CACHE_SIZE - 1; 490 end = last_offset + PAGE_CACHE_SIZE - 1;
502 /* 491 /*
503 * at this point, we have a locked page in the page cache 492 * at this point, we have a locked page in the page cache
@@ -551,8 +540,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
551next: 540next:
552 last_offset += PAGE_CACHE_SIZE; 541 last_offset += PAGE_CACHE_SIZE;
553 } 542 }
554 if (pagevec_count(&pvec))
555 __pagevec_lru_add_file(&pvec);
556 return 0; 543 return 0;
557} 544}
558 545
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c4bc570a396e..6795a713b205 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h>
20#include "ctree.h" 21#include "ctree.h"
21#include "disk-io.h" 22#include "disk-io.h"
22#include "transaction.h" 23#include "transaction.h"
@@ -3040,6 +3041,10 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
3040 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0])) 3041 if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
3041 goto err; 3042 goto err;
3042 3043
3044 /* the leaf has changed, it now has room. return now */
3045 if (btrfs_leaf_free_space(root, path->nodes[0]) >= ins_len)
3046 goto err;
3047
3043 if (key.type == BTRFS_EXTENT_DATA_KEY) { 3048 if (key.type == BTRFS_EXTENT_DATA_KEY) {
3044 fi = btrfs_item_ptr(leaf, path->slots[0], 3049 fi = btrfs_item_ptr(leaf, path->slots[0],
3045 struct btrfs_file_extent_item); 3050 struct btrfs_file_extent_item);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 8b5cfdd4bfc1..746a7248678e 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -26,6 +26,7 @@
26#include <linux/completion.h> 26#include <linux/completion.h>
27#include <linux/backing-dev.h> 27#include <linux/backing-dev.h>
28#include <linux/wait.h> 28#include <linux/wait.h>
29#include <linux/slab.h>
29#include <asm/kmap_types.h> 30#include <asm/kmap_types.h>
30#include "extent_io.h" 31#include "extent_io.h"
31#include "extent_map.h" 32#include "extent_map.h"
@@ -373,11 +374,13 @@ struct btrfs_super_block {
373 * ones specified below then we will fail to mount 374 * ones specified below then we will fail to mount
374 */ 375 */
375#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0) 376#define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF (1ULL << 0)
377#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (2ULL << 0)
376 378
377#define BTRFS_FEATURE_COMPAT_SUPP 0ULL 379#define BTRFS_FEATURE_COMPAT_SUPP 0ULL
378#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL 380#define BTRFS_FEATURE_COMPAT_RO_SUPP 0ULL
379#define BTRFS_FEATURE_INCOMPAT_SUPP \ 381#define BTRFS_FEATURE_INCOMPAT_SUPP \
380 BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF 382 (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF | \
383 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
381 384
382/* 385/*
383 * A leaf is full of items. offset and size tell us where to find 386 * A leaf is full of items. offset and size tell us where to find
@@ -832,7 +835,6 @@ struct btrfs_fs_info {
832 u64 last_trans_log_full_commit; 835 u64 last_trans_log_full_commit;
833 u64 open_ioctl_trans; 836 u64 open_ioctl_trans;
834 unsigned long mount_opt; 837 unsigned long mount_opt;
835 u64 max_extent;
836 u64 max_inline; 838 u64 max_inline;
837 u64 alloc_start; 839 u64 alloc_start;
838 struct btrfs_transaction *running_transaction; 840 struct btrfs_transaction *running_transaction;
@@ -1182,7 +1184,6 @@ struct btrfs_root {
1182#define BTRFS_INODE_NOATIME (1 << 9) 1184#define BTRFS_INODE_NOATIME (1 << 9)
1183#define BTRFS_INODE_DIRSYNC (1 << 10) 1185#define BTRFS_INODE_DIRSYNC (1 << 10)
1184 1186
1185
1186/* some macros to generate set/get funcs for the struct fields. This 1187/* some macros to generate set/get funcs for the struct fields. This
1187 * assumes there is a lefoo_to_cpu for every type, so lets make a simple 1188 * assumes there is a lefoo_to_cpu for every type, so lets make a simple
1188 * one for u8: 1189 * one for u8:
@@ -1842,7 +1843,7 @@ BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block,
1842BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block, 1843BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block,
1843 compat_flags, 64); 1844 compat_flags, 64);
1844BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block, 1845BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block,
1845 compat_flags, 64); 1846 compat_ro_flags, 64);
1846BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, 1847BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
1847 incompat_flags, 64); 1848 incompat_flags, 64);
1848BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, 1849BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
@@ -2310,7 +2311,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
2310 u32 min_type); 2311 u32 min_type);
2311 2312
2312int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); 2313int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
2313int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end); 2314int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
2315 struct extent_state **cached_state);
2314int btrfs_writepages(struct address_space *mapping, 2316int btrfs_writepages(struct address_space *mapping,
2315 struct writeback_control *wbc); 2317 struct writeback_control *wbc);
2316int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, 2318int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
@@ -2335,7 +2337,7 @@ int btrfs_init_cachep(void);
2335void btrfs_destroy_cachep(void); 2337void btrfs_destroy_cachep(void);
2336long btrfs_ioctl_trans_end(struct file *file); 2338long btrfs_ioctl_trans_end(struct file *file);
2337struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 2339struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
2338 struct btrfs_root *root); 2340 struct btrfs_root *root, int *was_new);
2339int btrfs_commit_write(struct file *file, struct page *page, 2341int btrfs_commit_write(struct file *file, struct page *page,
2340 unsigned from, unsigned to); 2342 unsigned from, unsigned to);
2341struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page, 2343struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
@@ -2386,7 +2388,6 @@ void btrfs_sysfs_del_super(struct btrfs_fs_info *root);
2386ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size); 2388ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
2387 2389
2388/* super.c */ 2390/* super.c */
2389u64 btrfs_parse_size(char *str);
2390int btrfs_parse_options(struct btrfs_root *root, char *options); 2391int btrfs_parse_options(struct btrfs_root *root, char *options);
2391int btrfs_sync_fs(struct super_block *sb, int wait); 2392int btrfs_sync_fs(struct super_block *sb, int wait);
2392 2393
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 84e6781413b1..902ce507c4e3 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h>
20#include <linux/sort.h> 21#include <linux/sort.h>
21#include "ctree.h" 22#include "ctree.h"
22#include "delayed-ref.h" 23#include "delayed-ref.h"
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 0427183e3e05..e7b8f2c89ccb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -27,6 +27,7 @@
27#include <linux/kthread.h> 27#include <linux/kthread.h>
28#include <linux/freezer.h> 28#include <linux/freezer.h>
29#include <linux/crc32c.h> 29#include <linux/crc32c.h>
30#include <linux/slab.h>
30#include "compat.h" 31#include "compat.h"
31#include "ctree.h" 32#include "ctree.h"
32#include "disk-io.h" 33#include "disk-io.h"
@@ -263,13 +264,15 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
263static int verify_parent_transid(struct extent_io_tree *io_tree, 264static int verify_parent_transid(struct extent_io_tree *io_tree,
264 struct extent_buffer *eb, u64 parent_transid) 265 struct extent_buffer *eb, u64 parent_transid)
265{ 266{
267 struct extent_state *cached_state = NULL;
266 int ret; 268 int ret;
267 269
268 if (!parent_transid || btrfs_header_generation(eb) == parent_transid) 270 if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
269 return 0; 271 return 0;
270 272
271 lock_extent(io_tree, eb->start, eb->start + eb->len - 1, GFP_NOFS); 273 lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
272 if (extent_buffer_uptodate(io_tree, eb) && 274 0, &cached_state, GFP_NOFS);
275 if (extent_buffer_uptodate(io_tree, eb, cached_state) &&
273 btrfs_header_generation(eb) == parent_transid) { 276 btrfs_header_generation(eb) == parent_transid) {
274 ret = 0; 277 ret = 0;
275 goto out; 278 goto out;
@@ -282,10 +285,10 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
282 (unsigned long long)btrfs_header_generation(eb)); 285 (unsigned long long)btrfs_header_generation(eb));
283 } 286 }
284 ret = 1; 287 ret = 1;
285 clear_extent_buffer_uptodate(io_tree, eb); 288 clear_extent_buffer_uptodate(io_tree, eb, &cached_state);
286out: 289out:
287 unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, 290 unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
288 GFP_NOFS); 291 &cached_state, GFP_NOFS);
289 return ret; 292 return ret;
290} 293}
291 294
@@ -1632,7 +1635,6 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1632 atomic_set(&fs_info->async_submit_draining, 0); 1635 atomic_set(&fs_info->async_submit_draining, 0);
1633 atomic_set(&fs_info->nr_async_bios, 0); 1636 atomic_set(&fs_info->nr_async_bios, 0);
1634 fs_info->sb = sb; 1637 fs_info->sb = sb;
1635 fs_info->max_extent = (u64)-1;
1636 fs_info->max_inline = 8192 * 1024; 1638 fs_info->max_inline = 8192 * 1024;
1637 fs_info->metadata_ratio = 0; 1639 fs_info->metadata_ratio = 0;
1638 1640
@@ -1920,7 +1922,11 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1920 1922
1921 csum_root->track_dirty = 1; 1923 csum_root->track_dirty = 1;
1922 1924
1923 btrfs_read_block_groups(extent_root); 1925 ret = btrfs_read_block_groups(extent_root);
1926 if (ret) {
1927 printk(KERN_ERR "Failed to read block groups: %d\n", ret);
1928 goto fail_block_groups;
1929 }
1924 1930
1925 fs_info->generation = generation; 1931 fs_info->generation = generation;
1926 fs_info->last_trans_committed = generation; 1932 fs_info->last_trans_committed = generation;
@@ -1930,7 +1936,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
1930 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, 1936 fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root,
1931 "btrfs-cleaner"); 1937 "btrfs-cleaner");
1932 if (IS_ERR(fs_info->cleaner_kthread)) 1938 if (IS_ERR(fs_info->cleaner_kthread))
1933 goto fail_csum_root; 1939 goto fail_block_groups;
1934 1940
1935 fs_info->transaction_kthread = kthread_run(transaction_kthread, 1941 fs_info->transaction_kthread = kthread_run(transaction_kthread,
1936 tree_root, 1942 tree_root,
@@ -2018,7 +2024,8 @@ fail_cleaner:
2018 filemap_write_and_wait(fs_info->btree_inode->i_mapping); 2024 filemap_write_and_wait(fs_info->btree_inode->i_mapping);
2019 invalidate_inode_pages2(fs_info->btree_inode->i_mapping); 2025 invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
2020 2026
2021fail_csum_root: 2027fail_block_groups:
2028 btrfs_free_block_groups(fs_info);
2022 free_extent_buffer(csum_root->node); 2029 free_extent_buffer(csum_root->node);
2023 free_extent_buffer(csum_root->commit_root); 2030 free_extent_buffer(csum_root->commit_root);
2024fail_dev_root: 2031fail_dev_root:
@@ -2497,7 +2504,8 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid)
2497 int ret; 2504 int ret;
2498 struct inode *btree_inode = buf->first_page->mapping->host; 2505 struct inode *btree_inode = buf->first_page->mapping->host;
2499 2506
2500 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf); 2507 ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf,
2508 NULL);
2501 if (!ret) 2509 if (!ret)
2502 return ret; 2510 return ret;
2503 2511
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index ba5c3fd5ab8c..951ef09b82f4 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -95,7 +95,7 @@ static struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
95 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); 95 btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
96 key.offset = 0; 96 key.offset = 0;
97 97
98 inode = btrfs_iget(sb, &key, root); 98 inode = btrfs_iget(sb, &key, root, NULL);
99 if (IS_ERR(inode)) { 99 if (IS_ERR(inode)) {
100 err = PTR_ERR(inode); 100 err = PTR_ERR(inode);
101 goto fail; 101 goto fail;
@@ -223,7 +223,7 @@ static struct dentry *btrfs_get_parent(struct dentry *child)
223 223
224 key.type = BTRFS_INODE_ITEM_KEY; 224 key.type = BTRFS_INODE_ITEM_KEY;
225 key.offset = 0; 225 key.offset = 0;
226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root)); 226 dentry = d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
227 if (!IS_ERR(dentry)) 227 if (!IS_ERR(dentry))
228 dentry->d_op = &btrfs_dentry_operations; 228 dentry->d_op = &btrfs_dentry_operations;
229 return dentry; 229 return dentry;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 559f72489b3b..b34d32fdaaec 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -22,6 +22,7 @@
22#include <linux/sort.h> 22#include <linux/sort.h>
23#include <linux/rcupdate.h> 23#include <linux/rcupdate.h>
24#include <linux/kthread.h> 24#include <linux/kthread.h>
25#include <linux/slab.h>
25#include "compat.h" 26#include "compat.h"
26#include "hash.h" 27#include "hash.h"
27#include "ctree.h" 28#include "ctree.h"
@@ -2676,6 +2677,8 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
2676 2677
2677 INIT_LIST_HEAD(&found->block_groups); 2678 INIT_LIST_HEAD(&found->block_groups);
2678 init_rwsem(&found->groups_sem); 2679 init_rwsem(&found->groups_sem);
2680 init_waitqueue_head(&found->flush_wait);
2681 init_waitqueue_head(&found->allocate_wait);
2679 spin_lock_init(&found->lock); 2682 spin_lock_init(&found->lock);
2680 found->flags = flags; 2683 found->flags = flags;
2681 found->total_bytes = total_bytes; 2684 found->total_bytes = total_bytes;
@@ -2846,7 +2849,7 @@ int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root,
2846 } 2849 }
2847 spin_unlock(&BTRFS_I(inode)->accounting_lock); 2850 spin_unlock(&BTRFS_I(inode)->accounting_lock);
2848 2851
2849 BTRFS_I(inode)->reserved_extents--; 2852 BTRFS_I(inode)->reserved_extents -= num_items;
2850 BUG_ON(BTRFS_I(inode)->reserved_extents < 0); 2853 BUG_ON(BTRFS_I(inode)->reserved_extents < 0);
2851 2854
2852 if (meta_sinfo->bytes_delalloc < num_bytes) { 2855 if (meta_sinfo->bytes_delalloc < num_bytes) {
@@ -2944,12 +2947,10 @@ static void flush_delalloc(struct btrfs_root *root,
2944 2947
2945 spin_lock(&info->lock); 2948 spin_lock(&info->lock);
2946 2949
2947 if (!info->flushing) { 2950 if (!info->flushing)
2948 info->flushing = 1; 2951 info->flushing = 1;
2949 init_waitqueue_head(&info->flush_wait); 2952 else
2950 } else {
2951 wait = true; 2953 wait = true;
2952 }
2953 2954
2954 spin_unlock(&info->lock); 2955 spin_unlock(&info->lock);
2955 2956
@@ -3011,7 +3012,6 @@ static int maybe_allocate_chunk(struct btrfs_root *root,
3011 if (!info->allocating_chunk) { 3012 if (!info->allocating_chunk) {
3012 info->force_alloc = 1; 3013 info->force_alloc = 1;
3013 info->allocating_chunk = 1; 3014 info->allocating_chunk = 1;
3014 init_waitqueue_head(&info->allocate_wait);
3015 } else { 3015 } else {
3016 wait = true; 3016 wait = true;
3017 } 3017 }
@@ -3111,7 +3111,7 @@ again:
3111 return -ENOSPC; 3111 return -ENOSPC;
3112 } 3112 }
3113 3113
3114 BTRFS_I(inode)->reserved_extents++; 3114 BTRFS_I(inode)->reserved_extents += num_items;
3115 check_force_delalloc(meta_sinfo); 3115 check_force_delalloc(meta_sinfo);
3116 spin_unlock(&meta_sinfo->lock); 3116 spin_unlock(&meta_sinfo->lock);
3117 3117
@@ -3235,7 +3235,8 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
3235 u64 bytes) 3235 u64 bytes)
3236{ 3236{
3237 struct btrfs_space_info *data_sinfo; 3237 struct btrfs_space_info *data_sinfo;
3238 int ret = 0, committed = 0; 3238 u64 used;
3239 int ret = 0, committed = 0, flushed = 0;
3239 3240
3240 /* make sure bytes are sectorsize aligned */ 3241 /* make sure bytes are sectorsize aligned */
3241 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3242 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
@@ -3247,12 +3248,21 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
3247again: 3248again:
3248 /* make sure we have enough space to handle the data first */ 3249 /* make sure we have enough space to handle the data first */
3249 spin_lock(&data_sinfo->lock); 3250 spin_lock(&data_sinfo->lock);
3250 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 3251 used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc +
3251 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 3252 data_sinfo->bytes_reserved + data_sinfo->bytes_pinned +
3252 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 3253 data_sinfo->bytes_readonly + data_sinfo->bytes_may_use +
3253 data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { 3254 data_sinfo->bytes_super;
3255
3256 if (used + bytes > data_sinfo->total_bytes) {
3254 struct btrfs_trans_handle *trans; 3257 struct btrfs_trans_handle *trans;
3255 3258
3259 if (!flushed) {
3260 spin_unlock(&data_sinfo->lock);
3261 flush_delalloc(root, data_sinfo);
3262 flushed = 1;
3263 goto again;
3264 }
3265
3256 /* 3266 /*
3257 * if we don't have enough free bytes in this space then we need 3267 * if we don't have enough free bytes in this space then we need
3258 * to alloc a new chunk. 3268 * to alloc a new chunk.
@@ -4170,6 +4180,10 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
4170 ins->offset = 0; 4180 ins->offset = 0;
4171 4181
4172 space_info = __find_space_info(root->fs_info, data); 4182 space_info = __find_space_info(root->fs_info, data);
4183 if (!space_info) {
4184 printk(KERN_ERR "No space info for %d\n", data);
4185 return -ENOSPC;
4186 }
4173 4187
4174 if (orig_root->ref_cows || empty_size) 4188 if (orig_root->ref_cows || empty_size)
4175 allowed_chunk_alloc = 1; 4189 allowed_chunk_alloc = 1;
@@ -5205,6 +5219,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
5205 next = btrfs_find_tree_block(root, bytenr, blocksize); 5219 next = btrfs_find_tree_block(root, bytenr, blocksize);
5206 if (!next) { 5220 if (!next) {
5207 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 5221 next = btrfs_find_create_tree_block(root, bytenr, blocksize);
5222 if (!next)
5223 return -ENOMEM;
5208 reada = 1; 5224 reada = 1;
5209 } 5225 }
5210 btrfs_tree_lock(next); 5226 btrfs_tree_lock(next);
@@ -5417,7 +5433,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
5417 if (ret > 0) { 5433 if (ret > 0) {
5418 path->slots[level]++; 5434 path->slots[level]++;
5419 continue; 5435 continue;
5420 } 5436 } else if (ret < 0)
5437 return ret;
5421 level = wc->level; 5438 level = wc->level;
5422 } 5439 }
5423 return 0; 5440 return 0;
@@ -6561,6 +6578,7 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root,
6561 struct btrfs_key key; 6578 struct btrfs_key key;
6562 struct inode *inode = NULL; 6579 struct inode *inode = NULL;
6563 struct btrfs_file_extent_item *fi; 6580 struct btrfs_file_extent_item *fi;
6581 struct extent_state *cached_state = NULL;
6564 u64 num_bytes; 6582 u64 num_bytes;
6565 u64 skip_objectid = 0; 6583 u64 skip_objectid = 0;
6566 u32 nritems; 6584 u32 nritems;
@@ -6589,12 +6607,14 @@ static noinline int invalidate_extent_cache(struct btrfs_root *root,
6589 } 6607 }
6590 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6608 num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
6591 6609
6592 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6610 lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset,
6593 key.offset + num_bytes - 1, GFP_NOFS); 6611 key.offset + num_bytes - 1, 0, &cached_state,
6612 GFP_NOFS);
6594 btrfs_drop_extent_cache(inode, key.offset, 6613 btrfs_drop_extent_cache(inode, key.offset,
6595 key.offset + num_bytes - 1, 1); 6614 key.offset + num_bytes - 1, 1);
6596 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6615 unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset,
6597 key.offset + num_bytes - 1, GFP_NOFS); 6616 key.offset + num_bytes - 1, &cached_state,
6617 GFP_NOFS);
6598 cond_resched(); 6618 cond_resched();
6599 } 6619 }
6600 iput(inode); 6620 iput(inode);
@@ -7366,7 +7386,6 @@ static int find_first_block_group(struct btrfs_root *root,
7366 } 7386 }
7367 path->slots[0]++; 7387 path->slots[0]++;
7368 } 7388 }
7369 ret = -ENOENT;
7370out: 7389out:
7371 return ret; 7390 return ret;
7372} 7391}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7073cbb1b2d4..d2d03684fab2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2,7 +2,6 @@
2#include <linux/slab.h> 2#include <linux/slab.h>
3#include <linux/bio.h> 3#include <linux/bio.h>
4#include <linux/mm.h> 4#include <linux/mm.h>
5#include <linux/gfp.h>
6#include <linux/pagemap.h> 5#include <linux/pagemap.h>
7#include <linux/page-flags.h> 6#include <linux/page-flags.h>
8#include <linux/module.h> 7#include <linux/module.h>
@@ -513,7 +512,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
513 u64 last_end; 512 u64 last_end;
514 int err; 513 int err;
515 int set = 0; 514 int set = 0;
515 int clear = 0;
516 516
517 if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY))
518 clear = 1;
517again: 519again:
518 if (!prealloc && (mask & __GFP_WAIT)) { 520 if (!prealloc && (mask & __GFP_WAIT)) {
519 prealloc = alloc_extent_state(mask); 521 prealloc = alloc_extent_state(mask);
@@ -524,14 +526,20 @@ again:
524 spin_lock(&tree->lock); 526 spin_lock(&tree->lock);
525 if (cached_state) { 527 if (cached_state) {
526 cached = *cached_state; 528 cached = *cached_state;
527 *cached_state = NULL; 529
528 cached_state = NULL; 530 if (clear) {
531 *cached_state = NULL;
532 cached_state = NULL;
533 }
534
529 if (cached && cached->tree && cached->start == start) { 535 if (cached && cached->tree && cached->start == start) {
530 atomic_dec(&cached->refs); 536 if (clear)
537 atomic_dec(&cached->refs);
531 state = cached; 538 state = cached;
532 goto hit_next; 539 goto hit_next;
533 } 540 }
534 free_extent_state(cached); 541 if (clear)
542 free_extent_state(cached);
535 } 543 }
536 /* 544 /*
537 * this search will find the extents that end after 545 * this search will find the extents that end after
@@ -946,11 +954,11 @@ int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
946} 954}
947 955
948int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 956int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
949 gfp_t mask) 957 struct extent_state **cached_state, gfp_t mask)
950{ 958{
951 return set_extent_bit(tree, start, end, 959 return set_extent_bit(tree, start, end,
952 EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, 960 EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE,
953 0, NULL, NULL, mask); 961 0, NULL, cached_state, mask);
954} 962}
955 963
956int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, 964int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
@@ -984,10 +992,11 @@ int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
984} 992}
985 993
986static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start, 994static int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
987 u64 end, gfp_t mask) 995 u64 end, struct extent_state **cached_state,
996 gfp_t mask)
988{ 997{
989 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0, 998 return clear_extent_bit(tree, start, end, EXTENT_UPTODATE, 0, 0,
990 NULL, mask); 999 cached_state, mask);
991} 1000}
992 1001
993int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end) 1002int wait_on_extent_writeback(struct extent_io_tree *tree, u64 start, u64 end)
@@ -1171,7 +1180,8 @@ out:
1171 * 1 is returned if we find something, 0 if nothing was in the tree 1180 * 1 is returned if we find something, 0 if nothing was in the tree
1172 */ 1181 */
1173static noinline u64 find_delalloc_range(struct extent_io_tree *tree, 1182static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1174 u64 *start, u64 *end, u64 max_bytes) 1183 u64 *start, u64 *end, u64 max_bytes,
1184 struct extent_state **cached_state)
1175{ 1185{
1176 struct rb_node *node; 1186 struct rb_node *node;
1177 struct extent_state *state; 1187 struct extent_state *state;
@@ -1203,8 +1213,11 @@ static noinline u64 find_delalloc_range(struct extent_io_tree *tree,
1203 *end = state->end; 1213 *end = state->end;
1204 goto out; 1214 goto out;
1205 } 1215 }
1206 if (!found) 1216 if (!found) {
1207 *start = state->start; 1217 *start = state->start;
1218 *cached_state = state;
1219 atomic_inc(&state->refs);
1220 }
1208 found++; 1221 found++;
1209 *end = state->end; 1222 *end = state->end;
1210 cur_start = state->end + 1; 1223 cur_start = state->end + 1;
@@ -1336,10 +1349,11 @@ again:
1336 delalloc_start = *start; 1349 delalloc_start = *start;
1337 delalloc_end = 0; 1350 delalloc_end = 0;
1338 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end, 1351 found = find_delalloc_range(tree, &delalloc_start, &delalloc_end,
1339 max_bytes); 1352 max_bytes, &cached_state);
1340 if (!found || delalloc_end <= *start) { 1353 if (!found || delalloc_end <= *start) {
1341 *start = delalloc_start; 1354 *start = delalloc_start;
1342 *end = delalloc_end; 1355 *end = delalloc_end;
1356 free_extent_state(cached_state);
1343 return found; 1357 return found;
1344 } 1358 }
1345 1359
@@ -1722,7 +1736,7 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
1722 } 1736 }
1723 1737
1724 if (!uptodate) { 1738 if (!uptodate) {
1725 clear_extent_uptodate(tree, start, end, GFP_NOFS); 1739 clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS);
1726 ClearPageUptodate(page); 1740 ClearPageUptodate(page);
1727 SetPageError(page); 1741 SetPageError(page);
1728 } 1742 }
@@ -1750,7 +1764,8 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
1750static void end_bio_extent_readpage(struct bio *bio, int err) 1764static void end_bio_extent_readpage(struct bio *bio, int err)
1751{ 1765{
1752 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); 1766 int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
1753 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; 1767 struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
1768 struct bio_vec *bvec = bio->bi_io_vec;
1754 struct extent_io_tree *tree; 1769 struct extent_io_tree *tree;
1755 u64 start; 1770 u64 start;
1756 u64 end; 1771 u64 end;
@@ -1773,7 +1788,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1773 else 1788 else
1774 whole_page = 0; 1789 whole_page = 0;
1775 1790
1776 if (--bvec >= bio->bi_io_vec) 1791 if (++bvec <= bvec_end)
1777 prefetchw(&bvec->bv_page->flags); 1792 prefetchw(&bvec->bv_page->flags);
1778 1793
1779 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { 1794 if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
@@ -1818,7 +1833,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
1818 } 1833 }
1819 check_page_locked(tree, page); 1834 check_page_locked(tree, page);
1820 } 1835 }
1821 } while (bvec >= bio->bi_io_vec); 1836 } while (bvec <= bvec_end);
1822 1837
1823 bio_put(bio); 1838 bio_put(bio);
1824} 1839}
@@ -2663,33 +2678,20 @@ int extent_readpages(struct extent_io_tree *tree,
2663{ 2678{
2664 struct bio *bio = NULL; 2679 struct bio *bio = NULL;
2665 unsigned page_idx; 2680 unsigned page_idx;
2666 struct pagevec pvec;
2667 unsigned long bio_flags = 0; 2681 unsigned long bio_flags = 0;
2668 2682
2669 pagevec_init(&pvec, 0);
2670 for (page_idx = 0; page_idx < nr_pages; page_idx++) { 2683 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
2671 struct page *page = list_entry(pages->prev, struct page, lru); 2684 struct page *page = list_entry(pages->prev, struct page, lru);
2672 2685
2673 prefetchw(&page->flags); 2686 prefetchw(&page->flags);
2674 list_del(&page->lru); 2687 list_del(&page->lru);
2675 /* 2688 if (!add_to_page_cache_lru(page, mapping,
2676 * what we want to do here is call add_to_page_cache_lru,
2677 * but that isn't exported, so we reproduce it here
2678 */
2679 if (!add_to_page_cache(page, mapping,
2680 page->index, GFP_KERNEL)) { 2689 page->index, GFP_KERNEL)) {
2681
2682 /* open coding of lru_cache_add, also not exported */
2683 page_cache_get(page);
2684 if (!pagevec_add(&pvec, page))
2685 __pagevec_lru_add_file(&pvec);
2686 __extent_read_full_page(tree, page, get_extent, 2690 __extent_read_full_page(tree, page, get_extent,
2687 &bio, 0, &bio_flags); 2691 &bio, 0, &bio_flags);
2688 } 2692 }
2689 page_cache_release(page); 2693 page_cache_release(page);
2690 } 2694 }
2691 if (pagevec_count(&pvec))
2692 __pagevec_lru_add_file(&pvec);
2693 BUG_ON(!list_empty(pages)); 2695 BUG_ON(!list_empty(pages));
2694 if (bio) 2696 if (bio)
2695 submit_one_bio(READ, bio, 0, bio_flags); 2697 submit_one_bio(READ, bio, 0, bio_flags);
@@ -2704,6 +2706,7 @@ int extent_readpages(struct extent_io_tree *tree,
2704int extent_invalidatepage(struct extent_io_tree *tree, 2706int extent_invalidatepage(struct extent_io_tree *tree,
2705 struct page *page, unsigned long offset) 2707 struct page *page, unsigned long offset)
2706{ 2708{
2709 struct extent_state *cached_state = NULL;
2707 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT); 2710 u64 start = ((u64)page->index << PAGE_CACHE_SHIFT);
2708 u64 end = start + PAGE_CACHE_SIZE - 1; 2711 u64 end = start + PAGE_CACHE_SIZE - 1;
2709 size_t blocksize = page->mapping->host->i_sb->s_blocksize; 2712 size_t blocksize = page->mapping->host->i_sb->s_blocksize;
@@ -2712,12 +2715,12 @@ int extent_invalidatepage(struct extent_io_tree *tree,
2712 if (start > end) 2715 if (start > end)
2713 return 0; 2716 return 0;
2714 2717
2715 lock_extent(tree, start, end, GFP_NOFS); 2718 lock_extent_bits(tree, start, end, 0, &cached_state, GFP_NOFS);
2716 wait_on_page_writeback(page); 2719 wait_on_page_writeback(page);
2717 clear_extent_bit(tree, start, end, 2720 clear_extent_bit(tree, start, end,
2718 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 2721 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
2719 EXTENT_DO_ACCOUNTING, 2722 EXTENT_DO_ACCOUNTING,
2720 1, 1, NULL, GFP_NOFS); 2723 1, 1, &cached_state, GFP_NOFS);
2721 return 0; 2724 return 0;
2722} 2725}
2723 2726
@@ -2920,16 +2923,17 @@ sector_t extent_bmap(struct address_space *mapping, sector_t iblock,
2920 get_extent_t *get_extent) 2923 get_extent_t *get_extent)
2921{ 2924{
2922 struct inode *inode = mapping->host; 2925 struct inode *inode = mapping->host;
2926 struct extent_state *cached_state = NULL;
2923 u64 start = iblock << inode->i_blkbits; 2927 u64 start = iblock << inode->i_blkbits;
2924 sector_t sector = 0; 2928 sector_t sector = 0;
2925 size_t blksize = (1 << inode->i_blkbits); 2929 size_t blksize = (1 << inode->i_blkbits);
2926 struct extent_map *em; 2930 struct extent_map *em;
2927 2931
2928 lock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, 2932 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + blksize - 1,
2929 GFP_NOFS); 2933 0, &cached_state, GFP_NOFS);
2930 em = get_extent(inode, NULL, 0, start, blksize, 0); 2934 em = get_extent(inode, NULL, 0, start, blksize, 0);
2931 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + blksize - 1, 2935 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start,
2932 GFP_NOFS); 2936 start + blksize - 1, &cached_state, GFP_NOFS);
2933 if (!em || IS_ERR(em)) 2937 if (!em || IS_ERR(em))
2934 return 0; 2938 return 0;
2935 2939
@@ -2951,6 +2955,7 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2951 u32 flags = 0; 2955 u32 flags = 0;
2952 u64 disko = 0; 2956 u64 disko = 0;
2953 struct extent_map *em = NULL; 2957 struct extent_map *em = NULL;
2958 struct extent_state *cached_state = NULL;
2954 int end = 0; 2959 int end = 0;
2955 u64 em_start = 0, em_len = 0; 2960 u64 em_start = 0, em_len = 0;
2956 unsigned long emflags; 2961 unsigned long emflags;
@@ -2959,8 +2964,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2959 if (len == 0) 2964 if (len == 0)
2960 return -EINVAL; 2965 return -EINVAL;
2961 2966
2962 lock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 2967 lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
2963 GFP_NOFS); 2968 &cached_state, GFP_NOFS);
2964 em = get_extent(inode, NULL, 0, off, max - off, 0); 2969 em = get_extent(inode, NULL, 0, off, max - off, 0);
2965 if (!em) 2970 if (!em)
2966 goto out; 2971 goto out;
@@ -3023,8 +3028,8 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3023out_free: 3028out_free:
3024 free_extent_map(em); 3029 free_extent_map(em);
3025out: 3030out:
3026 unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len, 3031 unlock_extent_cached(&BTRFS_I(inode)->io_tree, start, start + len,
3027 GFP_NOFS); 3032 &cached_state, GFP_NOFS);
3028 return ret; 3033 return ret;
3029} 3034}
3030 3035
@@ -3264,7 +3269,8 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree,
3264} 3269}
3265 3270
3266int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 3271int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3267 struct extent_buffer *eb) 3272 struct extent_buffer *eb,
3273 struct extent_state **cached_state)
3268{ 3274{
3269 unsigned long i; 3275 unsigned long i;
3270 struct page *page; 3276 struct page *page;
@@ -3274,7 +3280,7 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
3274 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); 3280 clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
3275 3281
3276 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, 3282 clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1,
3277 GFP_NOFS); 3283 cached_state, GFP_NOFS);
3278 for (i = 0; i < num_pages; i++) { 3284 for (i = 0; i < num_pages; i++) {
3279 page = extent_buffer_page(eb, i); 3285 page = extent_buffer_page(eb, i);
3280 if (page) 3286 if (page)
@@ -3334,7 +3340,8 @@ int extent_range_uptodate(struct extent_io_tree *tree,
3334} 3340}
3335 3341
3336int extent_buffer_uptodate(struct extent_io_tree *tree, 3342int extent_buffer_uptodate(struct extent_io_tree *tree,
3337 struct extent_buffer *eb) 3343 struct extent_buffer *eb,
3344 struct extent_state *cached_state)
3338{ 3345{
3339 int ret = 0; 3346 int ret = 0;
3340 unsigned long num_pages; 3347 unsigned long num_pages;
@@ -3346,7 +3353,7 @@ int extent_buffer_uptodate(struct extent_io_tree *tree,
3346 return 1; 3353 return 1;
3347 3354
3348 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, 3355 ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1,
3349 EXTENT_UPTODATE, 1, NULL); 3356 EXTENT_UPTODATE, 1, cached_state);
3350 if (ret) 3357 if (ret)
3351 return ret; 3358 return ret;
3352 3359
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 36de250a7b2b..bbab4813646f 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -163,6 +163,8 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
163int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, 163int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
164 int bits, struct extent_state **cached, gfp_t mask); 164 int bits, struct extent_state **cached, gfp_t mask);
165int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask); 165int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end, gfp_t mask);
166int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
167 struct extent_state **cached, gfp_t mask);
166int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end, 168int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
167 gfp_t mask); 169 gfp_t mask);
168int extent_read_full_page(struct extent_io_tree *tree, struct page *page, 170int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
@@ -196,7 +198,7 @@ int clear_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
196int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start, 198int clear_extent_ordered_metadata(struct extent_io_tree *tree, u64 start,
197 u64 end, gfp_t mask); 199 u64 end, gfp_t mask);
198int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end, 200int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
199 gfp_t mask); 201 struct extent_state **cached_state, gfp_t mask);
200int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end, 202int set_extent_ordered(struct extent_io_tree *tree, u64 start, u64 end,
201 gfp_t mask); 203 gfp_t mask);
202int find_first_extent_bit(struct extent_io_tree *tree, u64 start, 204int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
@@ -281,9 +283,11 @@ int test_extent_buffer_dirty(struct extent_io_tree *tree,
281int set_extent_buffer_uptodate(struct extent_io_tree *tree, 283int set_extent_buffer_uptodate(struct extent_io_tree *tree,
282 struct extent_buffer *eb); 284 struct extent_buffer *eb);
283int clear_extent_buffer_uptodate(struct extent_io_tree *tree, 285int clear_extent_buffer_uptodate(struct extent_io_tree *tree,
284 struct extent_buffer *eb); 286 struct extent_buffer *eb,
287 struct extent_state **cached_state);
285int extent_buffer_uptodate(struct extent_io_tree *tree, 288int extent_buffer_uptodate(struct extent_io_tree *tree,
286 struct extent_buffer *eb); 289 struct extent_buffer *eb,
290 struct extent_state *cached_state);
287int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, 291int map_extent_buffer(struct extent_buffer *eb, unsigned long offset,
288 unsigned long min_len, char **token, char **map, 292 unsigned long min_len, char **token, char **map,
289 unsigned long *map_start, 293 unsigned long *map_start,
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 28d87ba60ce8..454ca52d6451 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -1,5 +1,4 @@
1#include <linux/err.h> 1#include <linux/err.h>
2#include <linux/gfp.h>
3#include <linux/slab.h> 2#include <linux/slab.h>
4#include <linux/module.h> 3#include <linux/module.h>
5#include <linux/spinlock.h> 4#include <linux/spinlock.h>
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index 9b99886562d0..54a255065aa3 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/bio.h> 19#include <linux/bio.h>
20#include <linux/slab.h>
20#include <linux/pagemap.h> 21#include <linux/pagemap.h>
21#include <linux/highmem.h> 22#include <linux/highmem.h>
22#include "ctree.h" 23#include "ctree.h"
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 6ed434ac037f..29ff749ff4ca 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -28,6 +28,7 @@
28#include <linux/writeback.h> 28#include <linux/writeback.h>
29#include <linux/statfs.h> 29#include <linux/statfs.h>
30#include <linux/compat.h> 30#include <linux/compat.h>
31#include <linux/slab.h>
31#include "ctree.h" 32#include "ctree.h"
32#include "disk-io.h" 33#include "disk-io.h"
33#include "transaction.h" 34#include "transaction.h"
@@ -123,7 +124,8 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
123 root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 124 root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
124 125
125 end_of_last_block = start_pos + num_bytes - 1; 126 end_of_last_block = start_pos + num_bytes - 1;
126 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); 127 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
128 NULL);
127 if (err) 129 if (err)
128 return err; 130 return err;
129 131
@@ -753,6 +755,7 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file,
753 loff_t pos, unsigned long first_index, 755 loff_t pos, unsigned long first_index,
754 unsigned long last_index, size_t write_bytes) 756 unsigned long last_index, size_t write_bytes)
755{ 757{
758 struct extent_state *cached_state = NULL;
756 int i; 759 int i;
757 unsigned long index = pos >> PAGE_CACHE_SHIFT; 760 unsigned long index = pos >> PAGE_CACHE_SHIFT;
758 struct inode *inode = fdentry(file)->d_inode; 761 struct inode *inode = fdentry(file)->d_inode;
@@ -781,16 +784,18 @@ again:
781 } 784 }
782 if (start_pos < inode->i_size) { 785 if (start_pos < inode->i_size) {
783 struct btrfs_ordered_extent *ordered; 786 struct btrfs_ordered_extent *ordered;
784 lock_extent(&BTRFS_I(inode)->io_tree, 787 lock_extent_bits(&BTRFS_I(inode)->io_tree,
785 start_pos, last_pos - 1, GFP_NOFS); 788 start_pos, last_pos - 1, 0, &cached_state,
789 GFP_NOFS);
786 ordered = btrfs_lookup_first_ordered_extent(inode, 790 ordered = btrfs_lookup_first_ordered_extent(inode,
787 last_pos - 1); 791 last_pos - 1);
788 if (ordered && 792 if (ordered &&
789 ordered->file_offset + ordered->len > start_pos && 793 ordered->file_offset + ordered->len > start_pos &&
790 ordered->file_offset < last_pos) { 794 ordered->file_offset < last_pos) {
791 btrfs_put_ordered_extent(ordered); 795 btrfs_put_ordered_extent(ordered);
792 unlock_extent(&BTRFS_I(inode)->io_tree, 796 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
793 start_pos, last_pos - 1, GFP_NOFS); 797 start_pos, last_pos - 1,
798 &cached_state, GFP_NOFS);
794 for (i = 0; i < num_pages; i++) { 799 for (i = 0; i < num_pages; i++) {
795 unlock_page(pages[i]); 800 unlock_page(pages[i]);
796 page_cache_release(pages[i]); 801 page_cache_release(pages[i]);
@@ -802,12 +807,13 @@ again:
802 if (ordered) 807 if (ordered)
803 btrfs_put_ordered_extent(ordered); 808 btrfs_put_ordered_extent(ordered);
804 809
805 clear_extent_bits(&BTRFS_I(inode)->io_tree, start_pos, 810 clear_extent_bit(&BTRFS_I(inode)->io_tree, start_pos,
806 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 811 last_pos - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
807 EXTENT_DO_ACCOUNTING, 812 EXTENT_DO_ACCOUNTING, 0, 0, &cached_state,
808 GFP_NOFS); 813 GFP_NOFS);
809 unlock_extent(&BTRFS_I(inode)->io_tree, 814 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
810 start_pos, last_pos - 1, GFP_NOFS); 815 start_pos, last_pos - 1, &cached_state,
816 GFP_NOFS);
811 } 817 }
812 for (i = 0; i < num_pages; i++) { 818 for (i = 0; i < num_pages; i++) {
813 clear_page_dirty_for_io(pages[i]); 819 clear_page_dirty_for_io(pages[i]);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index dd831ed31eea..f488fac04d99 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -18,6 +18,7 @@
18 18
19#include <linux/pagemap.h> 19#include <linux/pagemap.h>
20#include <linux/sched.h> 20#include <linux/sched.h>
21#include <linux/slab.h>
21#include <linux/math64.h> 22#include <linux/math64.h>
22#include "ctree.h" 23#include "ctree.h"
23#include "free-space-cache.h" 24#include "free-space-cache.h"
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c41db6d45ab6..2bfdc641d4e3 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -36,6 +36,7 @@
36#include <linux/xattr.h> 36#include <linux/xattr.h>
37#include <linux/posix_acl.h> 37#include <linux/posix_acl.h>
38#include <linux/falloc.h> 38#include <linux/falloc.h>
39#include <linux/slab.h>
39#include "compat.h" 40#include "compat.h"
40#include "ctree.h" 41#include "ctree.h"
41#include "disk-io.h" 42#include "disk-io.h"
@@ -379,7 +380,8 @@ again:
379 * change at any time if we discover bad compression ratios. 380 * change at any time if we discover bad compression ratios.
380 */ 381 */
381 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) && 382 if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NOCOMPRESS) &&
382 btrfs_test_opt(root, COMPRESS)) { 383 (btrfs_test_opt(root, COMPRESS) ||
384 (BTRFS_I(inode)->force_compress))) {
383 WARN_ON(pages); 385 WARN_ON(pages);
384 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS); 386 pages = kzalloc(sizeof(struct page *) * nr_pages, GFP_NOFS);
385 387
@@ -483,8 +485,10 @@ again:
483 nr_pages_ret = 0; 485 nr_pages_ret = 0;
484 486
485 /* flag the file so we don't compress in the future */ 487 /* flag the file so we don't compress in the future */
486 if (!btrfs_test_opt(root, FORCE_COMPRESS)) 488 if (!btrfs_test_opt(root, FORCE_COMPRESS) &&
489 !(BTRFS_I(inode)->force_compress)) {
487 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 490 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
491 }
488 } 492 }
489 if (will_compress) { 493 if (will_compress) {
490 *num_added += 1; 494 *num_added += 1;
@@ -570,8 +574,8 @@ retry:
570 unsigned long nr_written = 0; 574 unsigned long nr_written = 0;
571 575
572 lock_extent(io_tree, async_extent->start, 576 lock_extent(io_tree, async_extent->start,
573 async_extent->start + 577 async_extent->start +
574 async_extent->ram_size - 1, GFP_NOFS); 578 async_extent->ram_size - 1, GFP_NOFS);
575 579
576 /* allocate blocks */ 580 /* allocate blocks */
577 ret = cow_file_range(inode, async_cow->locked_page, 581 ret = cow_file_range(inode, async_cow->locked_page,
@@ -793,7 +797,7 @@ static noinline int cow_file_range(struct inode *inode,
793 while (disk_num_bytes > 0) { 797 while (disk_num_bytes > 0) {
794 unsigned long op; 798 unsigned long op;
795 799
796 cur_alloc_size = min(disk_num_bytes, root->fs_info->max_extent); 800 cur_alloc_size = disk_num_bytes;
797 ret = btrfs_reserve_extent(trans, root, cur_alloc_size, 801 ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
798 root->sectorsize, 0, alloc_hint, 802 root->sectorsize, 0, alloc_hint,
799 (u64)-1, &ins, 1); 803 (u64)-1, &ins, 1);
@@ -1211,7 +1215,8 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1211 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC) 1215 else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC)
1212 ret = run_delalloc_nocow(inode, locked_page, start, end, 1216 ret = run_delalloc_nocow(inode, locked_page, start, end,
1213 page_started, 0, nr_written); 1217 page_started, 0, nr_written);
1214 else if (!btrfs_test_opt(root, COMPRESS)) 1218 else if (!btrfs_test_opt(root, COMPRESS) &&
1219 !(BTRFS_I(inode)->force_compress))
1215 ret = cow_file_range(inode, locked_page, start, end, 1220 ret = cow_file_range(inode, locked_page, start, end,
1216 page_started, nr_written, 1); 1221 page_started, nr_written, 1);
1217 else 1222 else
@@ -1223,30 +1228,9 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page,
1223static int btrfs_split_extent_hook(struct inode *inode, 1228static int btrfs_split_extent_hook(struct inode *inode,
1224 struct extent_state *orig, u64 split) 1229 struct extent_state *orig, u64 split)
1225{ 1230{
1226 struct btrfs_root *root = BTRFS_I(inode)->root;
1227 u64 size;
1228
1229 if (!(orig->state & EXTENT_DELALLOC)) 1231 if (!(orig->state & EXTENT_DELALLOC))
1230 return 0; 1232 return 0;
1231 1233
1232 size = orig->end - orig->start + 1;
1233 if (size > root->fs_info->max_extent) {
1234 u64 num_extents;
1235 u64 new_size;
1236
1237 new_size = orig->end - split + 1;
1238 num_extents = div64_u64(size + root->fs_info->max_extent - 1,
1239 root->fs_info->max_extent);
1240
1241 /*
1242 * if we break a large extent up then leave oustanding_extents
1243 * be, since we've already accounted for the large extent.
1244 */
1245 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1246 root->fs_info->max_extent) < num_extents)
1247 return 0;
1248 }
1249
1250 spin_lock(&BTRFS_I(inode)->accounting_lock); 1234 spin_lock(&BTRFS_I(inode)->accounting_lock);
1251 BTRFS_I(inode)->outstanding_extents++; 1235 BTRFS_I(inode)->outstanding_extents++;
1252 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1236 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -1264,38 +1248,10 @@ static int btrfs_merge_extent_hook(struct inode *inode,
1264 struct extent_state *new, 1248 struct extent_state *new,
1265 struct extent_state *other) 1249 struct extent_state *other)
1266{ 1250{
1267 struct btrfs_root *root = BTRFS_I(inode)->root;
1268 u64 new_size, old_size;
1269 u64 num_extents;
1270
1271 /* not delalloc, ignore it */ 1251 /* not delalloc, ignore it */
1272 if (!(other->state & EXTENT_DELALLOC)) 1252 if (!(other->state & EXTENT_DELALLOC))
1273 return 0; 1253 return 0;
1274 1254
1275 old_size = other->end - other->start + 1;
1276 if (new->start < other->start)
1277 new_size = other->end - new->start + 1;
1278 else
1279 new_size = new->end - other->start + 1;
1280
1281 /* we're not bigger than the max, unreserve the space and go */
1282 if (new_size <= root->fs_info->max_extent) {
1283 spin_lock(&BTRFS_I(inode)->accounting_lock);
1284 BTRFS_I(inode)->outstanding_extents--;
1285 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1286 return 0;
1287 }
1288
1289 /*
1290 * If we grew by another max_extent, just return, we want to keep that
1291 * reserved amount.
1292 */
1293 num_extents = div64_u64(old_size + root->fs_info->max_extent - 1,
1294 root->fs_info->max_extent);
1295 if (div64_u64(new_size + root->fs_info->max_extent - 1,
1296 root->fs_info->max_extent) > num_extents)
1297 return 0;
1298
1299 spin_lock(&BTRFS_I(inode)->accounting_lock); 1255 spin_lock(&BTRFS_I(inode)->accounting_lock);
1300 BTRFS_I(inode)->outstanding_extents--; 1256 BTRFS_I(inode)->outstanding_extents--;
1301 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1257 spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -1324,6 +1280,7 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
1324 BTRFS_I(inode)->outstanding_extents++; 1280 BTRFS_I(inode)->outstanding_extents++;
1325 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1281 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1326 btrfs_delalloc_reserve_space(root, inode, end - start + 1); 1282 btrfs_delalloc_reserve_space(root, inode, end - start + 1);
1283
1327 spin_lock(&root->fs_info->delalloc_lock); 1284 spin_lock(&root->fs_info->delalloc_lock);
1328 BTRFS_I(inode)->delalloc_bytes += end - start + 1; 1285 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
1329 root->fs_info->delalloc_bytes += end - start + 1; 1286 root->fs_info->delalloc_bytes += end - start + 1;
@@ -1352,6 +1309,7 @@ static int btrfs_clear_bit_hook(struct inode *inode,
1352 1309
1353 if (bits & EXTENT_DO_ACCOUNTING) { 1310 if (bits & EXTENT_DO_ACCOUNTING) {
1354 spin_lock(&BTRFS_I(inode)->accounting_lock); 1311 spin_lock(&BTRFS_I(inode)->accounting_lock);
1312 WARN_ON(!BTRFS_I(inode)->outstanding_extents);
1355 BTRFS_I(inode)->outstanding_extents--; 1313 BTRFS_I(inode)->outstanding_extents--;
1356 spin_unlock(&BTRFS_I(inode)->accounting_lock); 1314 spin_unlock(&BTRFS_I(inode)->accounting_lock);
1357 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 1315 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
@@ -1508,12 +1466,13 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
1508 return 0; 1466 return 0;
1509} 1467}
1510 1468
1511int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end) 1469int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
1470 struct extent_state **cached_state)
1512{ 1471{
1513 if ((end & (PAGE_CACHE_SIZE - 1)) == 0) 1472 if ((end & (PAGE_CACHE_SIZE - 1)) == 0)
1514 WARN_ON(1); 1473 WARN_ON(1);
1515 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end, 1474 return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
1516 GFP_NOFS); 1475 cached_state, GFP_NOFS);
1517} 1476}
1518 1477
1519/* see btrfs_writepage_start_hook for details on why this is required */ 1478/* see btrfs_writepage_start_hook for details on why this is required */
@@ -1526,6 +1485,7 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
1526{ 1485{
1527 struct btrfs_writepage_fixup *fixup; 1486 struct btrfs_writepage_fixup *fixup;
1528 struct btrfs_ordered_extent *ordered; 1487 struct btrfs_ordered_extent *ordered;
1488 struct extent_state *cached_state = NULL;
1529 struct page *page; 1489 struct page *page;
1530 struct inode *inode; 1490 struct inode *inode;
1531 u64 page_start; 1491 u64 page_start;
@@ -1544,7 +1504,8 @@ again:
1544 page_start = page_offset(page); 1504 page_start = page_offset(page);
1545 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1; 1505 page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
1546 1506
1547 lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1507 lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 0,
1508 &cached_state, GFP_NOFS);
1548 1509
1549 /* already ordered? We're done */ 1510 /* already ordered? We're done */
1550 if (PagePrivate2(page)) 1511 if (PagePrivate2(page))
@@ -1552,17 +1513,18 @@ again:
1552 1513
1553 ordered = btrfs_lookup_ordered_extent(inode, page_start); 1514 ordered = btrfs_lookup_ordered_extent(inode, page_start);
1554 if (ordered) { 1515 if (ordered) {
1555 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, 1516 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
1556 page_end, GFP_NOFS); 1517 page_end, &cached_state, GFP_NOFS);
1557 unlock_page(page); 1518 unlock_page(page);
1558 btrfs_start_ordered_extent(inode, ordered, 1); 1519 btrfs_start_ordered_extent(inode, ordered, 1);
1559 goto again; 1520 goto again;
1560 } 1521 }
1561 1522
1562 btrfs_set_extent_delalloc(inode, page_start, page_end); 1523 btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state);
1563 ClearPageChecked(page); 1524 ClearPageChecked(page);
1564out: 1525out:
1565 unlock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS); 1526 unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start, page_end,
1527 &cached_state, GFP_NOFS);
1566out_page: 1528out_page:
1567 unlock_page(page); 1529 unlock_page(page);
1568 page_cache_release(page); 1530 page_cache_release(page);
@@ -1691,14 +1653,14 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1691 struct btrfs_trans_handle *trans; 1653 struct btrfs_trans_handle *trans;
1692 struct btrfs_ordered_extent *ordered_extent = NULL; 1654 struct btrfs_ordered_extent *ordered_extent = NULL;
1693 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 1655 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1656 struct extent_state *cached_state = NULL;
1694 int compressed = 0; 1657 int compressed = 0;
1695 int ret; 1658 int ret;
1696 1659
1697 ret = btrfs_dec_test_ordered_pending(inode, start, end - start + 1); 1660 ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
1661 end - start + 1);
1698 if (!ret) 1662 if (!ret)
1699 return 0; 1663 return 0;
1700
1701 ordered_extent = btrfs_lookup_ordered_extent(inode, start);
1702 BUG_ON(!ordered_extent); 1664 BUG_ON(!ordered_extent);
1703 1665
1704 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { 1666 if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
@@ -1713,9 +1675,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1713 goto out; 1675 goto out;
1714 } 1676 }
1715 1677
1716 lock_extent(io_tree, ordered_extent->file_offset, 1678 lock_extent_bits(io_tree, ordered_extent->file_offset,
1717 ordered_extent->file_offset + ordered_extent->len - 1, 1679 ordered_extent->file_offset + ordered_extent->len - 1,
1718 GFP_NOFS); 1680 0, &cached_state, GFP_NOFS);
1719 1681
1720 trans = btrfs_join_transaction(root, 1); 1682 trans = btrfs_join_transaction(root, 1);
1721 1683
@@ -1742,9 +1704,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
1742 ordered_extent->len); 1704 ordered_extent->len);
1743 BUG_ON(ret); 1705 BUG_ON(ret);
1744 } 1706 }
1745 unlock_extent(io_tree, ordered_extent->file_offset, 1707 unlock_extent_cached(io_tree, ordered_extent->file_offset,
1746 ordered_extent->file_offset + ordered_extent->len - 1, 1708 ordered_extent->file_offset +
1747 GFP_NOFS); 1709 ordered_extent->len - 1, &cached_state, GFP_NOFS);
1710
1748 add_pending_csums(trans, inode, ordered_extent->file_offset, 1711 add_pending_csums(trans, inode, ordered_extent->file_offset,
1749 &ordered_extent->list); 1712 &ordered_extent->list);
1750 1713
@@ -2153,7 +2116,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
2153 found_key.objectid = found_key.offset; 2116 found_key.objectid = found_key.offset;
2154 found_key.type = BTRFS_INODE_ITEM_KEY; 2117 found_key.type = BTRFS_INODE_ITEM_KEY;
2155 found_key.offset = 0; 2118 found_key.offset = 0;
2156 inode = btrfs_iget(root->fs_info->sb, &found_key, root); 2119 inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL);
2157 if (IS_ERR(inode)) 2120 if (IS_ERR(inode))
2158 break; 2121 break;
2159 2122
@@ -3081,6 +3044,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
3081 struct btrfs_root *root = BTRFS_I(inode)->root; 3044 struct btrfs_root *root = BTRFS_I(inode)->root;
3082 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3045 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3083 struct btrfs_ordered_extent *ordered; 3046 struct btrfs_ordered_extent *ordered;
3047 struct extent_state *cached_state = NULL;
3084 char *kaddr; 3048 char *kaddr;
3085 u32 blocksize = root->sectorsize; 3049 u32 blocksize = root->sectorsize;
3086 pgoff_t index = from >> PAGE_CACHE_SHIFT; 3050 pgoff_t index = from >> PAGE_CACHE_SHIFT;
@@ -3127,12 +3091,14 @@ again:
3127 } 3091 }
3128 wait_on_page_writeback(page); 3092 wait_on_page_writeback(page);
3129 3093
3130 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 3094 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
3095 GFP_NOFS);
3131 set_page_extent_mapped(page); 3096 set_page_extent_mapped(page);
3132 3097
3133 ordered = btrfs_lookup_ordered_extent(inode, page_start); 3098 ordered = btrfs_lookup_ordered_extent(inode, page_start);
3134 if (ordered) { 3099 if (ordered) {
3135 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3100 unlock_extent_cached(io_tree, page_start, page_end,
3101 &cached_state, GFP_NOFS);
3136 unlock_page(page); 3102 unlock_page(page);
3137 page_cache_release(page); 3103 page_cache_release(page);
3138 btrfs_start_ordered_extent(inode, ordered, 1); 3104 btrfs_start_ordered_extent(inode, ordered, 1);
@@ -3140,13 +3106,15 @@ again:
3140 goto again; 3106 goto again;
3141 } 3107 }
3142 3108
3143 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 3109 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
3144 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 3110 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
3145 GFP_NOFS); 3111 0, 0, &cached_state, GFP_NOFS);
3146 3112
3147 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 3113 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
3114 &cached_state);
3148 if (ret) { 3115 if (ret) {
3149 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3116 unlock_extent_cached(io_tree, page_start, page_end,
3117 &cached_state, GFP_NOFS);
3150 goto out_unlock; 3118 goto out_unlock;
3151 } 3119 }
3152 3120
@@ -3159,7 +3127,8 @@ again:
3159 } 3127 }
3160 ClearPageChecked(page); 3128 ClearPageChecked(page);
3161 set_page_dirty(page); 3129 set_page_dirty(page);
3162 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 3130 unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
3131 GFP_NOFS);
3163 3132
3164out_unlock: 3133out_unlock:
3165 if (ret) 3134 if (ret)
@@ -3177,6 +3146,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3177 struct btrfs_root *root = BTRFS_I(inode)->root; 3146 struct btrfs_root *root = BTRFS_I(inode)->root;
3178 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 3147 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
3179 struct extent_map *em; 3148 struct extent_map *em;
3149 struct extent_state *cached_state = NULL;
3180 u64 mask = root->sectorsize - 1; 3150 u64 mask = root->sectorsize - 1;
3181 u64 hole_start = (inode->i_size + mask) & ~mask; 3151 u64 hole_start = (inode->i_size + mask) & ~mask;
3182 u64 block_end = (size + mask) & ~mask; 3152 u64 block_end = (size + mask) & ~mask;
@@ -3192,11 +3162,13 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3192 struct btrfs_ordered_extent *ordered; 3162 struct btrfs_ordered_extent *ordered;
3193 btrfs_wait_ordered_range(inode, hole_start, 3163 btrfs_wait_ordered_range(inode, hole_start,
3194 block_end - hole_start); 3164 block_end - hole_start);
3195 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3165 lock_extent_bits(io_tree, hole_start, block_end - 1, 0,
3166 &cached_state, GFP_NOFS);
3196 ordered = btrfs_lookup_ordered_extent(inode, hole_start); 3167 ordered = btrfs_lookup_ordered_extent(inode, hole_start);
3197 if (!ordered) 3168 if (!ordered)
3198 break; 3169 break;
3199 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3170 unlock_extent_cached(io_tree, hole_start, block_end - 1,
3171 &cached_state, GFP_NOFS);
3200 btrfs_put_ordered_extent(ordered); 3172 btrfs_put_ordered_extent(ordered);
3201 } 3173 }
3202 3174
@@ -3241,7 +3213,8 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
3241 break; 3213 break;
3242 } 3214 }
3243 3215
3244 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS); 3216 unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,
3217 GFP_NOFS);
3245 return err; 3218 return err;
3246} 3219}
3247 3220
@@ -3639,6 +3612,7 @@ static noinline void init_btrfs_i(struct inode *inode)
3639 bi->index_cnt = (u64)-1; 3612 bi->index_cnt = (u64)-1;
3640 bi->last_unlink_trans = 0; 3613 bi->last_unlink_trans = 0;
3641 bi->ordered_data_close = 0; 3614 bi->ordered_data_close = 0;
3615 bi->force_compress = 0;
3642 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); 3616 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3643 extent_io_tree_init(&BTRFS_I(inode)->io_tree, 3617 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3644 inode->i_mapping, GFP_NOFS); 3618 inode->i_mapping, GFP_NOFS);
@@ -3687,7 +3661,7 @@ static struct inode *btrfs_iget_locked(struct super_block *s,
3687 * Returns in *is_new if the inode was read from disk 3661 * Returns in *is_new if the inode was read from disk
3688 */ 3662 */
3689struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, 3663struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3690 struct btrfs_root *root) 3664 struct btrfs_root *root, int *new)
3691{ 3665{
3692 struct inode *inode; 3666 struct inode *inode;
3693 3667
@@ -3702,6 +3676,8 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
3702 3676
3703 inode_tree_add(inode); 3677 inode_tree_add(inode);
3704 unlock_new_inode(inode); 3678 unlock_new_inode(inode);
3679 if (new)
3680 *new = 1;
3705 } 3681 }
3706 3682
3707 return inode; 3683 return inode;
@@ -3754,7 +3730,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3754 return NULL; 3730 return NULL;
3755 3731
3756 if (location.type == BTRFS_INODE_ITEM_KEY) { 3732 if (location.type == BTRFS_INODE_ITEM_KEY) {
3757 inode = btrfs_iget(dir->i_sb, &location, root); 3733 inode = btrfs_iget(dir->i_sb, &location, root, NULL);
3758 return inode; 3734 return inode;
3759 } 3735 }
3760 3736
@@ -3769,7 +3745,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
3769 else 3745 else
3770 inode = new_simple_dir(dir->i_sb, &location, sub_root); 3746 inode = new_simple_dir(dir->i_sb, &location, sub_root);
3771 } else { 3747 } else {
3772 inode = btrfs_iget(dir->i_sb, &location, sub_root); 3748 inode = btrfs_iget(dir->i_sb, &location, sub_root, NULL);
3773 } 3749 }
3774 srcu_read_unlock(&root->fs_info->subvol_srcu, index); 3750 srcu_read_unlock(&root->fs_info->subvol_srcu, index);
3775 3751
@@ -4501,7 +4477,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
4501 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); 4477 err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
4502 if (err) { 4478 if (err) {
4503 err = -ENOSPC; 4479 err = -ENOSPC;
4504 goto out_unlock; 4480 goto out_fail;
4505 } 4481 }
4506 4482
4507 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, 4483 inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
@@ -4979,6 +4955,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4979{ 4955{
4980 struct extent_io_tree *tree; 4956 struct extent_io_tree *tree;
4981 struct btrfs_ordered_extent *ordered; 4957 struct btrfs_ordered_extent *ordered;
4958 struct extent_state *cached_state = NULL;
4982 u64 page_start = page_offset(page); 4959 u64 page_start = page_offset(page);
4983 u64 page_end = page_start + PAGE_CACHE_SIZE - 1; 4960 u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
4984 4961
@@ -4997,7 +4974,8 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
4997 btrfs_releasepage(page, GFP_NOFS); 4974 btrfs_releasepage(page, GFP_NOFS);
4998 return; 4975 return;
4999 } 4976 }
5000 lock_extent(tree, page_start, page_end, GFP_NOFS); 4977 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
4978 GFP_NOFS);
5001 ordered = btrfs_lookup_ordered_extent(page->mapping->host, 4979 ordered = btrfs_lookup_ordered_extent(page->mapping->host,
5002 page_offset(page)); 4980 page_offset(page));
5003 if (ordered) { 4981 if (ordered) {
@@ -5008,7 +4986,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5008 clear_extent_bit(tree, page_start, page_end, 4986 clear_extent_bit(tree, page_start, page_end,
5009 EXTENT_DIRTY | EXTENT_DELALLOC | 4987 EXTENT_DIRTY | EXTENT_DELALLOC |
5010 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0, 4988 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING, 1, 0,
5011 NULL, GFP_NOFS); 4989 &cached_state, GFP_NOFS);
5012 /* 4990 /*
5013 * whoever cleared the private bit is responsible 4991 * whoever cleared the private bit is responsible
5014 * for the finish_ordered_io 4992 * for the finish_ordered_io
@@ -5018,11 +4996,13 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)
5018 page_start, page_end); 4996 page_start, page_end);
5019 } 4997 }
5020 btrfs_put_ordered_extent(ordered); 4998 btrfs_put_ordered_extent(ordered);
5021 lock_extent(tree, page_start, page_end, GFP_NOFS); 4999 cached_state = NULL;
5000 lock_extent_bits(tree, page_start, page_end, 0, &cached_state,
5001 GFP_NOFS);
5022 } 5002 }
5023 clear_extent_bit(tree, page_start, page_end, 5003 clear_extent_bit(tree, page_start, page_end,
5024 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | 5004 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC |
5025 EXTENT_DO_ACCOUNTING, 1, 1, NULL, GFP_NOFS); 5005 EXTENT_DO_ACCOUNTING, 1, 1, &cached_state, GFP_NOFS);
5026 __btrfs_releasepage(page, GFP_NOFS); 5006 __btrfs_releasepage(page, GFP_NOFS);
5027 5007
5028 ClearPageChecked(page); 5008 ClearPageChecked(page);
@@ -5055,6 +5035,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
5055 struct btrfs_root *root = BTRFS_I(inode)->root; 5035 struct btrfs_root *root = BTRFS_I(inode)->root;
5056 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5036 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
5057 struct btrfs_ordered_extent *ordered; 5037 struct btrfs_ordered_extent *ordered;
5038 struct extent_state *cached_state = NULL;
5058 char *kaddr; 5039 char *kaddr;
5059 unsigned long zero_start; 5040 unsigned long zero_start;
5060 loff_t size; 5041 loff_t size;
@@ -5093,7 +5074,8 @@ again:
5093 } 5074 }
5094 wait_on_page_writeback(page); 5075 wait_on_page_writeback(page);
5095 5076
5096 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 5077 lock_extent_bits(io_tree, page_start, page_end, 0, &cached_state,
5078 GFP_NOFS);
5097 set_page_extent_mapped(page); 5079 set_page_extent_mapped(page);
5098 5080
5099 /* 5081 /*
@@ -5102,7 +5084,8 @@ again:
5102 */ 5084 */
5103 ordered = btrfs_lookup_ordered_extent(inode, page_start); 5085 ordered = btrfs_lookup_ordered_extent(inode, page_start);
5104 if (ordered) { 5086 if (ordered) {
5105 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5087 unlock_extent_cached(io_tree, page_start, page_end,
5088 &cached_state, GFP_NOFS);
5106 unlock_page(page); 5089 unlock_page(page);
5107 btrfs_start_ordered_extent(inode, ordered, 1); 5090 btrfs_start_ordered_extent(inode, ordered, 1);
5108 btrfs_put_ordered_extent(ordered); 5091 btrfs_put_ordered_extent(ordered);
@@ -5116,13 +5099,15 @@ again:
5116 * is probably a better way to do this, but for now keep consistent with 5099 * is probably a better way to do this, but for now keep consistent with
5117 * prepare_pages in the normal write path. 5100 * prepare_pages in the normal write path.
5118 */ 5101 */
5119 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end, 5102 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
5120 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING, 5103 EXTENT_DIRTY | EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING,
5121 GFP_NOFS); 5104 0, 0, &cached_state, GFP_NOFS);
5122 5105
5123 ret = btrfs_set_extent_delalloc(inode, page_start, page_end); 5106 ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
5107 &cached_state);
5124 if (ret) { 5108 if (ret) {
5125 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5109 unlock_extent_cached(io_tree, page_start, page_end,
5110 &cached_state, GFP_NOFS);
5126 ret = VM_FAULT_SIGBUS; 5111 ret = VM_FAULT_SIGBUS;
5127 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); 5112 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
5128 goto out_unlock; 5113 goto out_unlock;
@@ -5148,7 +5133,7 @@ again:
5148 BTRFS_I(inode)->last_trans = root->fs_info->generation; 5133 BTRFS_I(inode)->last_trans = root->fs_info->generation;
5149 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid; 5134 BTRFS_I(inode)->last_sub_trans = BTRFS_I(inode)->root->log_transid;
5150 5135
5151 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5136 unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
5152 5137
5153out_unlock: 5138out_unlock:
5154 btrfs_unreserve_metadata_for_delalloc(root, inode, 1); 5139 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
@@ -5353,7 +5338,6 @@ free:
5353void btrfs_drop_inode(struct inode *inode) 5338void btrfs_drop_inode(struct inode *inode)
5354{ 5339{
5355 struct btrfs_root *root = BTRFS_I(inode)->root; 5340 struct btrfs_root *root = BTRFS_I(inode)->root;
5356
5357 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) 5341 if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0)
5358 generic_delete_inode(inode); 5342 generic_delete_inode(inode);
5359 else 5343 else
@@ -5757,18 +5741,15 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
5757 struct btrfs_trans_handle *trans; 5741 struct btrfs_trans_handle *trans;
5758 struct btrfs_root *root = BTRFS_I(inode)->root; 5742 struct btrfs_root *root = BTRFS_I(inode)->root;
5759 struct btrfs_key ins; 5743 struct btrfs_key ins;
5760 u64 alloc_size;
5761 u64 cur_offset = start; 5744 u64 cur_offset = start;
5762 u64 num_bytes = end - start; 5745 u64 num_bytes = end - start;
5763 int ret = 0; 5746 int ret = 0;
5764 u64 i_size; 5747 u64 i_size;
5765 5748
5766 while (num_bytes > 0) { 5749 while (num_bytes > 0) {
5767 alloc_size = min(num_bytes, root->fs_info->max_extent);
5768
5769 trans = btrfs_start_transaction(root, 1); 5750 trans = btrfs_start_transaction(root, 1);
5770 5751
5771 ret = btrfs_reserve_extent(trans, root, alloc_size, 5752 ret = btrfs_reserve_extent(trans, root, num_bytes,
5772 root->sectorsize, 0, alloc_hint, 5753 root->sectorsize, 0, alloc_hint,
5773 (u64)-1, &ins, 1); 5754 (u64)-1, &ins, 1);
5774 if (ret) { 5755 if (ret) {
@@ -5827,6 +5808,7 @@ stop_trans:
5827static long btrfs_fallocate(struct inode *inode, int mode, 5808static long btrfs_fallocate(struct inode *inode, int mode,
5828 loff_t offset, loff_t len) 5809 loff_t offset, loff_t len)
5829{ 5810{
5811 struct extent_state *cached_state = NULL;
5830 u64 cur_offset; 5812 u64 cur_offset;
5831 u64 last_byte; 5813 u64 last_byte;
5832 u64 alloc_start; 5814 u64 alloc_start;
@@ -5865,16 +5847,17 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5865 /* the extent lock is ordered inside the running 5847 /* the extent lock is ordered inside the running
5866 * transaction 5848 * transaction
5867 */ 5849 */
5868 lock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5850 lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
5869 GFP_NOFS); 5851 locked_end, 0, &cached_state, GFP_NOFS);
5870 ordered = btrfs_lookup_first_ordered_extent(inode, 5852 ordered = btrfs_lookup_first_ordered_extent(inode,
5871 alloc_end - 1); 5853 alloc_end - 1);
5872 if (ordered && 5854 if (ordered &&
5873 ordered->file_offset + ordered->len > alloc_start && 5855 ordered->file_offset + ordered->len > alloc_start &&
5874 ordered->file_offset < alloc_end) { 5856 ordered->file_offset < alloc_end) {
5875 btrfs_put_ordered_extent(ordered); 5857 btrfs_put_ordered_extent(ordered);
5876 unlock_extent(&BTRFS_I(inode)->io_tree, 5858 unlock_extent_cached(&BTRFS_I(inode)->io_tree,
5877 alloc_start, locked_end, GFP_NOFS); 5859 alloc_start, locked_end,
5860 &cached_state, GFP_NOFS);
5878 /* 5861 /*
5879 * we can't wait on the range with the transaction 5862 * we can't wait on the range with the transaction
5880 * running or with the extent lock held 5863 * running or with the extent lock held
@@ -5916,8 +5899,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
5916 break; 5899 break;
5917 } 5900 }
5918 } 5901 }
5919 unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, 5902 unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
5920 GFP_NOFS); 5903 &cached_state, GFP_NOFS);
5921 5904
5922 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, 5905 btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
5923 alloc_end - alloc_start); 5906 alloc_end - alloc_start);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 645a17927a8f..e84ef60ffe35 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -39,6 +39,7 @@
39#include <linux/security.h> 39#include <linux/security.h>
40#include <linux/xattr.h> 40#include <linux/xattr.h>
41#include <linux/vmalloc.h> 41#include <linux/vmalloc.h>
42#include <linux/slab.h>
42#include "compat.h" 43#include "compat.h"
43#include "ctree.h" 44#include "ctree.h"
44#include "disk-io.h" 45#include "disk-io.h"
@@ -474,7 +475,79 @@ out_unlock:
474 return error; 475 return error;
475} 476}
476 477
/*
 * should_defrag_range: decide whether the range starting at 'start' and
 * covering 'len' bytes of 'inode' should be defragmented.
 *
 * thresh:     size threshold; 0 selects the default of 256 * 1024.
 * last_len:   in/out running count, grown by 'len' on every "yes" answer
 *             and reset to 0 on every "no" answer.
 * skip:       out; on a "no" answer set to the end of the extent map that
 *             was examined (0 when the lookup itself failed).
 * defrag_end: in/out; end of the extent currently being defragged, so that
 *             later calls inside that extent keep answering "yes".
 *
 * Returns 1 when the range should be defragged and 0 when it should not.
 */
477static int btrfs_defrag_file(struct file *file) 478static int should_defrag_range(struct inode *inode, u64 start, u64 len,
479 int thresh, u64 *last_len, u64 *skip,
480 u64 *defrag_end)
481{
482 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
483 struct extent_map *em = NULL;
484 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
485 int ret = 1;
486
487
488 if (thresh == 0)
489 thresh = 256 * 1024;
490
491 /*
492 * make sure that once we start defragging an extent, we keep on
493 * defragging it
494 */
495 if (start < *defrag_end)
496 return 1;
497
498 *skip = 0;
499
500 /*
501 * hopefully we have this extent in the tree already, try without
502 * the full extent lock
503 */
504 read_lock(&em_tree->lock);
505 em = lookup_extent_mapping(em_tree, start, len);
506 read_unlock(&em_tree->lock);
507
508 if (!em) {
509 /* get the big lock and read metadata off disk */
510 lock_extent(io_tree, start, start + len - 1, GFP_NOFS);
511 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
512 unlock_extent(io_tree, start, start + len - 1, GFP_NOFS);
513
/* lookup failed: answer "no" with *skip left at 0; last_len and defrag_end are untouched */
514 if (IS_ERR(em))
515 return 0;
516 }
517
518 /* this will cover holes, and inline extents */
519 if (em->block_start >= EXTENT_MAP_LAST_BYTE)
520 ret = 0;
521
522 /*
523 * we hit a real extent, if it is big don't bother defragging it again
524 */
/*
 * NOTE(review): thresh is a signed int compared against the u64 *last_len,
 * so a negative thresh is converted to a very large unsigned value here;
 * confirm that callers rely on this.
 */
525 if ((*last_len == 0 || *last_len >= thresh) && em->len >= thresh)
526 ret = 0;
527
528 /*
529 * last_len ends up being a counter of how many bytes we've defragged.
530 * every time we choose not to defrag an extent, we reset *last_len
531 * so that the next tiny extent will force a defrag.
532 *
533 * The end result of this is that tiny extents before a single big
534 * extent will force at least part of that big extent to be defragged.
535 */
536 if (ret) {
537 *last_len += len;
538 *defrag_end = extent_map_end(em);
539 } else {
540 *last_len = 0;
541 *skip = extent_map_end(em);
542 *defrag_end = 0;
543 }
544
/* drop the reference obtained from the lookup above */
545 free_extent_map(em);
546 return ret;
547}
548
549static int btrfs_defrag_file(struct file *file,
550 struct btrfs_ioctl_defrag_range_args *range)
478{ 551{
479 struct inode *inode = fdentry(file)->d_inode; 552 struct inode *inode = fdentry(file)->d_inode;
480 struct btrfs_root *root = BTRFS_I(inode)->root; 553 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -486,37 +559,96 @@ static int btrfs_defrag_file(struct file *file)
486 unsigned long total_read = 0; 559 unsigned long total_read = 0;
487 u64 page_start; 560 u64 page_start;
488 u64 page_end; 561 u64 page_end;
562 u64 last_len = 0;
563 u64 skip = 0;
564 u64 defrag_end = 0;
489 unsigned long i; 565 unsigned long i;
490 int ret; 566 int ret;
491 567
492 ret = btrfs_check_data_free_space(root, inode, inode->i_size); 568 if (inode->i_size == 0)
493 if (ret) 569 return 0;
494 return -ENOSPC; 570
571 if (range->start + range->len > range->start) {
572 last_index = min_t(u64, inode->i_size - 1,
573 range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
574 } else {
575 last_index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
576 }
577
578 i = range->start >> PAGE_CACHE_SHIFT;
579 while (i <= last_index) {
580 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
581 PAGE_CACHE_SIZE,
582 range->extent_thresh,
583 &last_len, &skip,
584 &defrag_end)) {
585 unsigned long next;
586 /*
587 * the should_defrag function tells us how much to skip
588 * bump our counter by the suggested amount
589 */
590 next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
591 i = max(i + 1, next);
592 continue;
593 }
495 594
496 mutex_lock(&inode->i_mutex);
497 last_index = inode->i_size >> PAGE_CACHE_SHIFT;
498 for (i = 0; i <= last_index; i++) {
499 if (total_read % ra_pages == 0) { 595 if (total_read % ra_pages == 0) {
500 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i, 596 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
501 min(last_index, i + ra_pages - 1)); 597 min(last_index, i + ra_pages - 1));
502 } 598 }
503 total_read++; 599 total_read++;
600 mutex_lock(&inode->i_mutex);
601 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
602 BTRFS_I(inode)->force_compress = 1;
603
604 ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE);
605 if (ret) {
606 ret = -ENOSPC;
607 break;
608 }
609
610 ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
611 if (ret) {
612 btrfs_free_reserved_data_space(root, inode,
613 PAGE_CACHE_SIZE);
614 ret = -ENOSPC;
615 break;
616 }
504again: 617again:
618 if (inode->i_size == 0 ||
619 i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) {
620 ret = 0;
621 goto err_reservations;
622 }
623
505 page = grab_cache_page(inode->i_mapping, i); 624 page = grab_cache_page(inode->i_mapping, i);
506 if (!page) 625 if (!page)
507 goto out_unlock; 626 goto err_reservations;
627
508 if (!PageUptodate(page)) { 628 if (!PageUptodate(page)) {
509 btrfs_readpage(NULL, page); 629 btrfs_readpage(NULL, page);
510 lock_page(page); 630 lock_page(page);
511 if (!PageUptodate(page)) { 631 if (!PageUptodate(page)) {
512 unlock_page(page); 632 unlock_page(page);
513 page_cache_release(page); 633 page_cache_release(page);
514 goto out_unlock; 634 goto err_reservations;
515 } 635 }
516 } 636 }
517 637
638 if (page->mapping != inode->i_mapping) {
639 unlock_page(page);
640 page_cache_release(page);
641 goto again;
642 }
643
518 wait_on_page_writeback(page); 644 wait_on_page_writeback(page);
519 645
646 if (PageDirty(page)) {
647 btrfs_free_reserved_data_space(root, inode,
648 PAGE_CACHE_SIZE);
649 goto loop_unlock;
650 }
651
520 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 652 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
521 page_end = page_start + PAGE_CACHE_SIZE - 1; 653 page_end = page_start + PAGE_CACHE_SIZE - 1;
522 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 654 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
@@ -537,18 +669,54 @@ again:
537 * page if it is dirtied again later 669 * page if it is dirtied again later
538 */ 670 */
539 clear_page_dirty_for_io(page); 671 clear_page_dirty_for_io(page);
672 clear_extent_bits(&BTRFS_I(inode)->io_tree, page_start,
673 page_end, EXTENT_DIRTY | EXTENT_DELALLOC |
674 EXTENT_DO_ACCOUNTING, GFP_NOFS);
540 675
541 btrfs_set_extent_delalloc(inode, page_start, page_end); 676 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
677 ClearPageChecked(page);
542 set_page_dirty(page); 678 set_page_dirty(page);
543 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 679 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
680
681loop_unlock:
544 unlock_page(page); 682 unlock_page(page);
545 page_cache_release(page); 683 page_cache_release(page);
684 mutex_unlock(&inode->i_mutex);
685
686 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
546 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); 687 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
688 i++;
689 }
690
691 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
692 filemap_flush(inode->i_mapping);
693
694 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
695 /* the filemap_flush will queue IO into the worker threads, but
696 * we have to make sure the IO is actually started and that
697 * ordered extents get created before we return
698 */
699 atomic_inc(&root->fs_info->async_submit_draining);
700 while (atomic_read(&root->fs_info->nr_async_submits) ||
701 atomic_read(&root->fs_info->async_delalloc_pages)) {
702 wait_event(root->fs_info->async_submit_wait,
703 (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
704 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
705 }
706 atomic_dec(&root->fs_info->async_submit_draining);
707
708 mutex_lock(&inode->i_mutex);
709 BTRFS_I(inode)->force_compress = 0;
710 mutex_unlock(&inode->i_mutex);
547 } 711 }
548 712
549out_unlock:
550 mutex_unlock(&inode->i_mutex);
551 return 0; 713 return 0;
714
715err_reservations:
716 mutex_unlock(&inode->i_mutex);
717 btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE);
718 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
719 return ret;
552} 720}
553 721
554static noinline int btrfs_ioctl_resize(struct btrfs_root *root, 722static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
@@ -608,7 +776,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
608 mod = 1; 776 mod = 1;
609 sizestr++; 777 sizestr++;
610 } 778 }
611 new_size = btrfs_parse_size(sizestr); 779 new_size = memparse(sizestr, NULL);
612 if (new_size == 0) { 780 if (new_size == 0) {
613 ret = -EINVAL; 781 ret = -EINVAL;
614 goto out_unlock; 782 goto out_unlock;
@@ -743,6 +911,330 @@ out:
743 return ret; 911 return ret;
744} 912}
745 913
914static noinline int key_in_sk(struct btrfs_key *key,
915 struct btrfs_ioctl_search_key *sk)
916{
917 struct btrfs_key test;
918 int ret;
919
920 test.objectid = sk->min_objectid;
921 test.type = sk->min_type;
922 test.offset = sk->min_offset;
923
924 ret = btrfs_comp_cpu_keys(key, &test);
925 if (ret < 0)
926 return 0;
927
928 test.objectid = sk->max_objectid;
929 test.type = sk->max_type;
930 test.offset = sk->max_offset;
931
932 ret = btrfs_comp_cpu_keys(key, &test);
933 if (ret > 0)
934 return 0;
935 return 1;
936}
937
938static noinline int copy_to_sk(struct btrfs_root *root,
939 struct btrfs_path *path,
940 struct btrfs_key *key,
941 struct btrfs_ioctl_search_key *sk,
942 char *buf,
943 unsigned long *sk_offset,
944 int *num_found)
945{
946 u64 found_transid;
947 struct extent_buffer *leaf;
948 struct btrfs_ioctl_search_header sh;
949 unsigned long item_off;
950 unsigned long item_len;
951 int nritems;
952 int i;
953 int slot;
954 int found = 0;
955 int ret = 0;
956
957 leaf = path->nodes[0];
958 slot = path->slots[0];
959 nritems = btrfs_header_nritems(leaf);
960
961 if (btrfs_header_generation(leaf) > sk->max_transid) {
962 i = nritems;
963 goto advance_key;
964 }
965 found_transid = btrfs_header_generation(leaf);
966
967 for (i = slot; i < nritems; i++) {
968 item_off = btrfs_item_ptr_offset(leaf, i);
969 item_len = btrfs_item_size_nr(leaf, i);
970
971 if (item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
972 item_len = 0;
973
974 if (sizeof(sh) + item_len + *sk_offset >
975 BTRFS_SEARCH_ARGS_BUFSIZE) {
976 ret = 1;
977 goto overflow;
978 }
979
980 btrfs_item_key_to_cpu(leaf, key, i);
981 if (!key_in_sk(key, sk))
982 continue;
983
984 sh.objectid = key->objectid;
985 sh.offset = key->offset;
986 sh.type = key->type;
987 sh.len = item_len;
988 sh.transid = found_transid;
989
990 /* copy search result header */
991 memcpy(buf + *sk_offset, &sh, sizeof(sh));
992 *sk_offset += sizeof(sh);
993
994 if (item_len) {
995 char *p = buf + *sk_offset;
996 /* copy the item */
997 read_extent_buffer(leaf, p,
998 item_off, item_len);
999 *sk_offset += item_len;
1000 }
1001 found++;
1002
1003 if (*num_found >= sk->nr_items)
1004 break;
1005 }
1006advance_key:
1007 ret = 0;
1008 if (key->offset < (u64)-1 && key->offset < sk->max_offset)
1009 key->offset++;
1010 else if (key->type < (u8)-1 && key->type < sk->max_type) {
1011 key->offset = 0;
1012 key->type++;
1013 } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) {
1014 key->offset = 0;
1015 key->type = 0;
1016 key->objectid++;
1017 } else
1018 ret = 1;
1019overflow:
1020 *num_found += found;
1021 return ret;
1022}
1023
1024static noinline int search_ioctl(struct inode *inode,
1025 struct btrfs_ioctl_search_args *args)
1026{
1027 struct btrfs_root *root;
1028 struct btrfs_key key;
1029 struct btrfs_key max_key;
1030 struct btrfs_path *path;
1031 struct btrfs_ioctl_search_key *sk = &args->key;
1032 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
1033 int ret;
1034 int num_found = 0;
1035 unsigned long sk_offset = 0;
1036
1037 path = btrfs_alloc_path();
1038 if (!path)
1039 return -ENOMEM;
1040
1041 if (sk->tree_id == 0) {
1042 /* search the root of the inode that was passed */
1043 root = BTRFS_I(inode)->root;
1044 } else {
1045 key.objectid = sk->tree_id;
1046 key.type = BTRFS_ROOT_ITEM_KEY;
1047 key.offset = (u64)-1;
1048 root = btrfs_read_fs_root_no_name(info, &key);
1049 if (IS_ERR(root)) {
1050 printk(KERN_ERR "could not find root %llu\n",
1051 sk->tree_id);
1052 btrfs_free_path(path);
1053 return -ENOENT;
1054 }
1055 }
1056
1057 key.objectid = sk->min_objectid;
1058 key.type = sk->min_type;
1059 key.offset = sk->min_offset;
1060
1061 max_key.objectid = sk->max_objectid;
1062 max_key.type = sk->max_type;
1063 max_key.offset = sk->max_offset;
1064
1065 path->keep_locks = 1;
1066
1067 while(1) {
1068 ret = btrfs_search_forward(root, &key, &max_key, path, 0,
1069 sk->min_transid);
1070 if (ret != 0) {
1071 if (ret > 0)
1072 ret = 0;
1073 goto err;
1074 }
1075 ret = copy_to_sk(root, path, &key, sk, args->buf,
1076 &sk_offset, &num_found);
1077 btrfs_release_path(root, path);
1078 if (ret || num_found >= sk->nr_items)
1079 break;
1080
1081 }
1082 ret = 0;
1083err:
1084 sk->nr_items = num_found;
1085 btrfs_free_path(path);
1086 return ret;
1087}
1088
1089static noinline int btrfs_ioctl_tree_search(struct file *file,
1090 void __user *argp)
1091{
1092 struct btrfs_ioctl_search_args *args;
1093 struct inode *inode;
1094 int ret;
1095
1096 if (!capable(CAP_SYS_ADMIN))
1097 return -EPERM;
1098
1099 args = kmalloc(sizeof(*args), GFP_KERNEL);
1100 if (!args)
1101 return -ENOMEM;
1102
1103 if (copy_from_user(args, argp, sizeof(*args))) {
1104 kfree(args);
1105 return -EFAULT;
1106 }
1107 inode = fdentry(file)->d_inode;
1108 ret = search_ioctl(inode, args);
1109 if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1110 ret = -EFAULT;
1111 kfree(args);
1112 return ret;
1113}
1114
1115/*
1116 * Search INODE_REFs to identify path name of 'dirid' directory
1117 * in a 'tree_id' tree. and sets path name to 'name'.
1118 */
1119static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
1120 u64 tree_id, u64 dirid, char *name)
1121{
1122 struct btrfs_root *root;
1123 struct btrfs_key key;
1124 char *ptr;
1125 int ret = -1;
1126 int slot;
1127 int len;
1128 int total_len = 0;
1129 struct btrfs_inode_ref *iref;
1130 struct extent_buffer *l;
1131 struct btrfs_path *path;
1132
1133 if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
1134 name[0]='\0';
1135 return 0;
1136 }
1137
1138 path = btrfs_alloc_path();
1139 if (!path)
1140 return -ENOMEM;
1141
1142 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX];
1143
1144 key.objectid = tree_id;
1145 key.type = BTRFS_ROOT_ITEM_KEY;
1146 key.offset = (u64)-1;
1147 root = btrfs_read_fs_root_no_name(info, &key);
1148 if (IS_ERR(root)) {
1149 printk(KERN_ERR "could not find root %llu\n", tree_id);
1150 ret = -ENOENT;
1151 goto out;
1152 }
1153
1154 key.objectid = dirid;
1155 key.type = BTRFS_INODE_REF_KEY;
1156 key.offset = (u64)-1;
1157
1158 while(1) {
1159 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1160 if (ret < 0)
1161 goto out;
1162
1163 l = path->nodes[0];
1164 slot = path->slots[0];
1165 if (ret > 0 && slot > 0)
1166 slot--;
1167 btrfs_item_key_to_cpu(l, &key, slot);
1168
1169 if (ret > 0 && (key.objectid != dirid ||
1170 key.type != BTRFS_INODE_REF_KEY)) {
1171 ret = -ENOENT;
1172 goto out;
1173 }
1174
1175 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
1176 len = btrfs_inode_ref_name_len(l, iref);
1177 ptr -= len + 1;
1178 total_len += len + 1;
1179 if (ptr < name)
1180 goto out;
1181
1182 *(ptr + len) = '/';
1183 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
1184
1185 if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
1186 break;
1187
1188 btrfs_release_path(root, path);
1189 key.objectid = key.offset;
1190 key.offset = (u64)-1;
1191 dirid = key.objectid;
1192
1193 }
1194 if (ptr < name)
1195 goto out;
1196 memcpy(name, ptr, total_len);
1197 name[total_len]='\0';
1198 ret = 0;
1199out:
1200 btrfs_free_path(path);
1201 return ret;
1202}
1203
1204static noinline int btrfs_ioctl_ino_lookup(struct file *file,
1205 void __user *argp)
1206{
1207 struct btrfs_ioctl_ino_lookup_args *args;
1208 struct inode *inode;
1209 int ret;
1210
1211 if (!capable(CAP_SYS_ADMIN))
1212 return -EPERM;
1213
1214 args = kmalloc(sizeof(*args), GFP_KERNEL);
1215 if (!args)
1216 return -ENOMEM;
1217
1218 if (copy_from_user(args, argp, sizeof(*args))) {
1219 kfree(args);
1220 return -EFAULT;
1221 }
1222 inode = fdentry(file)->d_inode;
1223
1224 if (args->treeid == 0)
1225 args->treeid = BTRFS_I(inode)->root->root_key.objectid;
1226
1227 ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
1228 args->treeid, args->objectid,
1229 args->name);
1230
1231 if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
1232 ret = -EFAULT;
1233
1234 kfree(args);
1235 return ret;
1236}
1237
746static noinline int btrfs_ioctl_snap_destroy(struct file *file, 1238static noinline int btrfs_ioctl_snap_destroy(struct file *file,
747 void __user *arg) 1239 void __user *arg)
748{ 1240{
@@ -849,10 +1341,11 @@ out:
849 return err; 1341 return err;
850} 1342}
851 1343
852static int btrfs_ioctl_defrag(struct file *file) 1344static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
853{ 1345{
854 struct inode *inode = fdentry(file)->d_inode; 1346 struct inode *inode = fdentry(file)->d_inode;
855 struct btrfs_root *root = BTRFS_I(inode)->root; 1347 struct btrfs_root *root = BTRFS_I(inode)->root;
1348 struct btrfs_ioctl_defrag_range_args *range;
856 int ret; 1349 int ret;
857 1350
858 ret = mnt_want_write(file->f_path.mnt); 1351 ret = mnt_want_write(file->f_path.mnt);
@@ -873,7 +1366,31 @@ static int btrfs_ioctl_defrag(struct file *file)
873 ret = -EINVAL; 1366 ret = -EINVAL;
874 goto out; 1367 goto out;
875 } 1368 }
876 btrfs_defrag_file(file); 1369
1370 range = kzalloc(sizeof(*range), GFP_KERNEL);
1371 if (!range) {
1372 ret = -ENOMEM;
1373 goto out;
1374 }
1375
1376 if (argp) {
1377 if (copy_from_user(range, argp,
1378 sizeof(*range))) {
1379 ret = -EFAULT;
1380 kfree(range);
1381 goto out;
1382 }
1383 /* compression requires us to start the IO */
1384 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
1385 range->flags |= BTRFS_DEFRAG_RANGE_START_IO;
1386 range->extent_thresh = (u32)-1;
1387 }
1388 } else {
1389 /* the rest are all set to zero by kzalloc */
1390 range->len = (u64)-1;
1391 }
1392 btrfs_defrag_file(file, range);
1393 kfree(range);
877 break; 1394 break;
878 } 1395 }
879out: 1396out:
@@ -1274,6 +1791,157 @@ out:
1274 return ret; 1791 return ret;
1275} 1792}
1276 1793
1794static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
1795{
1796 struct inode *inode = fdentry(file)->d_inode;
1797 struct btrfs_root *root = BTRFS_I(inode)->root;
1798 struct btrfs_root *new_root;
1799 struct btrfs_dir_item *di;
1800 struct btrfs_trans_handle *trans;
1801 struct btrfs_path *path;
1802 struct btrfs_key location;
1803 struct btrfs_disk_key disk_key;
1804 struct btrfs_super_block *disk_super;
1805 u64 features;
1806 u64 objectid = 0;
1807 u64 dir_id;
1808
1809 if (!capable(CAP_SYS_ADMIN))
1810 return -EPERM;
1811
1812 if (copy_from_user(&objectid, argp, sizeof(objectid)))
1813 return -EFAULT;
1814
1815 if (!objectid)
1816 objectid = root->root_key.objectid;
1817
1818 location.objectid = objectid;
1819 location.type = BTRFS_ROOT_ITEM_KEY;
1820 location.offset = (u64)-1;
1821
1822 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
1823 if (IS_ERR(new_root))
1824 return PTR_ERR(new_root);
1825
1826 if (btrfs_root_refs(&new_root->root_item) == 0)
1827 return -ENOENT;
1828
1829 path = btrfs_alloc_path();
1830 if (!path)
1831 return -ENOMEM;
1832 path->leave_spinning = 1;
1833
1834 trans = btrfs_start_transaction(root, 1);
1835 if (!trans) {
1836 btrfs_free_path(path);
1837 return -ENOMEM;
1838 }
1839
1840 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
1841 di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path,
1842 dir_id, "default", 7, 1);
1843 if (!di) {
1844 btrfs_free_path(path);
1845 btrfs_end_transaction(trans, root);
1846 printk(KERN_ERR "Umm, you don't have the default dir item, "
1847 "this isn't going to work\n");
1848 return -ENOENT;
1849 }
1850
1851 btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
1852 btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
1853 btrfs_mark_buffer_dirty(path->nodes[0]);
1854 btrfs_free_path(path);
1855
1856 disk_super = &root->fs_info->super_copy;
1857 features = btrfs_super_incompat_flags(disk_super);
1858 if (!(features & BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)) {
1859 features |= BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL;
1860 btrfs_set_super_incompat_flags(disk_super, features);
1861 }
1862 btrfs_end_transaction(trans, root);
1863
1864 return 0;
1865}
1866
1867long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
1868{
1869 struct btrfs_ioctl_space_args space_args;
1870 struct btrfs_ioctl_space_info space;
1871 struct btrfs_ioctl_space_info *dest;
1872 struct btrfs_ioctl_space_info *dest_orig;
1873 struct btrfs_ioctl_space_info *user_dest;
1874 struct btrfs_space_info *info;
1875 int alloc_size;
1876 int ret = 0;
1877 int slot_count = 0;
1878
1879 if (copy_from_user(&space_args,
1880 (struct btrfs_ioctl_space_args __user *)arg,
1881 sizeof(space_args)))
1882 return -EFAULT;
1883
1884 /* first we count slots */
1885 rcu_read_lock();
1886 list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
1887 slot_count++;
1888 rcu_read_unlock();
1889
1890 /* space_slots == 0 means they are asking for a count */
1891 if (space_args.space_slots == 0) {
1892 space_args.total_spaces = slot_count;
1893 goto out;
1894 }
1895 alloc_size = sizeof(*dest) * slot_count;
1896 /* we generally have at most 6 or so space infos, one for each raid
1897 * level. So, a whole page should be more than enough for everyone
1898 */
1899 if (alloc_size > PAGE_CACHE_SIZE)
1900 return -ENOMEM;
1901
1902 space_args.total_spaces = 0;
1903 dest = kmalloc(alloc_size, GFP_NOFS);
1904 if (!dest)
1905 return -ENOMEM;
1906 dest_orig = dest;
1907
1908 /* now we have a buffer to copy into */
1909 rcu_read_lock();
1910 list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
1911 /* make sure we don't copy more than we allocated
1912 * in our buffer
1913 */
1914 if (slot_count == 0)
1915 break;
1916 slot_count--;
1917
1918 /* make sure userland has enough room in their buffer */
1919 if (space_args.total_spaces >= space_args.space_slots)
1920 break;
1921
1922 space.flags = info->flags;
1923 space.total_bytes = info->total_bytes;
1924 space.used_bytes = info->bytes_used;
1925 memcpy(dest, &space, sizeof(space));
1926 dest++;
1927 space_args.total_spaces++;
1928 }
1929 rcu_read_unlock();
1930
1931 user_dest = (struct btrfs_ioctl_space_info *)
1932 (arg + sizeof(struct btrfs_ioctl_space_args));
1933
1934 if (copy_to_user(user_dest, dest_orig, alloc_size))
1935 ret = -EFAULT;
1936
1937 kfree(dest_orig);
1938out:
1939 if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
1940 ret = -EFAULT;
1941
1942 return ret;
1943}
1944
1277/* 1945/*
1278 * there are many ways the trans_start and trans_end ioctls can lead 1946 * there are many ways the trans_start and trans_end ioctls can lead
1279 * to deadlocks. They should only be used by applications that 1947 * to deadlocks. They should only be used by applications that
@@ -1320,8 +1988,12 @@ long btrfs_ioctl(struct file *file, unsigned int
1320 return btrfs_ioctl_snap_create(file, argp, 1); 1988 return btrfs_ioctl_snap_create(file, argp, 1);
1321 case BTRFS_IOC_SNAP_DESTROY: 1989 case BTRFS_IOC_SNAP_DESTROY:
1322 return btrfs_ioctl_snap_destroy(file, argp); 1990 return btrfs_ioctl_snap_destroy(file, argp);
1991 case BTRFS_IOC_DEFAULT_SUBVOL:
1992 return btrfs_ioctl_default_subvol(file, argp);
1323 case BTRFS_IOC_DEFRAG: 1993 case BTRFS_IOC_DEFRAG:
1324 return btrfs_ioctl_defrag(file); 1994 return btrfs_ioctl_defrag(file, NULL);
1995 case BTRFS_IOC_DEFRAG_RANGE:
1996 return btrfs_ioctl_defrag(file, argp);
1325 case BTRFS_IOC_RESIZE: 1997 case BTRFS_IOC_RESIZE:
1326 return btrfs_ioctl_resize(root, argp); 1998 return btrfs_ioctl_resize(root, argp);
1327 case BTRFS_IOC_ADD_DEV: 1999 case BTRFS_IOC_ADD_DEV:
@@ -1338,6 +2010,12 @@ long btrfs_ioctl(struct file *file, unsigned int
1338 return btrfs_ioctl_trans_start(file); 2010 return btrfs_ioctl_trans_start(file);
1339 case BTRFS_IOC_TRANS_END: 2011 case BTRFS_IOC_TRANS_END:
1340 return btrfs_ioctl_trans_end(file); 2012 return btrfs_ioctl_trans_end(file);
2013 case BTRFS_IOC_TREE_SEARCH:
2014 return btrfs_ioctl_tree_search(file, argp);
2015 case BTRFS_IOC_INO_LOOKUP:
2016 return btrfs_ioctl_ino_lookup(file, argp);
2017 case BTRFS_IOC_SPACE_INFO:
2018 return btrfs_ioctl_space_info(root, argp);
1341 case BTRFS_IOC_SYNC: 2019 case BTRFS_IOC_SYNC:
1342 btrfs_sync_fs(file->f_dentry->d_sb, 1); 2020 btrfs_sync_fs(file->f_dentry->d_sb, 1);
1343 return 0; 2021 return 0;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index bc49914475eb..424694aa517f 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -30,12 +30,114 @@ struct btrfs_ioctl_vol_args {
30 char name[BTRFS_PATH_NAME_MAX + 1]; 30 char name[BTRFS_PATH_NAME_MAX + 1];
31}; 31};
32 32
/* size in bytes of the path buffer in struct btrfs_ioctl_ino_lookup_args */
33#define BTRFS_INO_LOOKUP_PATH_MAX 4080
/*
 * argument block for BTRFS_IOC_INO_LOOKUP; btrfs_ioctl_ino_lookup() copies
 * it in from userland and, on success, back out as a whole
 */
34struct btrfs_ioctl_ino_lookup_args {
/* tree to search; 0 selects the tree of the file the ioctl was issued on */
35 __u64 treeid;
/* objectid handed to btrfs_search_path_in_tree() */
36 __u64 objectid;
/* NUL-terminated path written by btrfs_search_path_in_tree() on success */
37 char name[BTRFS_INO_LOOKUP_PATH_MAX];
38};
39
/*
 * search parameters embedded at the start of struct btrfs_ioctl_search_args,
 * the argument of BTRFS_IOC_TREE_SEARCH; each min/max pair is an inclusive
 * range
 */
40struct btrfs_ioctl_search_key {
41 /* which root are we searching. 0 is the tree of tree roots */
42 __u64 tree_id;
43
44 /* keys returned will be >= min and <= max */
45 __u64 min_objectid;
46 __u64 max_objectid;
47
48 /* keys returned will be >= min and <= max */
49 __u64 min_offset;
50 __u64 max_offset;
51
52 /* max and min transids to search for */
53 __u64 min_transid;
54 __u64 max_transid;
55
56 /* keys returned will be >= min and <= max */
57 __u32 min_type;
58 __u32 max_type;
59
60 /*
61 * how many items did userland ask for, and how many are we
62 * returning
63 */
64 __u32 nr_items;
65
66 /* align to 64 bits */
67 __u32 unused;
68
69 /* some extra for later */
70 __u64 unused1;
71 __u64 unused2;
72 __u64 unused3;
73 __u64 unused4;
74};
75
/*
 * header stored in front of each item in btrfs_ioctl_search_args.buf
 */
76struct btrfs_ioctl_search_header {
77 __u64 transid;
78 __u64 objectid;
79 __u64 offset;
/* 32 bits wide for alignment */
80 __u32 type;
/* presumably the size of the item data following this header -- TODO confirm */
81 __u32 len;
82};
83
/* sized so that struct btrfs_ioctl_search_args is 4096 bytes in total */
84#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
85/*
86 * the buf is an array of search headers where
87 * each header is followed by the actual item
88 * the type field is expanded to 32 bits for alignment
89 */
/* argument block for BTRFS_IOC_TREE_SEARCH: search key plus result buffer */
90struct btrfs_ioctl_search_args {
91 struct btrfs_ioctl_search_key key;
92 char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
93};
94
33struct btrfs_ioctl_clone_range_args { 95struct btrfs_ioctl_clone_range_args {
34 __s64 src_fd; 96 __s64 src_fd;
35 __u64 src_offset, src_length; 97 __u64 src_offset, src_length;
36 __u64 dest_offset; 98 __u64 dest_offset;
37}; 99};
38 100
101/* flags for the defrag range ioctl */
/*
 * when COMPRESS is set, btrfs_ioctl_defrag() also sets START_IO and
 * overrides extent_thresh with (u32)-1
 */
102#define BTRFS_DEFRAG_RANGE_COMPRESS 1
103#define BTRFS_DEFRAG_RANGE_START_IO 2
104
/*
 * argument block for BTRFS_IOC_DEFRAG_RANGE; the plain BTRFS_IOC_DEFRAG
 * path passes no user argument and uses a zeroed copy with len = (u64)-1
 */
105struct btrfs_ioctl_defrag_range_args {
106 /* start of the defrag operation */
107 __u64 start;
108
109 /* number of bytes to defrag, use (u64)-1 to say all */
110 __u64 len;
111
112 /*
113 * flags for the operation, which can include turning
114 * on compression for this one defrag
115 */
116 __u64 flags;
117
118 /*
119 * any extent bigger than this will be considered
120 * already defragged. Use 0 to take the kernel default
121 * Use 1 to say every single extent must be rewritten
122 */
123 __u32 extent_thresh;
124
125 /* spare for later */
126 __u32 unused[5];
127};
128
/*
 * one entry returned by BTRFS_IOC_SPACE_INFO; btrfs_ioctl_space_info()
 * fills each field from the matching struct btrfs_space_info
 */
129struct btrfs_ioctl_space_info {
/* copied from btrfs_space_info.flags */
130 __u64 flags;
/* copied from btrfs_space_info.total_bytes */
131 __u64 total_bytes;
/* copied from btrfs_space_info.bytes_used */
132 __u64 used_bytes;
133};
134
/* argument header for BTRFS_IOC_SPACE_INFO */
135struct btrfs_ioctl_space_args {
/* in: number of entries userland has room for; 0 asks only for a count */
136 __u64 space_slots;
/* out: entry count (space_slots == 0) or number of entries filled in */
137 __u64 total_spaces;
/* out: entries are written directly behind the two fields above */
138 struct btrfs_ioctl_space_info spaces[0];
139};
140
39#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \ 141#define BTRFS_IOC_SNAP_CREATE _IOW(BTRFS_IOCTL_MAGIC, 1, \
40 struct btrfs_ioctl_vol_args) 142 struct btrfs_ioctl_vol_args)
41#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \ 143#define BTRFS_IOC_DEFRAG _IOW(BTRFS_IOCTL_MAGIC, 2, \
@@ -67,4 +169,13 @@ struct btrfs_ioctl_clone_range_args {
67 struct btrfs_ioctl_vol_args) 169 struct btrfs_ioctl_vol_args)
68#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \ 170#define BTRFS_IOC_SNAP_DESTROY _IOW(BTRFS_IOCTL_MAGIC, 15, \
69 struct btrfs_ioctl_vol_args) 171 struct btrfs_ioctl_vol_args)
172#define BTRFS_IOC_DEFRAG_RANGE _IOW(BTRFS_IOCTL_MAGIC, 16, \
173 struct btrfs_ioctl_defrag_range_args)
174#define BTRFS_IOC_TREE_SEARCH _IOWR(BTRFS_IOCTL_MAGIC, 17, \
175 struct btrfs_ioctl_search_args)
176#define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
177 struct btrfs_ioctl_ino_lookup_args)
178#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
179#define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
180 struct btrfs_ioctl_space_args)
70#endif 181#endif
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 1c36e5cd8f55..6151f2ea38bb 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -16,7 +16,6 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/gfp.h>
20#include <linux/pagemap.h> 19#include <linux/pagemap.h>
21#include <linux/spinlock.h> 20#include <linux/spinlock.h>
22#include <linux/page-flags.h> 21#include <linux/page-flags.h>
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5c2a9e78a949..a127c0ebb2dc 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -16,7 +16,6 @@
16 * Boston, MA 021110-1307, USA. 16 * Boston, MA 021110-1307, USA.
17 */ 17 */
18 18
19#include <linux/gfp.h>
20#include <linux/slab.h> 19#include <linux/slab.h>
21#include <linux/blkdev.h> 20#include <linux/blkdev.h>
22#include <linux/writeback.h> 21#include <linux/writeback.h>
@@ -174,7 +173,6 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
174 if (!entry) 173 if (!entry)
175 return -ENOMEM; 174 return -ENOMEM;
176 175
177 mutex_lock(&tree->mutex);
178 entry->file_offset = file_offset; 176 entry->file_offset = file_offset;
179 entry->start = start; 177 entry->start = start;
180 entry->len = len; 178 entry->len = len;
@@ -190,16 +188,17 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
190 INIT_LIST_HEAD(&entry->list); 188 INIT_LIST_HEAD(&entry->list);
191 INIT_LIST_HEAD(&entry->root_extent_list); 189 INIT_LIST_HEAD(&entry->root_extent_list);
192 190
191 spin_lock(&tree->lock);
193 node = tree_insert(&tree->tree, file_offset, 192 node = tree_insert(&tree->tree, file_offset,
194 &entry->rb_node); 193 &entry->rb_node);
195 BUG_ON(node); 194 BUG_ON(node);
195 spin_unlock(&tree->lock);
196 196
197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 197 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
198 list_add_tail(&entry->root_extent_list, 198 list_add_tail(&entry->root_extent_list,
199 &BTRFS_I(inode)->root->fs_info->ordered_extents); 199 &BTRFS_I(inode)->root->fs_info->ordered_extents);
200 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 200 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
201 201
202 mutex_unlock(&tree->mutex);
203 BUG_ON(node); 202 BUG_ON(node);
204 return 0; 203 return 0;
205} 204}
@@ -216,9 +215,9 @@ int btrfs_add_ordered_sum(struct inode *inode,
216 struct btrfs_ordered_inode_tree *tree; 215 struct btrfs_ordered_inode_tree *tree;
217 216
218 tree = &BTRFS_I(inode)->ordered_tree; 217 tree = &BTRFS_I(inode)->ordered_tree;
219 mutex_lock(&tree->mutex); 218 spin_lock(&tree->lock);
220 list_add_tail(&sum->list, &entry->list); 219 list_add_tail(&sum->list, &entry->list);
221 mutex_unlock(&tree->mutex); 220 spin_unlock(&tree->lock);
222 return 0; 221 return 0;
223} 222}
224 223
@@ -232,15 +231,16 @@ int btrfs_add_ordered_sum(struct inode *inode,
232 * to make sure this function only returns 1 once for a given ordered extent. 231 * to make sure this function only returns 1 once for a given ordered extent.
233 */ 232 */
234int btrfs_dec_test_ordered_pending(struct inode *inode, 233int btrfs_dec_test_ordered_pending(struct inode *inode,
234 struct btrfs_ordered_extent **cached,
235 u64 file_offset, u64 io_size) 235 u64 file_offset, u64 io_size)
236{ 236{
237 struct btrfs_ordered_inode_tree *tree; 237 struct btrfs_ordered_inode_tree *tree;
238 struct rb_node *node; 238 struct rb_node *node;
239 struct btrfs_ordered_extent *entry; 239 struct btrfs_ordered_extent *entry = NULL;
240 int ret; 240 int ret;
241 241
242 tree = &BTRFS_I(inode)->ordered_tree; 242 tree = &BTRFS_I(inode)->ordered_tree;
243 mutex_lock(&tree->mutex); 243 spin_lock(&tree->lock);
244 node = tree_search(tree, file_offset); 244 node = tree_search(tree, file_offset);
245 if (!node) { 245 if (!node) {
246 ret = 1; 246 ret = 1;
@@ -264,7 +264,11 @@ int btrfs_dec_test_ordered_pending(struct inode *inode,
264 else 264 else
265 ret = 1; 265 ret = 1;
266out: 266out:
267 mutex_unlock(&tree->mutex); 267 if (!ret && cached && entry) {
268 *cached = entry;
269 atomic_inc(&entry->refs);
270 }
271 spin_unlock(&tree->lock);
268 return ret == 0; 272 return ret == 0;
269} 273}
270 274
@@ -291,13 +295,14 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
291 295
292/* 296/*
293 * remove an ordered extent from the tree. No references are dropped 297 * remove an ordered extent from the tree. No references are dropped
294 * and you must wake_up entry->wait. You must hold the tree mutex 298 * and you must wake_up entry->wait. You must hold the tree lock
295 * while you call this function. 299 * while you call this function.
296 */ 300 */
297static int __btrfs_remove_ordered_extent(struct inode *inode, 301static int __btrfs_remove_ordered_extent(struct inode *inode,
298 struct btrfs_ordered_extent *entry) 302 struct btrfs_ordered_extent *entry)
299{ 303{
300 struct btrfs_ordered_inode_tree *tree; 304 struct btrfs_ordered_inode_tree *tree;
305 struct btrfs_root *root = BTRFS_I(inode)->root;
301 struct rb_node *node; 306 struct rb_node *node;
302 307
303 tree = &BTRFS_I(inode)->ordered_tree; 308 tree = &BTRFS_I(inode)->ordered_tree;
@@ -307,12 +312,13 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
307 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); 312 set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags);
308 313
309 spin_lock(&BTRFS_I(inode)->accounting_lock); 314 spin_lock(&BTRFS_I(inode)->accounting_lock);
315 WARN_ON(!BTRFS_I(inode)->outstanding_extents);
310 BTRFS_I(inode)->outstanding_extents--; 316 BTRFS_I(inode)->outstanding_extents--;
311 spin_unlock(&BTRFS_I(inode)->accounting_lock); 317 spin_unlock(&BTRFS_I(inode)->accounting_lock);
312 btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, 318 btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root,
313 inode, 1); 319 inode, 1);
314 320
315 spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 321 spin_lock(&root->fs_info->ordered_extent_lock);
316 list_del_init(&entry->root_extent_list); 322 list_del_init(&entry->root_extent_list);
317 323
318 /* 324 /*
@@ -324,7 +330,7 @@ static int __btrfs_remove_ordered_extent(struct inode *inode,
324 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { 330 !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
325 list_del_init(&BTRFS_I(inode)->ordered_operations); 331 list_del_init(&BTRFS_I(inode)->ordered_operations);
326 } 332 }
327 spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); 333 spin_unlock(&root->fs_info->ordered_extent_lock);
328 334
329 return 0; 335 return 0;
330} 336}
@@ -340,9 +346,9 @@ int btrfs_remove_ordered_extent(struct inode *inode,
340 int ret; 346 int ret;
341 347
342 tree = &BTRFS_I(inode)->ordered_tree; 348 tree = &BTRFS_I(inode)->ordered_tree;
343 mutex_lock(&tree->mutex); 349 spin_lock(&tree->lock);
344 ret = __btrfs_remove_ordered_extent(inode, entry); 350 ret = __btrfs_remove_ordered_extent(inode, entry);
345 mutex_unlock(&tree->mutex); 351 spin_unlock(&tree->lock);
346 wake_up(&entry->wait); 352 wake_up(&entry->wait);
347 353
348 return ret; 354 return ret;
@@ -567,7 +573,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
567 struct btrfs_ordered_extent *entry = NULL; 573 struct btrfs_ordered_extent *entry = NULL;
568 574
569 tree = &BTRFS_I(inode)->ordered_tree; 575 tree = &BTRFS_I(inode)->ordered_tree;
570 mutex_lock(&tree->mutex); 576 spin_lock(&tree->lock);
571 node = tree_search(tree, file_offset); 577 node = tree_search(tree, file_offset);
572 if (!node) 578 if (!node)
573 goto out; 579 goto out;
@@ -578,7 +584,7 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct inode *inode,
578 if (entry) 584 if (entry)
579 atomic_inc(&entry->refs); 585 atomic_inc(&entry->refs);
580out: 586out:
581 mutex_unlock(&tree->mutex); 587 spin_unlock(&tree->lock);
582 return entry; 588 return entry;
583} 589}
584 590
@@ -594,7 +600,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
594 struct btrfs_ordered_extent *entry = NULL; 600 struct btrfs_ordered_extent *entry = NULL;
595 601
596 tree = &BTRFS_I(inode)->ordered_tree; 602 tree = &BTRFS_I(inode)->ordered_tree;
597 mutex_lock(&tree->mutex); 603 spin_lock(&tree->lock);
598 node = tree_search(tree, file_offset); 604 node = tree_search(tree, file_offset);
599 if (!node) 605 if (!node)
600 goto out; 606 goto out;
@@ -602,7 +608,7 @@ btrfs_lookup_first_ordered_extent(struct inode *inode, u64 file_offset)
602 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); 608 entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
603 atomic_inc(&entry->refs); 609 atomic_inc(&entry->refs);
604out: 610out:
605 mutex_unlock(&tree->mutex); 611 spin_unlock(&tree->lock);
606 return entry; 612 return entry;
607} 613}
608 614
@@ -629,7 +635,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
629 else 635 else
630 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize); 636 offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
631 637
632 mutex_lock(&tree->mutex); 638 spin_lock(&tree->lock);
633 disk_i_size = BTRFS_I(inode)->disk_i_size; 639 disk_i_size = BTRFS_I(inode)->disk_i_size;
634 640
635 /* truncate file */ 641 /* truncate file */
@@ -735,7 +741,7 @@ out:
735 */ 741 */
736 if (ordered) 742 if (ordered)
737 __btrfs_remove_ordered_extent(inode, ordered); 743 __btrfs_remove_ordered_extent(inode, ordered);
738 mutex_unlock(&tree->mutex); 744 spin_unlock(&tree->lock);
739 if (ordered) 745 if (ordered)
740 wake_up(&ordered->wait); 746 wake_up(&ordered->wait);
741 return ret; 747 return ret;
@@ -762,7 +768,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
762 if (!ordered) 768 if (!ordered)
763 return 1; 769 return 1;
764 770
765 mutex_lock(&tree->mutex); 771 spin_lock(&tree->lock);
766 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) { 772 list_for_each_entry_reverse(ordered_sum, &ordered->list, list) {
767 if (disk_bytenr >= ordered_sum->bytenr) { 773 if (disk_bytenr >= ordered_sum->bytenr) {
768 num_sectors = ordered_sum->len / sectorsize; 774 num_sectors = ordered_sum->len / sectorsize;
@@ -777,7 +783,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
777 } 783 }
778 } 784 }
779out: 785out:
780 mutex_unlock(&tree->mutex); 786 spin_unlock(&tree->lock);
781 btrfs_put_ordered_extent(ordered); 787 btrfs_put_ordered_extent(ordered);
782 return ret; 788 return ret;
783} 789}
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 9116c6d0c5a9..c82f76a9f040 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -21,7 +21,7 @@
21 21
22/* one of these per inode */ 22/* one of these per inode */
23struct btrfs_ordered_inode_tree { 23struct btrfs_ordered_inode_tree {
24 struct mutex mutex; 24 spinlock_t lock;
25 struct rb_root tree; 25 struct rb_root tree;
26 struct rb_node *last; 26 struct rb_node *last;
27}; 27};
@@ -128,7 +128,7 @@ static inline int btrfs_ordered_sum_size(struct btrfs_root *root,
128static inline void 128static inline void
129btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t) 129btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
130{ 130{
131 mutex_init(&t->mutex); 131 spin_lock_init(&t->lock);
132 t->tree = RB_ROOT; 132 t->tree = RB_ROOT;
133 t->last = NULL; 133 t->last = NULL;
134} 134}
@@ -137,7 +137,8 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
137int btrfs_remove_ordered_extent(struct inode *inode, 137int btrfs_remove_ordered_extent(struct inode *inode,
138 struct btrfs_ordered_extent *entry); 138 struct btrfs_ordered_extent *entry);
139int btrfs_dec_test_ordered_pending(struct inode *inode, 139int btrfs_dec_test_ordered_pending(struct inode *inode,
140 u64 file_offset, u64 io_size); 140 struct btrfs_ordered_extent **cached,
141 u64 file_offset, u64 io_size);
141int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, 142int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
142 u64 start, u64 len, u64 disk_len, int tyep); 143 u64 start, u64 len, u64 disk_len, int tyep);
143int btrfs_add_ordered_sum(struct inode *inode, 144int btrfs_add_ordered_sum(struct inode *inode,
diff --git a/fs/btrfs/ref-cache.c b/fs/btrfs/ref-cache.c
index d0cc62bccb94..a97314cf6bd6 100644
--- a/fs/btrfs/ref-cache.c
+++ b/fs/btrfs/ref-cache.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h>
20#include <linux/sort.h> 21#include <linux/sort.h>
21#include "ctree.h" 22#include "ctree.h"
22#include "ref-cache.h" 23#include "ref-cache.h"
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0109e5606bad..e558dd941ded 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -21,6 +21,7 @@
21#include <linux/writeback.h> 21#include <linux/writeback.h>
22#include <linux/blkdev.h> 22#include <linux/blkdev.h>
23#include <linux/rbtree.h> 23#include <linux/rbtree.h>
24#include <linux/slab.h>
24#include "ctree.h" 25#include "ctree.h"
25#include "disk-io.h" 26#include "disk-io.h"
26#include "transaction.h" 27#include "transaction.h"
@@ -2659,7 +2660,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
2659 EXTENT_BOUNDARY, GFP_NOFS); 2660 EXTENT_BOUNDARY, GFP_NOFS);
2660 nr++; 2661 nr++;
2661 } 2662 }
2662 btrfs_set_extent_delalloc(inode, page_start, page_end); 2663 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL);
2663 2664
2664 set_page_dirty(page); 2665 set_page_dirty(page);
2665 dirty_page++; 2666 dirty_page++;
@@ -3487,7 +3488,7 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
3487 key.objectid = objectid; 3488 key.objectid = objectid;
3488 key.type = BTRFS_INODE_ITEM_KEY; 3489 key.type = BTRFS_INODE_ITEM_KEY;
3489 key.offset = 0; 3490 key.offset = 0;
3490 inode = btrfs_iget(root->fs_info->sb, &key, root); 3491 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
3491 BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); 3492 BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
3492 BTRFS_I(inode)->index_cnt = group->key.objectid; 3493 BTRFS_I(inode)->index_cnt = group->key.objectid;
3493 3494
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index f8b4521de907..1866dff0538e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -38,6 +38,7 @@
38#include <linux/namei.h> 38#include <linux/namei.h>
39#include <linux/miscdevice.h> 39#include <linux/miscdevice.h>
40#include <linux/magic.h> 40#include <linux/magic.h>
41#include <linux/slab.h>
41#include "compat.h" 42#include "compat.h"
42#include "ctree.h" 43#include "ctree.h"
43#include "disk-io.h" 44#include "disk-io.h"
@@ -63,22 +64,21 @@ static void btrfs_put_super(struct super_block *sb)
63} 64}
64 65
65enum { 66enum {
66 Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow, 67 Opt_degraded, Opt_subvol, Opt_subvolid, Opt_device, Opt_nodatasum,
67 Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, 68 Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
68 Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, 69 Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
69 Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio, 70 Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
70 Opt_flushoncommit,
71 Opt_discard, Opt_err, 71 Opt_discard, Opt_err,
72}; 72};
73 73
74static match_table_t tokens = { 74static match_table_t tokens = {
75 {Opt_degraded, "degraded"}, 75 {Opt_degraded, "degraded"},
76 {Opt_subvol, "subvol=%s"}, 76 {Opt_subvol, "subvol=%s"},
77 {Opt_subvolid, "subvolid=%d"},
77 {Opt_device, "device=%s"}, 78 {Opt_device, "device=%s"},
78 {Opt_nodatasum, "nodatasum"}, 79 {Opt_nodatasum, "nodatasum"},
79 {Opt_nodatacow, "nodatacow"}, 80 {Opt_nodatacow, "nodatacow"},
80 {Opt_nobarrier, "nobarrier"}, 81 {Opt_nobarrier, "nobarrier"},
81 {Opt_max_extent, "max_extent=%s"},
82 {Opt_max_inline, "max_inline=%s"}, 82 {Opt_max_inline, "max_inline=%s"},
83 {Opt_alloc_start, "alloc_start=%s"}, 83 {Opt_alloc_start, "alloc_start=%s"},
84 {Opt_thread_pool, "thread_pool=%d"}, 84 {Opt_thread_pool, "thread_pool=%d"},
@@ -95,31 +95,6 @@ static match_table_t tokens = {
95 {Opt_err, NULL}, 95 {Opt_err, NULL},
96}; 96};
97 97
98u64 btrfs_parse_size(char *str)
99{
100 u64 res;
101 int mult = 1;
102 char *end;
103 char last;
104
105 res = simple_strtoul(str, &end, 10);
106
107 last = end[0];
108 if (isalpha(last)) {
109 last = tolower(last);
110 switch (last) {
111 case 'g':
112 mult *= 1024;
113 case 'm':
114 mult *= 1024;
115 case 'k':
116 mult *= 1024;
117 }
118 res = res * mult;
119 }
120 return res;
121}
122
123/* 98/*
124 * Regular mount options parser. Everything that is needed only when 99 * Regular mount options parser. Everything that is needed only when
125 * reading in a new superblock is parsed here. 100 * reading in a new superblock is parsed here.
@@ -157,6 +132,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
157 btrfs_set_opt(info->mount_opt, DEGRADED); 132 btrfs_set_opt(info->mount_opt, DEGRADED);
158 break; 133 break;
159 case Opt_subvol: 134 case Opt_subvol:
135 case Opt_subvolid:
160 case Opt_device: 136 case Opt_device:
161 /* 137 /*
162 * These are parsed by btrfs_parse_early_options 138 * These are parsed by btrfs_parse_early_options
@@ -211,22 +187,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
211 info->thread_pool_size); 187 info->thread_pool_size);
212 } 188 }
213 break; 189 break;
214 case Opt_max_extent:
215 num = match_strdup(&args[0]);
216 if (num) {
217 info->max_extent = btrfs_parse_size(num);
218 kfree(num);
219
220 info->max_extent = max_t(u64,
221 info->max_extent, root->sectorsize);
222 printk(KERN_INFO "btrfs: max_extent at %llu\n",
223 (unsigned long long)info->max_extent);
224 }
225 break;
226 case Opt_max_inline: 190 case Opt_max_inline:
227 num = match_strdup(&args[0]); 191 num = match_strdup(&args[0]);
228 if (num) { 192 if (num) {
229 info->max_inline = btrfs_parse_size(num); 193 info->max_inline = memparse(num, NULL);
230 kfree(num); 194 kfree(num);
231 195
232 if (info->max_inline) { 196 if (info->max_inline) {
@@ -241,7 +205,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
241 case Opt_alloc_start: 205 case Opt_alloc_start:
242 num = match_strdup(&args[0]); 206 num = match_strdup(&args[0]);
243 if (num) { 207 if (num) {
244 info->alloc_start = btrfs_parse_size(num); 208 info->alloc_start = memparse(num, NULL);
245 kfree(num); 209 kfree(num);
246 printk(KERN_INFO 210 printk(KERN_INFO
247 "btrfs: allocations start at %llu\n", 211 "btrfs: allocations start at %llu\n",
@@ -292,12 +256,13 @@ out:
292 * only when we need to allocate a new super block. 256 * only when we need to allocate a new super block.
293 */ 257 */
294static int btrfs_parse_early_options(const char *options, fmode_t flags, 258static int btrfs_parse_early_options(const char *options, fmode_t flags,
295 void *holder, char **subvol_name, 259 void *holder, char **subvol_name, u64 *subvol_objectid,
296 struct btrfs_fs_devices **fs_devices) 260 struct btrfs_fs_devices **fs_devices)
297{ 261{
298 substring_t args[MAX_OPT_ARGS]; 262 substring_t args[MAX_OPT_ARGS];
299 char *opts, *p; 263 char *opts, *p;
300 int error = 0; 264 int error = 0;
265 int intarg;
301 266
302 if (!options) 267 if (!options)
303 goto out; 268 goto out;
@@ -320,6 +285,18 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
320 case Opt_subvol: 285 case Opt_subvol:
321 *subvol_name = match_strdup(&args[0]); 286 *subvol_name = match_strdup(&args[0]);
322 break; 287 break;
288 case Opt_subvolid:
289 intarg = 0;
290 error = match_int(&args[0], &intarg);
291 if (!error) {
292 /* we want the original fs_tree */
293 if (!intarg)
294 *subvol_objectid =
295 BTRFS_FS_TREE_OBJECTID;
296 else
297 *subvol_objectid = intarg;
298 }
299 break;
323 case Opt_device: 300 case Opt_device:
324 error = btrfs_scan_one_device(match_strdup(&args[0]), 301 error = btrfs_scan_one_device(match_strdup(&args[0]),
325 flags, holder, fs_devices); 302 flags, holder, fs_devices);
@@ -347,6 +324,110 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
347 return error; 324 return error;
348} 325}
349 326
327static struct dentry *get_default_root(struct super_block *sb,
328 u64 subvol_objectid)
329{
330 struct btrfs_root *root = sb->s_fs_info;
331 struct btrfs_root *new_root;
332 struct btrfs_dir_item *di;
333 struct btrfs_path *path;
334 struct btrfs_key location;
335 struct inode *inode;
336 struct dentry *dentry;
337 u64 dir_id;
338 int new = 0;
339
340 /*
341 * We have a specific subvol we want to mount, just setup location and
342 * go look up the root.
343 */
344 if (subvol_objectid) {
345 location.objectid = subvol_objectid;
346 location.type = BTRFS_ROOT_ITEM_KEY;
347 location.offset = (u64)-1;
348 goto find_root;
349 }
350
351 path = btrfs_alloc_path();
352 if (!path)
353 return ERR_PTR(-ENOMEM);
354 path->leave_spinning = 1;
355
356 /*
357 * Find the "default" dir item which points to the root item that we
358 * will mount by default if we haven't been given a specific subvolume
359 * to mount.
360 */
361 dir_id = btrfs_super_root_dir(&root->fs_info->super_copy);
362 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, "default", 7, 0);
363 if (!di) {
364 /*
365 * Ok the default dir item isn't there. This is weird since
366 * it's always been there, but don't freak out, just try and
367 * mount to root most subvolume.
368 */
369 btrfs_free_path(path);
370 dir_id = BTRFS_FIRST_FREE_OBJECTID;
371 new_root = root->fs_info->fs_root;
372 goto setup_root;
373 }
374
375 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location);
376 btrfs_free_path(path);
377
378find_root:
379 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
380 if (IS_ERR(new_root))
381 return ERR_PTR(PTR_ERR(new_root));
382
383 if (btrfs_root_refs(&new_root->root_item) == 0)
384 return ERR_PTR(-ENOENT);
385
386 dir_id = btrfs_root_dirid(&new_root->root_item);
387setup_root:
388 location.objectid = dir_id;
389 location.type = BTRFS_INODE_ITEM_KEY;
390 location.offset = 0;
391
392 inode = btrfs_iget(sb, &location, new_root, &new);
393 if (!inode)
394 return ERR_PTR(-ENOMEM);
395
396 /*
397 * If we're just mounting the root most subvol put the inode and return
398 * a reference to the dentry. We will have already gotten a reference
399 * to the inode in btrfs_fill_super so we're good to go.
400 */
401 if (!new && sb->s_root->d_inode == inode) {
402 iput(inode);
403 return dget(sb->s_root);
404 }
405
406 if (new) {
407 const struct qstr name = { .name = "/", .len = 1 };
408
409 /*
410 * New inode, we need to make the dentry a sibling of s_root so
411 * everything gets cleaned up properly on unmount.
412 */
413 dentry = d_alloc(sb->s_root, &name);
414 if (!dentry) {
415 iput(inode);
416 return ERR_PTR(-ENOMEM);
417 }
418 d_splice_alias(inode, dentry);
419 } else {
420 /*
421 * We found the inode in cache, just find a dentry for it and
422 * put the reference to the inode we just got.
423 */
424 dentry = d_find_alias(inode);
425 iput(inode);
426 }
427
428 return dentry;
429}
430
350static int btrfs_fill_super(struct super_block *sb, 431static int btrfs_fill_super(struct super_block *sb,
351 struct btrfs_fs_devices *fs_devices, 432 struct btrfs_fs_devices *fs_devices,
352 void *data, int silent) 433 void *data, int silent)
@@ -380,7 +461,7 @@ static int btrfs_fill_super(struct super_block *sb,
380 key.objectid = BTRFS_FIRST_FREE_OBJECTID; 461 key.objectid = BTRFS_FIRST_FREE_OBJECTID;
381 key.type = BTRFS_INODE_ITEM_KEY; 462 key.type = BTRFS_INODE_ITEM_KEY;
382 key.offset = 0; 463 key.offset = 0;
383 inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root); 464 inode = btrfs_iget(sb, &key, tree_root->fs_info->fs_root, NULL);
384 if (IS_ERR(inode)) { 465 if (IS_ERR(inode)) {
385 err = PTR_ERR(inode); 466 err = PTR_ERR(inode);
386 goto fail_close; 467 goto fail_close;
@@ -392,12 +473,6 @@ static int btrfs_fill_super(struct super_block *sb,
392 err = -ENOMEM; 473 err = -ENOMEM;
393 goto fail_close; 474 goto fail_close;
394 } 475 }
395#if 0
396 /* this does the super kobj at the same time */
397 err = btrfs_sysfs_add_super(tree_root->fs_info);
398 if (err)
399 goto fail_close;
400#endif
401 476
402 sb->s_root = root_dentry; 477 sb->s_root = root_dentry;
403 478
@@ -441,9 +516,6 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
441 seq_puts(seq, ",nodatacow"); 516 seq_puts(seq, ",nodatacow");
442 if (btrfs_test_opt(root, NOBARRIER)) 517 if (btrfs_test_opt(root, NOBARRIER))
443 seq_puts(seq, ",nobarrier"); 518 seq_puts(seq, ",nobarrier");
444 if (info->max_extent != (u64)-1)
445 seq_printf(seq, ",max_extent=%llu",
446 (unsigned long long)info->max_extent);
447 if (info->max_inline != 8192 * 1024) 519 if (info->max_inline != 8192 * 1024)
448 seq_printf(seq, ",max_inline=%llu", 520 seq_printf(seq, ",max_inline=%llu",
449 (unsigned long long)info->max_inline); 521 (unsigned long long)info->max_inline);
@@ -489,19 +561,22 @@ static int btrfs_test_super(struct super_block *s, void *data)
489static int btrfs_get_sb(struct file_system_type *fs_type, int flags, 561static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
490 const char *dev_name, void *data, struct vfsmount *mnt) 562 const char *dev_name, void *data, struct vfsmount *mnt)
491{ 563{
492 char *subvol_name = NULL;
493 struct block_device *bdev = NULL; 564 struct block_device *bdev = NULL;
494 struct super_block *s; 565 struct super_block *s;
495 struct dentry *root; 566 struct dentry *root;
496 struct btrfs_fs_devices *fs_devices = NULL; 567 struct btrfs_fs_devices *fs_devices = NULL;
497 fmode_t mode = FMODE_READ; 568 fmode_t mode = FMODE_READ;
569 char *subvol_name = NULL;
570 u64 subvol_objectid = 0;
498 int error = 0; 571 int error = 0;
572 int found = 0;
499 573
500 if (!(flags & MS_RDONLY)) 574 if (!(flags & MS_RDONLY))
501 mode |= FMODE_WRITE; 575 mode |= FMODE_WRITE;
502 576
503 error = btrfs_parse_early_options(data, mode, fs_type, 577 error = btrfs_parse_early_options(data, mode, fs_type,
504 &subvol_name, &fs_devices); 578 &subvol_name, &subvol_objectid,
579 &fs_devices);
505 if (error) 580 if (error)
506 return error; 581 return error;
507 582
@@ -530,6 +605,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
530 goto error_close_devices; 605 goto error_close_devices;
531 } 606 }
532 607
608 found = 1;
533 btrfs_close_devices(fs_devices); 609 btrfs_close_devices(fs_devices);
534 } else { 610 } else {
535 char b[BDEVNAME_SIZE]; 611 char b[BDEVNAME_SIZE];
@@ -547,25 +623,35 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
547 s->s_flags |= MS_ACTIVE; 623 s->s_flags |= MS_ACTIVE;
548 } 624 }
549 625
550 if (!strcmp(subvol_name, ".")) 626 root = get_default_root(s, subvol_objectid);
551 root = dget(s->s_root); 627 if (IS_ERR(root)) {
552 else { 628 error = PTR_ERR(root);
553 mutex_lock(&s->s_root->d_inode->i_mutex); 629 deactivate_locked_super(s);
554 root = lookup_one_len(subvol_name, s->s_root, 630 goto error;
631 }
632 /* if they gave us a subvolume name bind mount into that */
633 if (strcmp(subvol_name, ".")) {
634 struct dentry *new_root;
635 mutex_lock(&root->d_inode->i_mutex);
636 new_root = lookup_one_len(subvol_name, root,
555 strlen(subvol_name)); 637 strlen(subvol_name));
556 mutex_unlock(&s->s_root->d_inode->i_mutex); 638 mutex_unlock(&root->d_inode->i_mutex);
557 639
558 if (IS_ERR(root)) { 640 if (IS_ERR(new_root)) {
559 deactivate_locked_super(s); 641 deactivate_locked_super(s);
560 error = PTR_ERR(root); 642 error = PTR_ERR(new_root);
561 goto error_free_subvol_name; 643 dput(root);
644 goto error_close_devices;
562 } 645 }
563 if (!root->d_inode) { 646 if (!new_root->d_inode) {
564 dput(root); 647 dput(root);
648 dput(new_root);
565 deactivate_locked_super(s); 649 deactivate_locked_super(s);
566 error = -ENXIO; 650 error = -ENXIO;
567 goto error_free_subvol_name; 651 goto error_close_devices;
568 } 652 }
653 dput(root);
654 root = new_root;
569 } 655 }
570 656
571 mnt->mnt_sb = s; 657 mnt->mnt_sb = s;
@@ -580,6 +666,7 @@ error_close_devices:
580 btrfs_close_devices(fs_devices); 666 btrfs_close_devices(fs_devices);
581error_free_subvol_name: 667error_free_subvol_name:
582 kfree(subvol_name); 668 kfree(subvol_name);
669error:
583 return error; 670 return error;
584} 671}
585 672
@@ -624,14 +711,37 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
624{ 711{
625 struct btrfs_root *root = btrfs_sb(dentry->d_sb); 712 struct btrfs_root *root = btrfs_sb(dentry->d_sb);
626 struct btrfs_super_block *disk_super = &root->fs_info->super_copy; 713 struct btrfs_super_block *disk_super = &root->fs_info->super_copy;
714 struct list_head *head = &root->fs_info->space_info;
715 struct btrfs_space_info *found;
716 u64 total_used = 0;
717 u64 data_used = 0;
627 int bits = dentry->d_sb->s_blocksize_bits; 718 int bits = dentry->d_sb->s_blocksize_bits;
628 __be32 *fsid = (__be32 *)root->fs_info->fsid; 719 __be32 *fsid = (__be32 *)root->fs_info->fsid;
629 720
721 rcu_read_lock();
722 list_for_each_entry_rcu(found, head, list) {
723 if (found->flags & (BTRFS_BLOCK_GROUP_DUP|
724 BTRFS_BLOCK_GROUP_RAID10|
725 BTRFS_BLOCK_GROUP_RAID1)) {
726 total_used += found->bytes_used;
727 if (found->flags & BTRFS_BLOCK_GROUP_DATA)
728 data_used += found->bytes_used;
729 else
730 data_used += found->total_bytes;
731 }
732
733 total_used += found->bytes_used;
734 if (found->flags & BTRFS_BLOCK_GROUP_DATA)
735 data_used += found->bytes_used;
736 else
737 data_used += found->total_bytes;
738 }
739 rcu_read_unlock();
740
630 buf->f_namelen = BTRFS_NAME_LEN; 741 buf->f_namelen = BTRFS_NAME_LEN;
631 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; 742 buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
632 buf->f_bfree = buf->f_blocks - 743 buf->f_bfree = buf->f_blocks - (total_used >> bits);
633 (btrfs_super_bytes_used(disk_super) >> bits); 744 buf->f_bavail = buf->f_blocks - (data_used >> bits);
634 buf->f_bavail = buf->f_bfree;
635 buf->f_bsize = dentry->d_sb->s_blocksize; 745 buf->f_bsize = dentry->d_sb->s_blocksize;
636 buf->f_type = BTRFS_SUPER_MAGIC; 746 buf->f_type = BTRFS_SUPER_MAGIC;
637 747
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 2a36e236a492..2cb116099b90 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/fs.h> 19#include <linux/fs.h>
20#include <linux/slab.h>
20#include <linux/sched.h> 21#include <linux/sched.h>
21#include <linux/writeback.h> 22#include <linux/writeback.h>
22#include <linux/pagemap.h> 23#include <linux/pagemap.h>
@@ -147,18 +148,13 @@ static void wait_current_trans(struct btrfs_root *root)
147 while (1) { 148 while (1) {
148 prepare_to_wait(&root->fs_info->transaction_wait, &wait, 149 prepare_to_wait(&root->fs_info->transaction_wait, &wait,
149 TASK_UNINTERRUPTIBLE); 150 TASK_UNINTERRUPTIBLE);
150 if (cur_trans->blocked) { 151 if (!cur_trans->blocked)
151 mutex_unlock(&root->fs_info->trans_mutex);
152 schedule();
153 mutex_lock(&root->fs_info->trans_mutex);
154 finish_wait(&root->fs_info->transaction_wait,
155 &wait);
156 } else {
157 finish_wait(&root->fs_info->transaction_wait,
158 &wait);
159 break; 152 break;
160 } 153 mutex_unlock(&root->fs_info->trans_mutex);
154 schedule();
155 mutex_lock(&root->fs_info->trans_mutex);
161 } 156 }
157 finish_wait(&root->fs_info->transaction_wait, &wait);
162 put_transaction(cur_trans); 158 put_transaction(cur_trans);
163 } 159 }
164} 160}
@@ -760,10 +756,17 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
760 struct btrfs_root_item *new_root_item; 756 struct btrfs_root_item *new_root_item;
761 struct btrfs_root *tree_root = fs_info->tree_root; 757 struct btrfs_root *tree_root = fs_info->tree_root;
762 struct btrfs_root *root = pending->root; 758 struct btrfs_root *root = pending->root;
759 struct btrfs_root *parent_root;
760 struct inode *parent_inode;
763 struct extent_buffer *tmp; 761 struct extent_buffer *tmp;
764 struct extent_buffer *old; 762 struct extent_buffer *old;
765 int ret; 763 int ret;
766 u64 objectid; 764 u64 objectid;
765 int namelen;
766 u64 index = 0;
767
768 parent_inode = pending->dentry->d_parent->d_inode;
769 parent_root = BTRFS_I(parent_inode)->root;
767 770
768 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); 771 new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
769 if (!new_root_item) { 772 if (!new_root_item) {
@@ -774,79 +777,59 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
774 if (ret) 777 if (ret)
775 goto fail; 778 goto fail;
776 779
777 record_root_in_trans(trans, root);
778 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
779 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
780
781 key.objectid = objectid; 780 key.objectid = objectid;
782 /* record when the snapshot was created in key.offset */ 781 /* record when the snapshot was created in key.offset */
783 key.offset = trans->transid; 782 key.offset = trans->transid;
784 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 783 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
785 784
786 old = btrfs_lock_root_node(root);
787 btrfs_cow_block(trans, root, old, NULL, 0, &old);
788 btrfs_set_lock_blocking(old);
789
790 btrfs_copy_root(trans, root, old, &tmp, objectid);
791 btrfs_tree_unlock(old);
792 free_extent_buffer(old);
793
794 btrfs_set_root_node(new_root_item, tmp);
795 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
796 new_root_item);
797 btrfs_tree_unlock(tmp);
798 free_extent_buffer(tmp);
799 if (ret)
800 goto fail;
801
802 key.offset = (u64)-1;
803 memcpy(&pending->root_key, &key, sizeof(key)); 785 memcpy(&pending->root_key, &key, sizeof(key));
804fail: 786 pending->root_key.offset = (u64)-1;
805 kfree(new_root_item);
806 return ret;
807}
808
809static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
810 struct btrfs_pending_snapshot *pending)
811{
812 int ret;
813 int namelen;
814 u64 index = 0;
815 struct btrfs_trans_handle *trans;
816 struct inode *parent_inode;
817 struct btrfs_root *parent_root;
818
819 parent_inode = pending->dentry->d_parent->d_inode;
820 parent_root = BTRFS_I(parent_inode)->root;
821 trans = btrfs_join_transaction(parent_root, 1);
822 787
788 record_root_in_trans(trans, parent_root);
823 /* 789 /*
824 * insert the directory item 790 * insert the directory item
825 */ 791 */
826 namelen = strlen(pending->name); 792 namelen = strlen(pending->name);
827 ret = btrfs_set_inode_index(parent_inode, &index); 793 ret = btrfs_set_inode_index(parent_inode, &index);
794 BUG_ON(ret);
828 ret = btrfs_insert_dir_item(trans, parent_root, 795 ret = btrfs_insert_dir_item(trans, parent_root,
829 pending->name, namelen, 796 pending->name, namelen,
830 parent_inode->i_ino, 797 parent_inode->i_ino,
831 &pending->root_key, BTRFS_FT_DIR, index); 798 &pending->root_key, BTRFS_FT_DIR, index);
832 799 BUG_ON(ret);
833 if (ret)
834 goto fail;
835 800
836 btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); 801 btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
837 ret = btrfs_update_inode(trans, parent_root, parent_inode); 802 ret = btrfs_update_inode(trans, parent_root, parent_inode);
838 BUG_ON(ret); 803 BUG_ON(ret);
839 804
805 record_root_in_trans(trans, root);
806 btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
807 memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
808
809 old = btrfs_lock_root_node(root);
810 btrfs_cow_block(trans, root, old, NULL, 0, &old);
811 btrfs_set_lock_blocking(old);
812
813 btrfs_copy_root(trans, root, old, &tmp, objectid);
814 btrfs_tree_unlock(old);
815 free_extent_buffer(old);
816
817 btrfs_set_root_node(new_root_item, tmp);
818 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
819 new_root_item);
820 BUG_ON(ret);
821 btrfs_tree_unlock(tmp);
822 free_extent_buffer(tmp);
823
840 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, 824 ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
841 pending->root_key.objectid, 825 pending->root_key.objectid,
842 parent_root->root_key.objectid, 826 parent_root->root_key.objectid,
843 parent_inode->i_ino, index, pending->name, 827 parent_inode->i_ino, index, pending->name,
844 namelen); 828 namelen);
845
846 BUG_ON(ret); 829 BUG_ON(ret);
847 830
848fail: 831fail:
849 btrfs_end_transaction(trans, fs_info->fs_root); 832 kfree(new_root_item);
850 return ret; 833 return ret;
851} 834}
852 835
@@ -867,25 +850,6 @@ static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
867 return 0; 850 return 0;
868} 851}
869 852
870static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
871 struct btrfs_fs_info *fs_info)
872{
873 struct btrfs_pending_snapshot *pending;
874 struct list_head *head = &trans->transaction->pending_snapshots;
875 int ret;
876
877 while (!list_empty(head)) {
878 pending = list_entry(head->next,
879 struct btrfs_pending_snapshot, list);
880 ret = finish_pending_snapshot(fs_info, pending);
881 BUG_ON(ret);
882 list_del(&pending->list);
883 kfree(pending->name);
884 kfree(pending);
885 }
886 return 0;
887}
888
889static void update_super_roots(struct btrfs_root *root) 853static void update_super_roots(struct btrfs_root *root)
890{ 854{
891 struct btrfs_root_item *root_item; 855 struct btrfs_root_item *root_item;
@@ -997,13 +961,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
997 961
998 mutex_unlock(&root->fs_info->trans_mutex); 962 mutex_unlock(&root->fs_info->trans_mutex);
999 963
1000 if (flush_on_commit) { 964 if (flush_on_commit || snap_pending) {
1001 btrfs_start_delalloc_inodes(root, 1); 965 btrfs_start_delalloc_inodes(root, 1);
1002 ret = btrfs_wait_ordered_extents(root, 0, 1); 966 ret = btrfs_wait_ordered_extents(root, 0, 1);
1003 BUG_ON(ret); 967 BUG_ON(ret);
1004 } else if (snap_pending) {
1005 ret = btrfs_wait_ordered_extents(root, 0, 1);
1006 BUG_ON(ret);
1007 } 968 }
1008 969
1009 /* 970 /*
@@ -1100,9 +1061,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1100 1061
1101 btrfs_finish_extent_commit(trans, root); 1062 btrfs_finish_extent_commit(trans, root);
1102 1063
1103 /* do the directory inserts of any pending snapshot creations */
1104 finish_pending_snapshots(trans, root->fs_info);
1105
1106 mutex_lock(&root->fs_info->trans_mutex); 1064 mutex_lock(&root->fs_info->trans_mutex);
1107 1065
1108 cur_trans->commit_done = 1; 1066 cur_trans->commit_done = 1;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 4a9434b622ec..af57dd2b43d4 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <linux/sched.h> 19#include <linux/sched.h>
20#include <linux/slab.h>
20#include "ctree.h" 21#include "ctree.h"
21#include "transaction.h" 22#include "transaction.h"
22#include "disk-io.h" 23#include "disk-io.h"
@@ -445,7 +446,7 @@ static noinline struct inode *read_one_inode(struct btrfs_root *root,
445 key.objectid = objectid; 446 key.objectid = objectid;
446 key.type = BTRFS_INODE_ITEM_KEY; 447 key.type = BTRFS_INODE_ITEM_KEY;
447 key.offset = 0; 448 key.offset = 0;
448 inode = btrfs_iget(root->fs_info->sb, &key, root); 449 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL);
449 if (IS_ERR(inode)) { 450 if (IS_ERR(inode)) {
450 inode = NULL; 451 inode = NULL;
451 } else if (is_bad_inode(inode)) { 452 } else if (is_bad_inode(inode)) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 41ecbb2347f2..8db7b14bbae8 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -17,6 +17,7 @@
17 */ 17 */
18#include <linux/sched.h> 18#include <linux/sched.h>
19#include <linux/bio.h> 19#include <linux/bio.h>
20#include <linux/slab.h>
20#include <linux/buffer_head.h> 21#include <linux/buffer_head.h>
21#include <linux/blkdev.h> 22#include <linux/blkdev.h>
22#include <linux/random.h> 23#include <linux/random.h>
@@ -256,13 +257,13 @@ loop_lock:
256 wake_up(&fs_info->async_submit_wait); 257 wake_up(&fs_info->async_submit_wait);
257 258
258 BUG_ON(atomic_read(&cur->bi_cnt) == 0); 259 BUG_ON(atomic_read(&cur->bi_cnt) == 0);
259 submit_bio(cur->bi_rw, cur);
260 num_run++;
261 batch_run++;
262 260
263 if (bio_rw_flagged(cur, BIO_RW_SYNCIO)) 261 if (bio_rw_flagged(cur, BIO_RW_SYNCIO))
264 num_sync_run++; 262 num_sync_run++;
265 263
264 submit_bio(cur->bi_rw, cur);
265 num_run++;
266 batch_run++;
266 if (need_resched()) { 267 if (need_resched()) {
267 if (num_sync_run) { 268 if (num_sync_run) {
268 blk_run_backing_dev(bdi, NULL); 269 blk_run_backing_dev(bdi, NULL);
@@ -325,16 +326,6 @@ loop_lock:
325 num_sync_run = 0; 326 num_sync_run = 0;
326 blk_run_backing_dev(bdi, NULL); 327 blk_run_backing_dev(bdi, NULL);
327 } 328 }
328
329 cond_resched();
330 if (again)
331 goto loop;
332
333 spin_lock(&device->io_lock);
334 if (device->pending_bios.head || device->pending_sync_bios.head)
335 goto loop_lock;
336 spin_unlock(&device->io_lock);
337
338 /* 329 /*
339 * IO has already been through a long path to get here. Checksumming, 330 * IO has already been through a long path to get here. Checksumming,
340 * async helper threads, perhaps compression. We've done a pretty 331 * async helper threads, perhaps compression. We've done a pretty
@@ -346,6 +337,16 @@ loop_lock:
346 * cared about found its way down here. 337 * cared about found its way down here.
347 */ 338 */
348 blk_run_backing_dev(bdi, NULL); 339 blk_run_backing_dev(bdi, NULL);
340
341 cond_resched();
342 if (again)
343 goto loop;
344
345 spin_lock(&device->io_lock);
346 if (device->pending_bios.head || device->pending_sync_bios.head)
347 goto loop_lock;
348 spin_unlock(&device->io_lock);
349
349done: 350done:
350 return 0; 351 return 0;
351} 352}
@@ -365,6 +366,7 @@ static noinline int device_list_add(const char *path,
365 struct btrfs_device *device; 366 struct btrfs_device *device;
366 struct btrfs_fs_devices *fs_devices; 367 struct btrfs_fs_devices *fs_devices;
367 u64 found_transid = btrfs_super_generation(disk_super); 368 u64 found_transid = btrfs_super_generation(disk_super);
369 char *name;
368 370
369 fs_devices = find_fsid(disk_super->fsid); 371 fs_devices = find_fsid(disk_super->fsid);
370 if (!fs_devices) { 372 if (!fs_devices) {
@@ -411,6 +413,12 @@ static noinline int device_list_add(const char *path,
411 413
412 device->fs_devices = fs_devices; 414 device->fs_devices = fs_devices;
413 fs_devices->num_devices++; 415 fs_devices->num_devices++;
416 } else if (strcmp(device->name, path)) {
417 name = kstrdup(path, GFP_NOFS);
418 if (!name)
419 return -ENOMEM;
420 kfree(device->name);
421 device->name = name;
414 } 422 }
415 423
416 if (found_transid > fs_devices->latest_trans) { 424 if (found_transid > fs_devices->latest_trans) {
@@ -592,7 +600,7 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
592 goto error_close; 600 goto error_close;
593 601
594 disk_super = (struct btrfs_super_block *)bh->b_data; 602 disk_super = (struct btrfs_super_block *)bh->b_data;
595 devid = le64_to_cpu(disk_super->dev_item.devid); 603 devid = btrfs_stack_device_id(&disk_super->dev_item);
596 if (devid != device->devid) 604 if (devid != device->devid)
597 goto error_brelse; 605 goto error_brelse;
598 606
@@ -694,7 +702,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
694 goto error_close; 702 goto error_close;
695 } 703 }
696 disk_super = (struct btrfs_super_block *)bh->b_data; 704 disk_super = (struct btrfs_super_block *)bh->b_data;
697 devid = le64_to_cpu(disk_super->dev_item.devid); 705 devid = btrfs_stack_device_id(&disk_super->dev_item);
698 transid = btrfs_super_generation(disk_super); 706 transid = btrfs_super_generation(disk_super);
699 if (disk_super->label[0]) 707 if (disk_super->label[0])
700 printk(KERN_INFO "device label %s ", disk_super->label); 708 printk(KERN_INFO "device label %s ", disk_super->label);
@@ -1187,7 +1195,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1187 goto error_close; 1195 goto error_close;
1188 } 1196 }
1189 disk_super = (struct btrfs_super_block *)bh->b_data; 1197 disk_super = (struct btrfs_super_block *)bh->b_data;
1190 devid = le64_to_cpu(disk_super->dev_item.devid); 1198 devid = btrfs_stack_device_id(&disk_super->dev_item);
1191 dev_uuid = disk_super->dev_item.uuid; 1199 dev_uuid = disk_super->dev_item.uuid;
1192 device = btrfs_find_device(root, devid, dev_uuid, 1200 device = btrfs_find_device(root, devid, dev_uuid,
1193 disk_super->fsid); 1201 disk_super->fsid);
@@ -2191,9 +2199,9 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
2191 min_stripes = 2; 2199 min_stripes = 2;
2192 } 2200 }
2193 if (type & (BTRFS_BLOCK_GROUP_RAID1)) { 2201 if (type & (BTRFS_BLOCK_GROUP_RAID1)) {
2194 num_stripes = min_t(u64, 2, fs_devices->rw_devices); 2202 if (fs_devices->rw_devices < 2)
2195 if (num_stripes < 2)
2196 return -ENOSPC; 2203 return -ENOSPC;
2204 num_stripes = 2;
2197 min_stripes = 2; 2205 min_stripes = 2;
2198 } 2206 }
2199 if (type & (BTRFS_BLOCK_GROUP_RAID10)) { 2207 if (type & (BTRFS_BLOCK_GROUP_RAID10)) {
@@ -2237,8 +2245,16 @@ again:
2237 do_div(calc_size, stripe_len); 2245 do_div(calc_size, stripe_len);
2238 calc_size *= stripe_len; 2246 calc_size *= stripe_len;
2239 } 2247 }
2248
2240 /* we don't want tiny stripes */ 2249 /* we don't want tiny stripes */
2241 calc_size = max_t(u64, min_stripe_size, calc_size); 2250 if (!looped)
2251 calc_size = max_t(u64, min_stripe_size, calc_size);
2252
2253 /*
2254 * we're about to do_div by the stripe_len so lets make sure
2255 * we end up with something bigger than a stripe
2256 */
2257 calc_size = max_t(u64, calc_size, stripe_len * 4);
2242 2258
2243 do_div(calc_size, stripe_len); 2259 do_div(calc_size, stripe_len);
2244 calc_size *= stripe_len; 2260 calc_size *= stripe_len;
@@ -3382,6 +3398,8 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
3382 key.type = 0; 3398 key.type = 0;
3383again: 3399again:
3384 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3400 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
3401 if (ret < 0)
3402 goto error;
3385 while (1) { 3403 while (1) {
3386 leaf = path->nodes[0]; 3404 leaf = path->nodes[0];
3387 slot = path->slots[0]; 3405 slot = path->slots[0];