aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-12-14 14:08:13 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2010-12-14 14:08:13 -0500
commite13cf63f2bbd38721af557f0205da994ea068427 (patch)
tree26d0d779fadd58814eea18ab2b16370a8565c837 /fs/btrfs
parent073f21ae1319348f4f8630003b7901e3be254327 (diff)
parent83a50de97fe96aca82389e061862ed760ece2283 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: Btrfs: prevent RAID level downgrades when space is low Btrfs: account for missing devices in RAID allocation profiles Btrfs: EIO when we fail to read tree roots Btrfs: fix compiler warnings Btrfs: Make async snapshot ioctl more generic Btrfs: pwrite blocked when writing from the mmaped buffer of the same page Btrfs: Fix a crash when mounting a subvolume Btrfs: fix sync subvol/snapshot creation Btrfs: Fix page leak in compressed writeback path Btrfs: do not BUG if we fail to remove the orphan item for dead snapshots Btrfs: fixup return code for btrfs_del_orphan_item Btrfs: do not do fast caching if we are allocating blocks for tree_root Btrfs: deal with space cache errors better Btrfs: fix use after free in O_DIRECT
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/disk-io.c11
-rw-r--r--fs/btrfs/extent-tree.c75
-rw-r--r--fs/btrfs/file.c92
-rw-r--r--fs/btrfs/free-space-cache.c12
-rw-r--r--fs/btrfs/inode.c11
-rw-r--r--fs/btrfs/ioctl.c56
-rw-r--r--fs/btrfs/ioctl.h14
-rw-r--r--fs/btrfs/orphan.c6
-rw-r--r--fs/btrfs/super.c2
-rw-r--r--fs/btrfs/volumes.c20
-rw-r--r--fs/btrfs/volumes.h2
11 files changed, 207 insertions, 94 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c547cca26a26..51d2e4de34eb 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -696,6 +696,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
696 __btree_submit_bio_done); 696 __btree_submit_bio_done);
697} 697}
698 698
699#ifdef CONFIG_MIGRATION
699static int btree_migratepage(struct address_space *mapping, 700static int btree_migratepage(struct address_space *mapping,
700 struct page *newpage, struct page *page) 701 struct page *newpage, struct page *page)
701{ 702{
@@ -712,12 +713,9 @@ static int btree_migratepage(struct address_space *mapping,
712 if (page_has_private(page) && 713 if (page_has_private(page) &&
713 !try_to_release_page(page, GFP_KERNEL)) 714 !try_to_release_page(page, GFP_KERNEL))
714 return -EAGAIN; 715 return -EAGAIN;
715#ifdef CONFIG_MIGRATION
716 return migrate_page(mapping, newpage, page); 716 return migrate_page(mapping, newpage, page);
717#else
718 return -ENOSYS;
719#endif
720} 717}
718#endif
721 719
722static int btree_writepage(struct page *page, struct writeback_control *wbc) 720static int btree_writepage(struct page *page, struct writeback_control *wbc)
723{ 721{
@@ -1009,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
1009 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); 1007 blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
1010 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), 1008 root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
1011 blocksize, generation); 1009 blocksize, generation);
1012 BUG_ON(!root->node); 1010 if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
1011 free_extent_buffer(root->node);
1012 return -EIO;
1013 }
1013 root->commit_root = btrfs_root_node(root); 1014 root->commit_root = btrfs_root_node(root);
1014 return 0; 1015 return 0;
1015} 1016}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bcd59c7dfb57..227e5815d838 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -429,6 +429,7 @@ err:
429 429
430static int cache_block_group(struct btrfs_block_group_cache *cache, 430static int cache_block_group(struct btrfs_block_group_cache *cache,
431 struct btrfs_trans_handle *trans, 431 struct btrfs_trans_handle *trans,
432 struct btrfs_root *root,
432 int load_cache_only) 433 int load_cache_only)
433{ 434{
434 struct btrfs_fs_info *fs_info = cache->fs_info; 435 struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
442 443
443 /* 444 /*
444 * We can't do the read from on-disk cache during a commit since we need 445 * We can't do the read from on-disk cache during a commit since we need
445 * to have the normal tree locking. 446 * to have the normal tree locking. Also if we are currently trying to
447 * allocate blocks for the tree root we can't do the fast caching since
448 * we likely hold important locks.
446 */ 449 */
447 if (!trans->transaction->in_commit) { 450 if (!trans->transaction->in_commit &&
451 (root && root != root->fs_info->tree_root)) {
448 spin_lock(&cache->lock); 452 spin_lock(&cache->lock);
449 if (cache->cached != BTRFS_CACHE_NO) { 453 if (cache->cached != BTRFS_CACHE_NO) {
450 spin_unlock(&cache->lock); 454 spin_unlock(&cache->lock);
@@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
2741 struct btrfs_root *root = block_group->fs_info->tree_root; 2745 struct btrfs_root *root = block_group->fs_info->tree_root;
2742 struct inode *inode = NULL; 2746 struct inode *inode = NULL;
2743 u64 alloc_hint = 0; 2747 u64 alloc_hint = 0;
2748 int dcs = BTRFS_DC_ERROR;
2744 int num_pages = 0; 2749 int num_pages = 0;
2745 int retries = 0; 2750 int retries = 0;
2746 int ret = 0; 2751 int ret = 0;
@@ -2795,6 +2800,8 @@ again:
2795 2800
2796 spin_lock(&block_group->lock); 2801 spin_lock(&block_group->lock);
2797 if (block_group->cached != BTRFS_CACHE_FINISHED) { 2802 if (block_group->cached != BTRFS_CACHE_FINISHED) {
2803 /* We're not cached, don't bother trying to write stuff out */
2804 dcs = BTRFS_DC_WRITTEN;
2798 spin_unlock(&block_group->lock); 2805 spin_unlock(&block_group->lock);
2799 goto out_put; 2806 goto out_put;
2800 } 2807 }
@@ -2821,6 +2828,8 @@ again:
2821 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, 2828 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
2822 num_pages, num_pages, 2829 num_pages, num_pages,
2823 &alloc_hint); 2830 &alloc_hint);
2831 if (!ret)
2832 dcs = BTRFS_DC_SETUP;
2824 btrfs_free_reserved_data_space(inode, num_pages); 2833 btrfs_free_reserved_data_space(inode, num_pages);
2825out_put: 2834out_put:
2826 iput(inode); 2835 iput(inode);
@@ -2828,10 +2837,7 @@ out_free:
2828 btrfs_release_path(root, path); 2837 btrfs_release_path(root, path);
2829out: 2838out:
2830 spin_lock(&block_group->lock); 2839 spin_lock(&block_group->lock);
2831 if (ret) 2840 block_group->disk_cache_state = dcs;
2832 block_group->disk_cache_state = BTRFS_DC_ERROR;
2833 else
2834 block_group->disk_cache_state = BTRFS_DC_SETUP;
2835 spin_unlock(&block_group->lock); 2841 spin_unlock(&block_group->lock);
2836 2842
2837 return ret; 2843 return ret;
@@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
3037 3043
3038u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 3044u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
3039{ 3045{
3040 u64 num_devices = root->fs_info->fs_devices->rw_devices; 3046 /*
3047 * we add in the count of missing devices because we want
3048 * to make sure that any RAID levels on a degraded FS
3049 * continue to be honored.
3050 */
3051 u64 num_devices = root->fs_info->fs_devices->rw_devices +
3052 root->fs_info->fs_devices->missing_devices;
3041 3053
3042 if (num_devices == 1) 3054 if (num_devices == 1)
3043 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 3055 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
4080 * space back to the block group, otherwise we will leak space. 4092 * space back to the block group, otherwise we will leak space.
4081 */ 4093 */
4082 if (!alloc && cache->cached == BTRFS_CACHE_NO) 4094 if (!alloc && cache->cached == BTRFS_CACHE_NO)
4083 cache_block_group(cache, trans, 1); 4095 cache_block_group(cache, trans, NULL, 1);
4084 4096
4085 byte_in_group = bytenr - cache->key.objectid; 4097 byte_in_group = bytenr - cache->key.objectid;
4086 WARN_ON(byte_in_group > cache->key.offset); 4098 WARN_ON(byte_in_group > cache->key.offset);
@@ -4930,11 +4942,31 @@ search:
4930 btrfs_get_block_group(block_group); 4942 btrfs_get_block_group(block_group);
4931 search_start = block_group->key.objectid; 4943 search_start = block_group->key.objectid;
4932 4944
4945 /*
4946 * this can happen if we end up cycling through all the
4947 * raid types, but we want to make sure we only allocate
4948 * for the proper type.
4949 */
4950 if (!block_group_bits(block_group, data)) {
4951 u64 extra = BTRFS_BLOCK_GROUP_DUP |
4952 BTRFS_BLOCK_GROUP_RAID1 |
4953 BTRFS_BLOCK_GROUP_RAID10;
4954
4955 /*
4956 * if they asked for extra copies and this block group
4957 * doesn't provide them, bail. This does allow us to
4958 * fill raid0 from raid1.
4959 */
4960 if ((data & extra) && !(block_group->flags & extra))
4961 goto loop;
4962 }
4963
4933have_block_group: 4964have_block_group:
4934 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4965 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
4935 u64 free_percent; 4966 u64 free_percent;
4936 4967
4937 ret = cache_block_group(block_group, trans, 1); 4968 ret = cache_block_group(block_group, trans,
4969 orig_root, 1);
4938 if (block_group->cached == BTRFS_CACHE_FINISHED) 4970 if (block_group->cached == BTRFS_CACHE_FINISHED)
4939 goto have_block_group; 4971 goto have_block_group;
4940 4972
@@ -4958,7 +4990,8 @@ have_block_group:
4958 if (loop > LOOP_CACHING_NOWAIT || 4990 if (loop > LOOP_CACHING_NOWAIT ||
4959 (loop > LOOP_FIND_IDEAL && 4991 (loop > LOOP_FIND_IDEAL &&
4960 atomic_read(&space_info->caching_threads) < 2)) { 4992 atomic_read(&space_info->caching_threads) < 2)) {
4961 ret = cache_block_group(block_group, trans, 0); 4993 ret = cache_block_group(block_group, trans,
4994 orig_root, 0);
4962 BUG_ON(ret); 4995 BUG_ON(ret);
4963 } 4996 }
4964 found_uncached_bg = true; 4997 found_uncached_bg = true;
@@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
5515 u64 num_bytes = ins->offset; 5548 u64 num_bytes = ins->offset;
5516 5549
5517 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5550 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
5518 cache_block_group(block_group, trans, 0); 5551 cache_block_group(block_group, trans, NULL, 0);
5519 caching_ctl = get_caching_control(block_group); 5552 caching_ctl = get_caching_control(block_group);
5520 5553
5521 if (!caching_ctl) { 5554 if (!caching_ctl) {
@@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
6300 NULL, NULL); 6333 NULL, NULL);
6301 BUG_ON(ret < 0); 6334 BUG_ON(ret < 0);
6302 if (ret > 0) { 6335 if (ret > 0) {
6303 ret = btrfs_del_orphan_item(trans, tree_root, 6336 /* if we fail to delete the orphan item this time
6304 root->root_key.objectid); 6337 * around, it'll get picked up the next time.
6305 BUG_ON(ret); 6338 *
6339 * The most common failure here is just -ENOENT.
6340 */
6341 btrfs_del_orphan_item(trans, tree_root,
6342 root->root_key.objectid);
6306 } 6343 }
6307 } 6344 }
6308 6345
@@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
7878 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 7915 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
7879 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 7916 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
7880 7917
7881 num_devices = root->fs_info->fs_devices->rw_devices; 7918 /*
7919 * we add in the count of missing devices because we want
7920 * to make sure that any RAID levels on a degraded FS
7921 * continue to be honored.
7922 */
7923 num_devices = root->fs_info->fs_devices->rw_devices +
7924 root->fs_info->fs_devices->missing_devices;
7925
7882 if (num_devices == 1) { 7926 if (num_devices == 1) {
7883 stripped |= BTRFS_BLOCK_GROUP_DUP; 7927 stripped |= BTRFS_BLOCK_GROUP_DUP;
7884 stripped = flags & ~stripped; 7928 stripped = flags & ~stripped;
@@ -8247,7 +8291,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
8247 break; 8291 break;
8248 if (ret != 0) 8292 if (ret != 0)
8249 goto error; 8293 goto error;
8250
8251 leaf = path->nodes[0]; 8294 leaf = path->nodes[0];
8252 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 8295 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
8253 cache = kzalloc(sizeof(*cache), GFP_NOFS); 8296 cache = kzalloc(sizeof(*cache), GFP_NOFS);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c1faded5fca0..66836d85763b 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
48 struct page **prepared_pages, 48 struct page **prepared_pages,
49 struct iov_iter *i) 49 struct iov_iter *i)
50{ 50{
51 size_t copied; 51 size_t copied = 0;
52 int pg = 0; 52 int pg = 0;
53 int offset = pos & (PAGE_CACHE_SIZE - 1); 53 int offset = pos & (PAGE_CACHE_SIZE - 1);
54 int total_copied = 0;
54 55
55 while (write_bytes > 0) { 56 while (write_bytes > 0) {
56 size_t count = min_t(size_t, 57 size_t count = min_t(size_t,
57 PAGE_CACHE_SIZE - offset, write_bytes); 58 PAGE_CACHE_SIZE - offset, write_bytes);
58 struct page *page = prepared_pages[pg]; 59 struct page *page = prepared_pages[pg];
59again: 60 /*
60 if (unlikely(iov_iter_fault_in_readable(i, count))) 61 * Copy data from userspace to the current page
61 return -EFAULT; 62 *
62 63 * Disable pagefault to avoid recursive lock since
63 /* Copy data from userspace to the current page */ 64 * the pages are already locked
64 copied = iov_iter_copy_from_user(page, i, offset, count); 65 */
66 pagefault_disable();
67 copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
68 pagefault_enable();
65 69
66 /* Flush processor's dcache for this page */ 70 /* Flush processor's dcache for this page */
67 flush_dcache_page(page); 71 flush_dcache_page(page);
68 iov_iter_advance(i, copied); 72 iov_iter_advance(i, copied);
69 write_bytes -= copied; 73 write_bytes -= copied;
74 total_copied += copied;
70 75
76 /* Return to btrfs_file_aio_write to fault page */
71 if (unlikely(copied == 0)) { 77 if (unlikely(copied == 0)) {
72 count = min_t(size_t, PAGE_CACHE_SIZE - offset, 78 break;
73 iov_iter_single_seg_count(i));
74 goto again;
75 } 79 }
76 80
77 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { 81 if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +85,7 @@ again:
81 offset = 0; 85 offset = 0;
82 } 86 }
83 } 87 }
84 return 0; 88 return total_copied;
85} 89}
86 90
87/* 91/*
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
854 unsigned long last_index; 858 unsigned long last_index;
855 int will_write; 859 int will_write;
856 int buffered = 0; 860 int buffered = 0;
861 int copied = 0;
862 int dirty_pages = 0;
857 863
858 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || 864 will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
859 (file->f_flags & O_DIRECT)); 865 (file->f_flags & O_DIRECT));
@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
970 WARN_ON(num_pages > nrptrs); 976 WARN_ON(num_pages > nrptrs);
971 memset(pages, 0, sizeof(struct page *) * nrptrs); 977 memset(pages, 0, sizeof(struct page *) * nrptrs);
972 978
973 ret = btrfs_delalloc_reserve_space(inode, write_bytes); 979 /*
980 * Fault pages before locking them in prepare_pages
981 * to avoid recursive lock
982 */
983 if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
984 ret = -EFAULT;
985 goto out;
986 }
987
988 ret = btrfs_delalloc_reserve_space(inode,
989 num_pages << PAGE_CACHE_SHIFT);
974 if (ret) 990 if (ret)
975 goto out; 991 goto out;
976 992
@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
978 pos, first_index, last_index, 994 pos, first_index, last_index,
979 write_bytes); 995 write_bytes);
980 if (ret) { 996 if (ret) {
981 btrfs_delalloc_release_space(inode, write_bytes); 997 btrfs_delalloc_release_space(inode,
998 num_pages << PAGE_CACHE_SHIFT);
982 goto out; 999 goto out;
983 } 1000 }
984 1001
985 ret = btrfs_copy_from_user(pos, num_pages, 1002 copied = btrfs_copy_from_user(pos, num_pages,
986 write_bytes, pages, &i); 1003 write_bytes, pages, &i);
987 if (ret == 0) { 1004 dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
1005 PAGE_CACHE_SHIFT;
1006
1007 if (num_pages > dirty_pages) {
1008 if (copied > 0)
1009 atomic_inc(
1010 &BTRFS_I(inode)->outstanding_extents);
1011 btrfs_delalloc_release_space(inode,
1012 (num_pages - dirty_pages) <<
1013 PAGE_CACHE_SHIFT);
1014 }
1015
1016 if (copied > 0) {
988 dirty_and_release_pages(NULL, root, file, pages, 1017 dirty_and_release_pages(NULL, root, file, pages,
989 num_pages, pos, write_bytes); 1018 dirty_pages, pos, copied);
990 } 1019 }
991 1020
992 btrfs_drop_pages(pages, num_pages); 1021 btrfs_drop_pages(pages, num_pages);
993 if (ret) {
994 btrfs_delalloc_release_space(inode, write_bytes);
995 goto out;
996 }
997 1022
998 if (will_write) { 1023 if (copied > 0) {
999 filemap_fdatawrite_range(inode->i_mapping, pos, 1024 if (will_write) {
1000 pos + write_bytes - 1); 1025 filemap_fdatawrite_range(inode->i_mapping, pos,
1001 } else { 1026 pos + copied - 1);
1002 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1027 } else {
1003 num_pages); 1028 balance_dirty_pages_ratelimited_nr(
1004 if (num_pages < 1029 inode->i_mapping,
1005 (root->leafsize >> PAGE_CACHE_SHIFT) + 1) 1030 dirty_pages);
1006 btrfs_btree_balance_dirty(root, 1); 1031 if (dirty_pages <
1007 btrfs_throttle(root); 1032 (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
1033 btrfs_btree_balance_dirty(root, 1);
1034 btrfs_throttle(root);
1035 }
1008 } 1036 }
1009 1037
1010 pos += write_bytes; 1038 pos += copied;
1011 num_written += write_bytes; 1039 num_written += copied;
1012 1040
1013 cond_resched(); 1041 cond_resched();
1014 } 1042 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 22ee0dc2e6b8..60d684266959 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -290,7 +290,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
290 (unsigned long long)BTRFS_I(inode)->generation, 290 (unsigned long long)BTRFS_I(inode)->generation,
291 (unsigned long long)generation, 291 (unsigned long long)generation,
292 (unsigned long long)block_group->key.objectid); 292 (unsigned long long)block_group->key.objectid);
293 goto out; 293 goto free_cache;
294 } 294 }
295 295
296 if (!num_entries) 296 if (!num_entries)
@@ -524,6 +524,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
524 return 0; 524 return 0;
525 } 525 }
526 526
527 node = rb_first(&block_group->free_space_offset);
528 if (!node) {
529 iput(inode);
530 return 0;
531 }
532
527 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 533 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
528 filemap_write_and_wait(inode->i_mapping); 534 filemap_write_and_wait(inode->i_mapping);
529 btrfs_wait_ordered_range(inode, inode->i_size & 535 btrfs_wait_ordered_range(inode, inode->i_size &
@@ -543,10 +549,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
543 */ 549 */
544 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 550 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
545 551
546 node = rb_first(&block_group->free_space_offset);
547 if (!node)
548 goto out_free;
549
550 /* 552 /*
551 * Lock all pages first so we can lock the extent safely. 553 * Lock all pages first so we can lock the extent safely.
552 * 554 *
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8039390bd6a6..72f31ecb5c90 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -495,7 +495,7 @@ again:
495 add_async_extent(async_cow, start, num_bytes, 495 add_async_extent(async_cow, start, num_bytes,
496 total_compressed, pages, nr_pages_ret); 496 total_compressed, pages, nr_pages_ret);
497 497
498 if (start + num_bytes < end && start + num_bytes < actual_end) { 498 if (start + num_bytes < end) {
499 start += num_bytes; 499 start += num_bytes;
500 pages = NULL; 500 pages = NULL;
501 cond_resched(); 501 cond_resched();
@@ -5712,9 +5712,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
5712 5712
5713 if (err) { 5713 if (err) {
5714 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu " 5714 printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
5715 "disk_bytenr %lu len %u err no %d\n", 5715 "sector %#Lx len %u err no %d\n",
5716 dip->inode->i_ino, bio->bi_rw, bio->bi_sector, 5716 dip->inode->i_ino, bio->bi_rw,
5717 bio->bi_size, err); 5717 (unsigned long long)bio->bi_sector, bio->bi_size, err);
5718 dip->errors = 1; 5718 dip->errors = 1;
5719 5719
5720 /* 5720 /*
@@ -5934,8 +5934,7 @@ free_ordered:
5934 */ 5934 */
5935 if (write) { 5935 if (write) {
5936 struct btrfs_ordered_extent *ordered; 5936 struct btrfs_ordered_extent *ordered;
5937 ordered = btrfs_lookup_ordered_extent(inode, 5937 ordered = btrfs_lookup_ordered_extent(inode, file_offset);
5938 dip->logical_offset);
5939 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && 5938 if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
5940 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) 5939 !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
5941 btrfs_free_reserved_extent(root, ordered->start, 5940 btrfs_free_reserved_extent(root, ordered->start,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f1c9bb4079ed..f87552a1d7ea 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -947,23 +947,42 @@ out:
947 947
948static noinline int btrfs_ioctl_snap_create(struct file *file, 948static noinline int btrfs_ioctl_snap_create(struct file *file,
949 void __user *arg, int subvol, 949 void __user *arg, int subvol,
950 int async) 950 int v2)
951{ 951{
952 struct btrfs_ioctl_vol_args *vol_args = NULL; 952 struct btrfs_ioctl_vol_args *vol_args = NULL;
953 struct btrfs_ioctl_async_vol_args *async_vol_args = NULL; 953 struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
954 char *name; 954 char *name;
955 u64 fd; 955 u64 fd;
956 u64 transid = 0;
957 int ret; 956 int ret;
958 957
959 if (async) { 958 if (v2) {
960 async_vol_args = memdup_user(arg, sizeof(*async_vol_args)); 959 u64 transid = 0;
961 if (IS_ERR(async_vol_args)) 960 u64 *ptr = NULL;
962 return PTR_ERR(async_vol_args);
963 961
964 name = async_vol_args->name; 962 vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
965 fd = async_vol_args->fd; 963 if (IS_ERR(vol_args_v2))
966 async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0'; 964 return PTR_ERR(vol_args_v2);
965
966 if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
967 ret = -EINVAL;
968 goto out;
969 }
970
971 name = vol_args_v2->name;
972 fd = vol_args_v2->fd;
973 vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
974
975 if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
976 ptr = &transid;
977
978 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
979 subvol, ptr);
980
981 if (ret == 0 && ptr &&
982 copy_to_user(arg +
983 offsetof(struct btrfs_ioctl_vol_args_v2,
984 transid), ptr, sizeof(*ptr)))
985 ret = -EFAULT;
967 } else { 986 } else {
968 vol_args = memdup_user(arg, sizeof(*vol_args)); 987 vol_args = memdup_user(arg, sizeof(*vol_args));
969 if (IS_ERR(vol_args)) 988 if (IS_ERR(vol_args))
@@ -971,20 +990,13 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
971 name = vol_args->name; 990 name = vol_args->name;
972 fd = vol_args->fd; 991 fd = vol_args->fd;
973 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 992 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
974 }
975
976 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
977 subvol, &transid);
978 993
979 if (!ret && async) { 994 ret = btrfs_ioctl_snap_create_transid(file, name, fd,
980 if (copy_to_user(arg + 995 subvol, NULL);
981 offsetof(struct btrfs_ioctl_async_vol_args,
982 transid), &transid, sizeof(transid)))
983 return -EFAULT;
984 } 996 }
985 997out:
986 kfree(vol_args); 998 kfree(vol_args);
987 kfree(async_vol_args); 999 kfree(vol_args_v2);
988 1000
989 return ret; 1001 return ret;
990} 1002}
@@ -2246,7 +2258,7 @@ long btrfs_ioctl(struct file *file, unsigned int
2246 return btrfs_ioctl_getversion(file, argp); 2258 return btrfs_ioctl_getversion(file, argp);
2247 case BTRFS_IOC_SNAP_CREATE: 2259 case BTRFS_IOC_SNAP_CREATE:
2248 return btrfs_ioctl_snap_create(file, argp, 0, 0); 2260 return btrfs_ioctl_snap_create(file, argp, 0, 0);
2249 case BTRFS_IOC_SNAP_CREATE_ASYNC: 2261 case BTRFS_IOC_SNAP_CREATE_V2:
2250 return btrfs_ioctl_snap_create(file, argp, 0, 1); 2262 return btrfs_ioctl_snap_create(file, argp, 0, 1);
2251 case BTRFS_IOC_SUBVOL_CREATE: 2263 case BTRFS_IOC_SUBVOL_CREATE:
2252 return btrfs_ioctl_snap_create(file, argp, 1, 0); 2264 return btrfs_ioctl_snap_create(file, argp, 1, 0);
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 17c99ebdf960..c344d12c646b 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -30,11 +30,15 @@ struct btrfs_ioctl_vol_args {
30 char name[BTRFS_PATH_NAME_MAX + 1]; 30 char name[BTRFS_PATH_NAME_MAX + 1];
31}; 31};
32 32
33#define BTRFS_SNAPSHOT_NAME_MAX 4079 33#define BTRFS_SUBVOL_CREATE_ASYNC (1ULL << 0)
34struct btrfs_ioctl_async_vol_args { 34
35#define BTRFS_SUBVOL_NAME_MAX 4039
36struct btrfs_ioctl_vol_args_v2 {
35 __s64 fd; 37 __s64 fd;
36 __u64 transid; 38 __u64 transid;
37 char name[BTRFS_SNAPSHOT_NAME_MAX + 1]; 39 __u64 flags;
40 __u64 unused[4];
41 char name[BTRFS_SUBVOL_NAME_MAX + 1];
38}; 42};
39 43
40#define BTRFS_INO_LOOKUP_PATH_MAX 4080 44#define BTRFS_INO_LOOKUP_PATH_MAX 4080
@@ -187,6 +191,6 @@ struct btrfs_ioctl_space_args {
187 struct btrfs_ioctl_space_args) 191 struct btrfs_ioctl_space_args)
188#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64) 192#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
189#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) 193#define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
190#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \ 194#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
191 struct btrfs_ioctl_async_vol_args) 195 struct btrfs_ioctl_vol_args_v2)
192#endif 196#endif
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 79cba5fbc28e..f8be250963a0 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
56 return -ENOMEM; 56 return -ENOMEM;
57 57
58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 58 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
59 if (ret) 59 if (ret < 0)
60 goto out; 60 goto out;
61 if (ret) {
62 ret = -ENOENT;
63 goto out;
64 }
61 65
62 ret = btrfs_del_item(trans, root, path); 66 ret = btrfs_del_item(trans, root, path);
63 67
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index dbb51ea7a13c..883c6fa1367e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -685,9 +685,9 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
685 mutex_unlock(&root->d_inode->i_mutex); 685 mutex_unlock(&root->d_inode->i_mutex);
686 686
687 if (IS_ERR(new_root)) { 687 if (IS_ERR(new_root)) {
688 dput(root);
688 deactivate_locked_super(s); 689 deactivate_locked_super(s);
689 error = PTR_ERR(new_root); 690 error = PTR_ERR(new_root);
690 dput(root);
691 goto error_free_subvol_name; 691 goto error_free_subvol_name;
692 } 692 }
693 if (!new_root->d_inode) { 693 if (!new_root->d_inode) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cc04dc1445d6..6b9884507837 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -412,12 +412,16 @@ static noinline int device_list_add(const char *path,
412 412
413 device->fs_devices = fs_devices; 413 device->fs_devices = fs_devices;
414 fs_devices->num_devices++; 414 fs_devices->num_devices++;
415 } else if (strcmp(device->name, path)) { 415 } else if (!device->name || strcmp(device->name, path)) {
416 name = kstrdup(path, GFP_NOFS); 416 name = kstrdup(path, GFP_NOFS);
417 if (!name) 417 if (!name)
418 return -ENOMEM; 418 return -ENOMEM;
419 kfree(device->name); 419 kfree(device->name);
420 device->name = name; 420 device->name = name;
421 if (device->missing) {
422 fs_devices->missing_devices--;
423 device->missing = 0;
424 }
421 } 425 }
422 426
423 if (found_transid > fs_devices->latest_trans) { 427 if (found_transid > fs_devices->latest_trans) {
@@ -1236,6 +1240,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1236 1240
1237 device->fs_devices->num_devices--; 1241 device->fs_devices->num_devices--;
1238 1242
1243 if (device->missing)
1244 root->fs_info->fs_devices->missing_devices--;
1245
1239 next_device = list_entry(root->fs_info->fs_devices->devices.next, 1246 next_device = list_entry(root->fs_info->fs_devices->devices.next,
1240 struct btrfs_device, dev_list); 1247 struct btrfs_device, dev_list);
1241 if (device->bdev == root->fs_info->sb->s_bdev) 1248 if (device->bdev == root->fs_info->sb->s_bdev)
@@ -3080,7 +3087,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
3080 device->devid = devid; 3087 device->devid = devid;
3081 device->work.func = pending_bios_fn; 3088 device->work.func = pending_bios_fn;
3082 device->fs_devices = fs_devices; 3089 device->fs_devices = fs_devices;
3090 device->missing = 1;
3083 fs_devices->num_devices++; 3091 fs_devices->num_devices++;
3092 fs_devices->missing_devices++;
3084 spin_lock_init(&device->io_lock); 3093 spin_lock_init(&device->io_lock);
3085 INIT_LIST_HEAD(&device->dev_alloc_list); 3094 INIT_LIST_HEAD(&device->dev_alloc_list);
3086 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); 3095 memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
@@ -3278,6 +3287,15 @@ static int read_one_dev(struct btrfs_root *root,
3278 device = add_missing_dev(root, devid, dev_uuid); 3287 device = add_missing_dev(root, devid, dev_uuid);
3279 if (!device) 3288 if (!device)
3280 return -ENOMEM; 3289 return -ENOMEM;
3290 } else if (!device->missing) {
3291 /*
3292 * this happens when a device that was properly setup
3293 * in the device info lists suddenly goes bad.
3294 * device->bdev is NULL, and so we have to set
3295 * device->missing to one here
3296 */
3297 root->fs_info->fs_devices->missing_devices++;
3298 device->missing = 1;
3281 } 3299 }
3282 } 3300 }
3283 3301
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2b638b6e4eea..2740db49eb04 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -44,6 +44,7 @@ struct btrfs_device {
44 44
45 int writeable; 45 int writeable;
46 int in_fs_metadata; 46 int in_fs_metadata;
47 int missing;
47 48
48 spinlock_t io_lock; 49 spinlock_t io_lock;
49 50
@@ -93,6 +94,7 @@ struct btrfs_fs_devices {
93 u64 num_devices; 94 u64 num_devices;
94 u64 open_devices; 95 u64 open_devices;
95 u64 rw_devices; 96 u64 rw_devices;
97 u64 missing_devices;
96 u64 total_rw_bytes; 98 u64 total_rw_bytes;
97 struct block_device *latest_bdev; 99 struct block_device *latest_bdev;
98 100